Am 15.02.22 um 09:54 schrieb Anton Khirnov: > Quoting Thilo Borgmann (2022-02-12 11:55:39) >> Am 31.01.22 um 12:55 schrieb James Almer: >> +static int config_input(AVFilterLink *inlink) >> +{ >> + // Video input data avilable >> + AVFilterContext *ctx = inlink->dst; >> + SiTiContext *s = ctx->priv; >> + int max_pixsteps[4]; >> + size_t pixel_sz; >> + size_t data_sz; >> + size_t gradient_sz; >> + size_t motion_sz; >> + >> + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format); >> + av_image_fill_max_pixsteps(max_pixsteps, NULL, desc); >> + >> + s->pixel_depth = max_pixsteps[0]; >> + s->width = inlink->w; >> + s->height = inlink->h; >> + pixel_sz = s->pixel_depth == 1 ? sizeof(uint8_t) : sizeof(uint16_t); >> + data_sz = s->width * pixel_sz * s->height; >> + >> + s->prev_frame = av_malloc(data_sz); >> + >> + gradient_sz = (s->width - 2) * sizeof(double) * (s->height - 2); >> + s->gradient_matrix = (double*)av_malloc(gradient_sz); >> + >> + motion_sz = s->width * sizeof(double) * s->height; >> + s->motion_matrix = (double*)av_malloc(motion_sz); > > useless casts > >> + >> + if (!s->prev_frame || ! s->gradient_matrix || !s->motion_matrix) { >> + av_freep(&s->prev_frame); >> + av_freep(&s->gradient_matrix); >> + av_freep(&s->motion_matrix); > > You don't need to free them on failure, that will be done in uninit. But > you should free them at the beginning of this function, because > config_input can be called multiple times. Always freep'd and fail on alloc error. >> +// Applies sobel convolution >> +static void convolve_sobel(SiTiContext *s, const uint8_t *src, double *dst, int linesize) >> +{ >> + double x_conv_sum; >> + double y_conv_sum; >> + double gradient; >> + int ki; >> + int kj; >> + int index; >> + uint16_t data; >> + int filter_width = 3; >> + int filter_size = filter_width * filter_width; >> + int stride = linesize / s->pixel_depth; >> + >> + // Dst matrix is smaller than src since we ignore edges that can't be convolved >> + #define CONVOLVE(bps) \ >> + { \ >> + uint##bps##_t *vsrc = (uint##bps##_t*)src; \ >> + for (int j = 1; j < s->height - 1; j++) { \ >> + for (int i = 1; i < s->width - 1; i++) { \ >> + x_conv_sum = 0.0; \ >> + y_conv_sum = 0.0; \ >> + for (int k = 0; k < filter_size; k++) { \ >> + ki = k % filter_width - 1; \ >> + kj = floor(k / filter_width) - 1; \ >> + index = (j + kj) * stride + (i + ki); \ >> + data = convert_full_range(s, vsrc[index]); \ > > Pass bps as a parameter to convert_full_range() instead of accessing > s->pixel_depth, so the compiler can optimize the branch away. I am not sure if the changes I did here suit the optimization you had in mind... pls check if v4 does this right. > >> +// Calculate pixel difference between current and previous frame, and update previous >> +static void calculate_motion(SiTiContext *s, const uint8_t *curr, >> + double *motion_matrix, int linesize) >> +{ >> + int stride = linesize / s->pixel_depth; >> + double motion; >> + int curr_index; >> + int prev_index; >> + uint16_t curr_data; >> + >> + // Previous frame is already converted to full range >> + #define CALCULATE(bps) \ >> + { \ >> + uint##bps##_t *vsrc = (uint##bps##_t*)curr; \ >> + for (int j = 0; j < s->height; j++) { \ >> + for (int i = 0; i < s->width; i++) { \ >> + motion = 0; \ >> + curr_index = j * stride + i; \ >> + prev_index = j * s->width + i; \ >> + curr_data = convert_full_range(s, vsrc[curr_index]); \ >> + if (s->nb_frames > 1) \ >> + motion = curr_data - s->prev_frame[prev_index]; \ >> + s->prev_frame[prev_index] = curr_data; \ > > previous code accessed this as uint8_t or uint16_t based on bps > Fixed in v4. Attached. Thanks, Thilo