Am 15.02.22 um 09:54 schrieb Anton Khirnov:
> Quoting Thilo Borgmann (2022-02-12 11:55:39)
>> Am 31.01.22 um 12:55 schrieb James Almer:
>> +static int config_input(AVFilterLink *inlink)
>> +{
>> +    // Video input data avilable
>> +    AVFilterContext *ctx = inlink->dst;
>> +    SiTiContext *s = ctx->priv;
>> +    int max_pixsteps[4];
>> +    size_t pixel_sz;
>> +    size_t data_sz;
>> +    size_t gradient_sz;
>> +    size_t motion_sz;
>> +
>> +    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
>> +    av_image_fill_max_pixsteps(max_pixsteps, NULL, desc);
>> +
>> +    s->pixel_depth = max_pixsteps[0];
>> +    s->width = inlink->w;
>> +    s->height = inlink->h;
>> +    pixel_sz = s->pixel_depth == 1 ? sizeof(uint8_t) : sizeof(uint16_t);
>> +    data_sz = s->width * pixel_sz * s->height;
>> +
>> +    s->prev_frame = av_malloc(data_sz);
>> +
>> +    gradient_sz = (s->width - 2) * sizeof(double) * (s->height - 2);
>> +    s->gradient_matrix = (double*)av_malloc(gradient_sz);
>> +
>> +    motion_sz = s->width * sizeof(double) * s->height;
>> +    s->motion_matrix = (double*)av_malloc(motion_sz);
> 
> useless casts
> 
>> +
>> +    if (!s->prev_frame || ! s->gradient_matrix || !s->motion_matrix) {
>> +        av_freep(&s->prev_frame);
>> +        av_freep(&s->gradient_matrix);
>> +        av_freep(&s->motion_matrix);
> 
> You don't need to free them on failure, that will be done in uninit. But
> you should free them at the beginning of this function, because
> config_input can be called multiple times.

Always freep'd and fail on alloc error.


>> +// Applies sobel convolution
>> +static void convolve_sobel(SiTiContext *s, const uint8_t *src, double *dst, int linesize)
>> +{
>> +    double x_conv_sum;
>> +    double y_conv_sum;
>> +    double gradient;
>> +    int ki;
>> +    int kj;
>> +    int index;
>> +    uint16_t data;
>> +    int filter_width = 3;
>> +    int filter_size = filter_width * filter_width;
>> +    int stride = linesize / s->pixel_depth;
>> +
>> +    // Dst matrix is smaller than src since we ignore edges that can't be convolved
>> +    #define CONVOLVE(bps)                                           \
>> +    {                                                               \
>> +        uint##bps##_t *vsrc = (uint##bps##_t*)src;                  \
>> +        for (int j = 1; j < s->height - 1; j++) {                   \
>> +            for (int i = 1; i < s->width - 1; i++) {                \
>> +                x_conv_sum = 0.0;                                   \
>> +                y_conv_sum = 0.0;                                   \
>> +                for (int k = 0; k < filter_size; k++) {             \
>> +                    ki = k % filter_width - 1;                      \
>> +                    kj = floor(k / filter_width) - 1;               \
>> +                    index = (j + kj) * stride + (i + ki);           \
>> +                    data = convert_full_range(s, vsrc[index]);      \
> 
> Pass bps as a parameter to convert_full_range() instead of accessing
> s->pixel_depth, so the compiler can optimize the branch away.

I am not sure if the changes I did here suit the optimization you had in mind... pls check if v4 does this right.


> 
>> +// Calculate pixel difference between current and previous frame, and update previous
>> +static void calculate_motion(SiTiContext *s, const uint8_t *curr,
>> +                             double *motion_matrix, int linesize)
>> +{
>> +    int stride = linesize / s->pixel_depth;
>> +    double motion;
>> +    int curr_index;
>> +    int prev_index;
>> +    uint16_t curr_data;
>> +
>> +    // Previous frame is already converted to full range
>> +    #define CALCULATE(bps)                                           \
>> +    {                                                                \
>> +        uint##bps##_t *vsrc = (uint##bps##_t*)curr;                  \
>> +        for (int j = 0; j < s->height; j++) {                        \
>> +            for (int i = 0; i < s->width; i++) {                     \
>> +                motion = 0;                                          \
>> +                curr_index = j * stride + i;                         \
>> +                prev_index = j * s->width + i;                       \
>> +                curr_data = convert_full_range(s, vsrc[curr_index]); \
>> +                if (s->nb_frames > 1)                                \
>> +                    motion = curr_data - s->prev_frame[prev_index];  \
>> +                s->prev_frame[prev_index] = curr_data;               \
> 
> previous code accessed this as uint8_t or uint16_t based on bps
> 

Fixed in v4. Attached.

Thanks,
Thilo