Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
* [FFmpeg-devel] [PATCH 1/4] libswscale: Re-factor ff_shuffle_filter_coefficients.
@ 2022-01-10 14:58 Alan Kelly
  2022-01-10 14:58 ` [FFmpeg-devel] [PATCH 2/4] libswscale: Avx2 hscale can process any input of size which is a multiple of 4 Alan Kelly
                   ` (4 more replies)
  0 siblings, 5 replies; 7+ messages in thread
From: Alan Kelly @ 2022-01-10 14:58 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Alan Kelly

Make the code more readable, follow the style guide and propagate memory
allocation errors.
---
 libswscale/swscale_internal.h |  2 +-
 libswscale/utils.c            | 68 ++++++++++++++++++++---------------
 2 files changed, 40 insertions(+), 30 deletions(-)

diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 3a78d95ba6..26d28d42e6 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -1144,5 +1144,5 @@ void ff_sws_slice_worker(void *priv, int jobnr, int threadnr,
 #define MAX_LINES_AHEAD 4
 
 //shuffle filter and filterPos for hyScale and hcScale filters in avx2
-void ff_shuffle_filter_coefficients(SwsContext *c, int* filterPos, int filterSize, int16_t *filter, int dstW);
+int ff_shuffle_filter_coefficients(SwsContext *c, int* filterPos, int filterSize, int16_t *filter, int dstW);
 #endif /* SWSCALE_SWSCALE_INTERNAL_H */
diff --git a/libswscale/utils.c b/libswscale/utils.c
index c5ea8853d5..52f07e1661 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -278,39 +278,47 @@ static const FormatEntry format_entries[] = {
     [AV_PIX_FMT_P416LE]      = { 1, 1 },
 };
 
-void ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos, int filterSize, int16_t *filter, int dstW){
+int ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos,
+                                   int filterSize, int16_t *filter,
+                                   int dstW)
+{
 #if ARCH_X86_64
-    int i, j, k, l;
+    int i = 0, j = 0, k = 0;
     int cpu_flags = av_get_cpu_flags();
+    if (!filter || dstW % 16 != 0) return 0;
     if (EXTERNAL_AVX2_FAST(cpu_flags) && !(cpu_flags & AV_CPU_FLAG_SLOW_GATHER)) {
-        if ((c->srcBpc == 8) && (c->dstBpc <= 14)){
-            if (dstW % 16 == 0){
-                if (filter != NULL){
-                    for (i = 0; i < dstW; i += 8){
-                        FFSWAP(int, filterPos[i + 2], filterPos[i+4]);
-                        FFSWAP(int, filterPos[i + 3], filterPos[i+5]);
-                    }
-                    if (filterSize > 4){
-                        int16_t *tmp2 = av_malloc(dstW * filterSize * 2);
-                        memcpy(tmp2, filter, dstW * filterSize * 2);
-                        for (i = 0; i < dstW; i += 16){//pixel
-                            for (k = 0; k < filterSize / 4; ++k){//fcoeff
-                                for (j = 0; j < 16; ++j){//inner pixel
-                                    for (l = 0; l < 4; ++l){//coeff
-                                        int from = i * filterSize + j * filterSize + k * 4 + l;
-                                        int to = (i) * filterSize + j * 4 + l + k * 64;
-                                        filter[to] = tmp2[from];
-                                    }
-                                }
-                            }
-                        }
-                        av_free(tmp2);
-                    }
-                }
-            }
+        if ((c->srcBpc == 8) && (c->dstBpc <= 14)) {
+           int16_t *filterCopy = NULL;
+           if (filterSize > 4) {
+               if (!FF_ALLOC_TYPED_ARRAY(filterCopy, dstW * filterSize))
+                   return AVERROR(ENOMEM);
+               memcpy(filterCopy, filter, dstW * filterSize * sizeof(int16_t));
+           }
+           // Do not swap filterPos for pixels which won't be processed by
+           // the main loop.
+           for (i = 0; i + 8 <= dstW; i += 8) {
+               FFSWAP(int, filterPos[i + 2], filterPos[i + 4]);
+               FFSWAP(int, filterPos[i + 3], filterPos[i + 5]);
+           }
+           if (filterSize > 4) {
+               // 16 pixels are processed at a time.
+               for (i = 0; i + 16 <= dstW; i += 16) {
+                   // 4 filter coeffs are processed at a time.
+                   for (k = 0; k + 4 <= filterSize; k += 4) {
+                       for (j = 0; j < 16; ++j) {
+                           int from = (i + j) * filterSize + k;
+                           int to = i * filterSize + j * 4 + k * 16;
+                           memcpy(&filter[to], &filterCopy[from], 4 * sizeof(int16_t));
+                       }
+                   }
+               }
+           }
+           if (filterCopy)
+               av_free(filterCopy);
         }
     }
 #endif
+    return 0;
 }
 
 int sws_isSupportedInput(enum AVPixelFormat pix_fmt)
@@ -1836,7 +1844,8 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
                            get_local_pos(c, 0, 0, 0),
                            get_local_pos(c, 0, 0, 0))) < 0)
                 goto fail;
-            ff_shuffle_filter_coefficients(c, c->hLumFilterPos, c->hLumFilterSize, c->hLumFilter, dstW);
+            if ((ret = ff_shuffle_filter_coefficients(c, c->hLumFilterPos, c->hLumFilterSize, c->hLumFilter, dstW)) != 0)
+                goto nomem;
             if ((ret = initFilter(&c->hChrFilter, &c->hChrFilterPos,
                            &c->hChrFilterSize, c->chrXInc,
                            c->chrSrcW, c->chrDstW, filterAlign, 1 << 14,
@@ -1846,7 +1855,8 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
                            get_local_pos(c, c->chrSrcHSubSample, c->src_h_chr_pos, 0),
                            get_local_pos(c, c->chrDstHSubSample, c->dst_h_chr_pos, 0))) < 0)
                 goto fail;
-            ff_shuffle_filter_coefficients(c, c->hChrFilterPos, c->hChrFilterSize, c->hChrFilter, c->chrDstW);
+            if ((ret = ff_shuffle_filter_coefficients(c, c->hChrFilterPos, c->hChrFilterSize, c->hChrFilter, c->chrDstW)) != 0)
+                goto nomem;
         }
     } // initialize horizontal stuff
 
-- 
2.34.1.575.g55b058a8bb-goog

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [FFmpeg-devel] [PATCH 2/4] libswscale: Avx2 hscale can process any input of size which is a multiple of 4.
  2022-01-10 14:58 [FFmpeg-devel] [PATCH 1/4] libswscale: Re-factor ff_shuffle_filter_coefficients Alan Kelly
@ 2022-01-10 14:58 ` Alan Kelly
  2022-01-10 14:58 ` [FFmpeg-devel] [PATCH 3/4] libswscale: Enable hscale_avx2 for input sizes which ar emultiples " Alan Kelly
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 7+ messages in thread
From: Alan Kelly @ 2022-01-10 14:58 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Alan Kelly

The main loop processes blocks of 16 pixels. The tail processes blocks
of size 4.
---
 libswscale/x86/scale_avx2.asm | 48 +++++++++++++++++++++++++++++++++--
 1 file changed, 46 insertions(+), 2 deletions(-)

diff --git a/libswscale/x86/scale_avx2.asm b/libswscale/x86/scale_avx2.asm
index 20acdbd633..dc42abb100 100644
--- a/libswscale/x86/scale_avx2.asm
+++ b/libswscale/x86/scale_avx2.asm
@@ -53,6 +53,9 @@ cglobal hscale8to15_%1, 7, 9, 16, pos0, dst, w, srcmem, filter, fltpos, fltsize,
     mova m14, [four]
     shr fltsized, 2
 %endif
+    cmp wq, 16
+    jl .tail_loop
+    mov countq, 0x10
 .loop:
     movu m1, [fltposq]
     movu m2, [fltposq+32]
@@ -97,11 +100,52 @@ cglobal hscale8to15_%1, 7, 9, 16, pos0, dst, w, srcmem, filter, fltpos, fltsize,
     vpsrad  m6, 7
     vpackssdw m5, m5, m6
     vpermd m5, m15, m5
-    vmovdqu [dstq + countq * 2], m5
+    vmovdqu [dstq], m5
+    add dstq, 0x20
     add fltposq, 0x40
     add countq, 0x10
     cmp countq, wq
-    jl .loop
+    jle .loop
+
+    sub countq, 0x10
+    cmp countq, wq
+    jge .end
+
+.tail_loop:
+    movu xm1, [fltposq]
+%ifidn %1, X4
+    pxor xm9, xm9
+    pxor xm10, xm10
+    xor innerq, innerq
+.tail_innerloop:
+%endif
+    vpcmpeqd  xm13, xm13
+    vpgatherdd xm3,[srcmemq + xm1], xm13
+    vpunpcklbw xm5, xm3, xm0
+    vpunpckhbw xm6, xm3, xm0
+    vpmaddwd xm5, xm5, [filterq]
+    vpmaddwd xm6, xm6, [filterq + 16]
+    add filterq, 0x20
+%ifidn %1, X4
+    paddd xm9, xm5
+    paddd xm10, xm6
+    paddd xm1, xm14
+    add innerq, 1
+    cmp innerq, fltsizeq
+    jl .tail_innerloop
+    vphaddd xm5, xm9, xm10
+%else
+    vphaddd xm5, xm5, xm6
+%endif
+    vpsrad  xm5, 7
+    vpackssdw xm5, xm5, xm5
+    vmovq [dstq], xm5
+    add dstq, 0x8
+    add fltposq, 0x10
+    add countq, 0x4
+    cmp countq, wq
+    jl .tail_loop
+.end:
 REP_RET
 %endmacro
 
-- 
2.34.1.575.g55b058a8bb-goog

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [FFmpeg-devel] [PATCH 3/4] libswscale: Enable hscale_avx2 for input sizes which ar emultiples of 4.
  2022-01-10 14:58 [FFmpeg-devel] [PATCH 1/4] libswscale: Re-factor ff_shuffle_filter_coefficients Alan Kelly
  2022-01-10 14:58 ` [FFmpeg-devel] [PATCH 2/4] libswscale: Avx2 hscale can process any input of size which is a multiple of 4 Alan Kelly
@ 2022-01-10 14:58 ` Alan Kelly
  2022-01-10 14:58 ` [FFmpeg-devel] [PATCH 4/4] checkasm/sw_scale: hscale does not requires cpuflag test Alan Kelly
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 7+ messages in thread
From: Alan Kelly @ 2022-01-10 14:58 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Alan Kelly

ff_shuffle_filter_coefficients shuffles the tail as required.
---
 libswscale/utils.c       | 17 +++++++++++++++--
 libswscale/x86/swscale.c |  4 ++--
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/libswscale/utils.c b/libswscale/utils.c
index 52f07e1661..7e1e9c3834 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -285,7 +285,7 @@ int ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos,
 #if ARCH_X86_64
     int i = 0, j = 0, k = 0;
     int cpu_flags = av_get_cpu_flags();
-    if (!filter || dstW % 16 != 0) return 0;
+    if (!filter || (dstW % 4 != 0)) return 0;
     if (EXTERNAL_AVX2_FAST(cpu_flags) && !(cpu_flags & AV_CPU_FLAG_SLOW_GATHER)) {
         if ((c->srcBpc == 8) && (c->dstBpc <= 14)) {
            int16_t *filterCopy = NULL;
@@ -296,9 +296,11 @@ int ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos,
            }
            // Do not swap filterPos for pixels which won't be processed by
            // the main loop.
-           for (i = 0; i + 8 <= dstW; i += 8) {
+           for (i = 0; i + 16 <= dstW; i += 16) {
                FFSWAP(int, filterPos[i + 2], filterPos[i + 4]);
                FFSWAP(int, filterPos[i + 3], filterPos[i + 5]);
+               FFSWAP(int, filterPos[i + 10], filterPos[i + 12]);
+               FFSWAP(int, filterPos[i + 11], filterPos[i + 13]);
            }
            if (filterSize > 4) {
                // 16 pixels are processed at a time.
@@ -312,6 +314,17 @@ int ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos,
                        }
                    }
                }
+               // 4 pixels are processed at a time in the tail.
+               for (; i + 4 <= dstW; i += 4) {
+                   // 4 filter coeffs are processed at a time.
+                   for (k = 0; k + 4 <= filterSize; k += 4) {
+                       for (j = 0; j < 4; ++j) {
+                           int from = (i + j) * filterSize + k;
+                           int to = i * filterSize + j * 4 + k * 4;
+                           memcpy(&filter[to], &filterCopy[from], 4 * sizeof(int16_t));
+                       }
+                   }
+               }
            }
            if (filterCopy)
                av_free(filterCopy);
diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
index fdc93866a6..1d8f19aa5a 100644
--- a/libswscale/x86/swscale.c
+++ b/libswscale/x86/swscale.c
@@ -580,9 +580,9 @@ switch(c->dstBpc){ \
 
     if (EXTERNAL_AVX2_FAST(cpu_flags) && !(cpu_flags & AV_CPU_FLAG_SLOW_GATHER)) {
         if ((c->srcBpc == 8) && (c->dstBpc <= 14)) {
-            if (c->chrDstW % 16 == 0)
+            if (c->chrDstW % 4 == 0)
                 ASSIGN_AVX2_SCALE_FUNC(c->hcScale, c->hChrFilterSize);
-            if (c->dstW % 16 == 0)
+            if (c->dstW % 4 == 0)
                 ASSIGN_AVX2_SCALE_FUNC(c->hyScale, c->hLumFilterSize);
         }
     }
-- 
2.34.1.575.g55b058a8bb-goog

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [FFmpeg-devel] [PATCH 4/4] checkasm/sw_scale: hscale does not requires cpuflag test.
  2022-01-10 14:58 [FFmpeg-devel] [PATCH 1/4] libswscale: Re-factor ff_shuffle_filter_coefficients Alan Kelly
  2022-01-10 14:58 ` [FFmpeg-devel] [PATCH 2/4] libswscale: Avx2 hscale can process any input of size which is a multiple of 4 Alan Kelly
  2022-01-10 14:58 ` [FFmpeg-devel] [PATCH 3/4] libswscale: Enable hscale_avx2 for input sizes which ar emultiples " Alan Kelly
@ 2022-01-10 14:58 ` Alan Kelly
  2022-02-02 12:21 ` [FFmpeg-devel] [PATCH 1/4] libswscale: Re-factor ff_shuffle_filter_coefficients Alan Kelly
  2022-02-03 14:11 ` Michael Niedermayer
  4 siblings, 0 replies; 7+ messages in thread
From: Alan Kelly @ 2022-01-10 14:58 UTC (permalink / raw)
  To: ffmpeg-devel; +Cc: Alan Kelly

This is done in ff_shuffle_filter_coefficients.
---
 tests/checkasm/sw_scale.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tests/checkasm/sw_scale.c b/tests/checkasm/sw_scale.c
index 3c0a083b42..e7f916d3a8 100644
--- a/tests/checkasm/sw_scale.c
+++ b/tests/checkasm/sw_scale.c
@@ -168,8 +168,6 @@ static void check_hscale(void)
                       const uint8_t *src, const int16_t *filter,
                       const int32_t *filterPos, int filterSize);
 
-    int cpu_flags = av_get_cpu_flags();
-
     ctx = sws_alloc_context();
     if (sws_init_context(ctx, NULL, NULL) < 0)
         fail();
@@ -215,10 +213,10 @@ static void check_hscale(void)
 
                 filter[SRC_PIXELS * width + i] = rnd();
             }
+
             ff_sws_init_scale(ctx);
             memcpy(filterAvx2, filter, sizeof(uint16_t) * (SRC_PIXELS * MAX_FILTER_WIDTH + MAX_FILTER_WIDTH));
-            if ((cpu_flags & AV_CPU_FLAG_AVX2) && !(cpu_flags & AV_CPU_FLAG_SLOW_GATHER))
-                ff_shuffle_filter_coefficients(ctx, filterPosAvx, width, filterAvx2, SRC_PIXELS);
+            ff_shuffle_filter_coefficients(ctx, filterPosAvx, width, filterAvx2, SRC_PIXELS);
 
             if (check_func(ctx->hcScale, "hscale_%d_to_%d_width%d", ctx->srcBpc, ctx->dstBpc + 1, width)) {
                 memset(dst0, 0, SRC_PIXELS * sizeof(dst0[0]));
-- 
2.34.1.575.g55b058a8bb-goog

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [FFmpeg-devel] [PATCH 1/4] libswscale: Re-factor ff_shuffle_filter_coefficients.
  2022-01-10 14:58 [FFmpeg-devel] [PATCH 1/4] libswscale: Re-factor ff_shuffle_filter_coefficients Alan Kelly
                   ` (2 preceding siblings ...)
  2022-01-10 14:58 ` [FFmpeg-devel] [PATCH 4/4] checkasm/sw_scale: hscale does not requires cpuflag test Alan Kelly
@ 2022-02-02 12:21 ` Alan Kelly
  2022-02-03 14:11 ` Michael Niedermayer
  4 siblings, 0 replies; 7+ messages in thread
From: Alan Kelly @ 2022-02-02 12:21 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

Hi,

Is anybody interested in this patch set?

Thanks!

On Mon, Jan 10, 2022, 15:58 Alan Kelly <alankelly@google.com> wrote:

> Make the code more readable, follow the style guide and propagate memory
> allocation errors.
> ---
>  libswscale/swscale_internal.h |  2 +-
>  libswscale/utils.c            | 68 ++++++++++++++++++++---------------
>  2 files changed, 40 insertions(+), 30 deletions(-)
>
> diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
> index 3a78d95ba6..26d28d42e6 100644
> --- a/libswscale/swscale_internal.h
> +++ b/libswscale/swscale_internal.h
> @@ -1144,5 +1144,5 @@ void ff_sws_slice_worker(void *priv, int jobnr, int
> threadnr,
>  #define MAX_LINES_AHEAD 4
>
>  //shuffle filter and filterPos for hyScale and hcScale filters in avx2
> -void ff_shuffle_filter_coefficients(SwsContext *c, int* filterPos, int
> filterSize, int16_t *filter, int dstW);
> +int ff_shuffle_filter_coefficients(SwsContext *c, int* filterPos, int
> filterSize, int16_t *filter, int dstW);
>  #endif /* SWSCALE_SWSCALE_INTERNAL_H */
> diff --git a/libswscale/utils.c b/libswscale/utils.c
> index c5ea8853d5..52f07e1661 100644
> --- a/libswscale/utils.c
> +++ b/libswscale/utils.c
> @@ -278,39 +278,47 @@ static const FormatEntry format_entries[] = {
>      [AV_PIX_FMT_P416LE]      = { 1, 1 },
>  };
>
> -void ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos, int
> filterSize, int16_t *filter, int dstW){
> +int ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos,
> +                                   int filterSize, int16_t *filter,
> +                                   int dstW)
> +{
>  #if ARCH_X86_64
> -    int i, j, k, l;
> +    int i = 0, j = 0, k = 0;
>      int cpu_flags = av_get_cpu_flags();
> +    if (!filter || dstW % 16 != 0) return 0;
>      if (EXTERNAL_AVX2_FAST(cpu_flags) && !(cpu_flags &
> AV_CPU_FLAG_SLOW_GATHER)) {
> -        if ((c->srcBpc == 8) && (c->dstBpc <= 14)){
> -            if (dstW % 16 == 0){
> -                if (filter != NULL){
> -                    for (i = 0; i < dstW; i += 8){
> -                        FFSWAP(int, filterPos[i + 2], filterPos[i+4]);
> -                        FFSWAP(int, filterPos[i + 3], filterPos[i+5]);
> -                    }
> -                    if (filterSize > 4){
> -                        int16_t *tmp2 = av_malloc(dstW * filterSize * 2);
> -                        memcpy(tmp2, filter, dstW * filterSize * 2);
> -                        for (i = 0; i < dstW; i += 16){//pixel
> -                            for (k = 0; k < filterSize / 4; ++k){//fcoeff
> -                                for (j = 0; j < 16; ++j){//inner pixel
> -                                    for (l = 0; l < 4; ++l){//coeff
> -                                        int from = i * filterSize + j *
> filterSize + k * 4 + l;
> -                                        int to = (i) * filterSize + j * 4
> + l + k * 64;
> -                                        filter[to] = tmp2[from];
> -                                    }
> -                                }
> -                            }
> -                        }
> -                        av_free(tmp2);
> -                    }
> -                }
> -            }
> +        if ((c->srcBpc == 8) && (c->dstBpc <= 14)) {
> +           int16_t *filterCopy = NULL;
> +           if (filterSize > 4) {
> +               if (!FF_ALLOC_TYPED_ARRAY(filterCopy, dstW * filterSize))
> +                   return AVERROR(ENOMEM);
> +               memcpy(filterCopy, filter, dstW * filterSize *
> sizeof(int16_t));
> +           }
> +           // Do not swap filterPos for pixels which won't be processed by
> +           // the main loop.
> +           for (i = 0; i + 8 <= dstW; i += 8) {
> +               FFSWAP(int, filterPos[i + 2], filterPos[i + 4]);
> +               FFSWAP(int, filterPos[i + 3], filterPos[i + 5]);
> +           }
> +           if (filterSize > 4) {
> +               // 16 pixels are processed at a time.
> +               for (i = 0; i + 16 <= dstW; i += 16) {
> +                   // 4 filter coeffs are processed at a time.
> +                   for (k = 0; k + 4 <= filterSize; k += 4) {
> +                       for (j = 0; j < 16; ++j) {
> +                           int from = (i + j) * filterSize + k;
> +                           int to = i * filterSize + j * 4 + k * 16;
> +                           memcpy(&filter[to], &filterCopy[from], 4 *
> sizeof(int16_t));
> +                       }
> +                   }
> +               }
> +           }
> +           if (filterCopy)
> +               av_free(filterCopy);
>          }
>      }
>  #endif
> +    return 0;
>  }
>
>  int sws_isSupportedInput(enum AVPixelFormat pix_fmt)
> @@ -1836,7 +1844,8 @@ av_cold int sws_init_context(SwsContext *c,
> SwsFilter *srcFilter,
>                             get_local_pos(c, 0, 0, 0),
>                             get_local_pos(c, 0, 0, 0))) < 0)
>                  goto fail;
> -            ff_shuffle_filter_coefficients(c, c->hLumFilterPos,
> c->hLumFilterSize, c->hLumFilter, dstW);
> +            if ((ret = ff_shuffle_filter_coefficients(c,
> c->hLumFilterPos, c->hLumFilterSize, c->hLumFilter, dstW)) != 0)
> +                goto nomem;
>              if ((ret = initFilter(&c->hChrFilter, &c->hChrFilterPos,
>                             &c->hChrFilterSize, c->chrXInc,
>                             c->chrSrcW, c->chrDstW, filterAlign, 1 << 14,
> @@ -1846,7 +1855,8 @@ av_cold int sws_init_context(SwsContext *c,
> SwsFilter *srcFilter,
>                             get_local_pos(c, c->chrSrcHSubSample,
> c->src_h_chr_pos, 0),
>                             get_local_pos(c, c->chrDstHSubSample,
> c->dst_h_chr_pos, 0))) < 0)
>                  goto fail;
> -            ff_shuffle_filter_coefficients(c, c->hChrFilterPos,
> c->hChrFilterSize, c->hChrFilter, c->chrDstW);
> +            if ((ret = ff_shuffle_filter_coefficients(c,
> c->hChrFilterPos, c->hChrFilterSize, c->hChrFilter, c->chrDstW)) != 0)
> +                goto nomem;
>          }
>      } // initialize horizontal stuff
>
> --
> 2.34.1.575.g55b058a8bb-goog
>
>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [FFmpeg-devel] [PATCH 1/4] libswscale: Re-factor ff_shuffle_filter_coefficients.
  2022-01-10 14:58 [FFmpeg-devel] [PATCH 1/4] libswscale: Re-factor ff_shuffle_filter_coefficients Alan Kelly
                   ` (3 preceding siblings ...)
  2022-02-02 12:21 ` [FFmpeg-devel] [PATCH 1/4] libswscale: Re-factor ff_shuffle_filter_coefficients Alan Kelly
@ 2022-02-03 14:11 ` Michael Niedermayer
  2022-02-09  9:17   ` Alan Kelly
  4 siblings, 1 reply; 7+ messages in thread
From: Michael Niedermayer @ 2022-02-03 14:11 UTC (permalink / raw)
  To: FFmpeg development discussions and patches


[-- Attachment #1.1: Type: text/plain, Size: 2784 bytes --]

On Mon, Jan 10, 2022 at 03:58:33PM +0100, Alan Kelly wrote:
> Make the code more readable, follow the style guide and propagate memory
> allocation errors.

Cosmetics and bugfixes should not be in the same patch


> ---
>  libswscale/swscale_internal.h |  2 +-
>  libswscale/utils.c            | 68 ++++++++++++++++++++---------------
>  2 files changed, 40 insertions(+), 30 deletions(-)
> 
> diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
> index 3a78d95ba6..26d28d42e6 100644
> --- a/libswscale/swscale_internal.h
> +++ b/libswscale/swscale_internal.h
> @@ -1144,5 +1144,5 @@ void ff_sws_slice_worker(void *priv, int jobnr, int threadnr,
>  #define MAX_LINES_AHEAD 4
>  
>  //shuffle filter and filterPos for hyScale and hcScale filters in avx2
> -void ff_shuffle_filter_coefficients(SwsContext *c, int* filterPos, int filterSize, int16_t *filter, int dstW);
> +int ff_shuffle_filter_coefficients(SwsContext *c, int* filterPos, int filterSize, int16_t *filter, int dstW);
>  #endif /* SWSCALE_SWSCALE_INTERNAL_H */
> diff --git a/libswscale/utils.c b/libswscale/utils.c
> index c5ea8853d5..52f07e1661 100644
> --- a/libswscale/utils.c
> +++ b/libswscale/utils.c
> @@ -278,39 +278,47 @@ static const FormatEntry format_entries[] = {
>      [AV_PIX_FMT_P416LE]      = { 1, 1 },
>  };
>  
> -void ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos, int filterSize, int16_t *filter, int dstW){
> +int ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos,
> +                                   int filterSize, int16_t *filter,
> +                                   int dstW)
> +{
>  #if ARCH_X86_64

> -    int i, j, k, l;
> +    int i = 0, j = 0, k = 0;

why?
they are set when used if iam not mistaken


>      int cpu_flags = av_get_cpu_flags();

> +    if (!filter || dstW % 16 != 0) return 0;

please add \n also a comment what the dstW & 16 case exactly does and why


[...]
>  int sws_isSupportedInput(enum AVPixelFormat pix_fmt)
> @@ -1836,7 +1844,8 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
>                             get_local_pos(c, 0, 0, 0),
>                             get_local_pos(c, 0, 0, 0))) < 0)
>                  goto fail;
> -            ff_shuffle_filter_coefficients(c, c->hLumFilterPos, c->hLumFilterSize, c->hLumFilter, dstW);
> +            if ((ret = ff_shuffle_filter_coefficients(c, c->hLumFilterPos, c->hLumFilterSize, c->hLumFilter, dstW)) != 0)
> +                goto nomem;

This is confusing as ret is never used, also error codes are <0

thx

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Those who are best at talking, realize last or never when they are wrong.

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [FFmpeg-devel] [PATCH 1/4] libswscale: Re-factor ff_shuffle_filter_coefficients.
  2022-02-03 14:11 ` Michael Niedermayer
@ 2022-02-09  9:17   ` Alan Kelly
  0 siblings, 0 replies; 7+ messages in thread
From: Alan Kelly @ 2022-02-09  9:17 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

Hi Michael,

Thanks for your feedback. I have updated the patches and split this patch
into two, one with cosmetic fixes and one propagating the errors. Since
there is now an extra patch in the set and the commit messages have
changed, new threads have been started.

Alan

On Thu, Feb 3, 2022 at 3:11 PM Michael Niedermayer <michael@niedermayer.cc>
wrote:

> On Mon, Jan 10, 2022 at 03:58:33PM +0100, Alan Kelly wrote:
> > Make the code more readable, follow the style guide and propagate memory
> > allocation errors.
>
> Cosmetics and bugfixes should not be in the same patch
>
>
> > ---
> >  libswscale/swscale_internal.h |  2 +-
> >  libswscale/utils.c            | 68 ++++++++++++++++++++---------------
> >  2 files changed, 40 insertions(+), 30 deletions(-)
> >
> > diff --git a/libswscale/swscale_internal.h
> b/libswscale/swscale_internal.h
> > index 3a78d95ba6..26d28d42e6 100644
> > --- a/libswscale/swscale_internal.h
> > +++ b/libswscale/swscale_internal.h
> > @@ -1144,5 +1144,5 @@ void ff_sws_slice_worker(void *priv, int jobnr,
> int threadnr,
> >  #define MAX_LINES_AHEAD 4
> >
> >  //shuffle filter and filterPos for hyScale and hcScale filters in avx2
> > -void ff_shuffle_filter_coefficients(SwsContext *c, int* filterPos, int
> filterSize, int16_t *filter, int dstW);
> > +int ff_shuffle_filter_coefficients(SwsContext *c, int* filterPos, int
> filterSize, int16_t *filter, int dstW);
> >  #endif /* SWSCALE_SWSCALE_INTERNAL_H */
> > diff --git a/libswscale/utils.c b/libswscale/utils.c
> > index c5ea8853d5..52f07e1661 100644
> > --- a/libswscale/utils.c
> > +++ b/libswscale/utils.c
> > @@ -278,39 +278,47 @@ static const FormatEntry format_entries[] = {
> >      [AV_PIX_FMT_P416LE]      = { 1, 1 },
> >  };
> >
> > -void ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos, int
> filterSize, int16_t *filter, int dstW){
> > +int ff_shuffle_filter_coefficients(SwsContext *c, int *filterPos,
> > +                                   int filterSize, int16_t *filter,
> > +                                   int dstW)
> > +{
> >  #if ARCH_X86_64
>
> > -    int i, j, k, l;
> > +    int i = 0, j = 0, k = 0;
>
> why?
> they are set when used if iam not mistaken
>
>
> >      int cpu_flags = av_get_cpu_flags();
>
> > +    if (!filter || dstW % 16 != 0) return 0;
>
> please add \n also a comment what the dstW & 16 case exactly does and why
>
>
> [...]
> >  int sws_isSupportedInput(enum AVPixelFormat pix_fmt)
> > @@ -1836,7 +1844,8 @@ av_cold int sws_init_context(SwsContext *c,
> SwsFilter *srcFilter,
> >                             get_local_pos(c, 0, 0, 0),
> >                             get_local_pos(c, 0, 0, 0))) < 0)
> >                  goto fail;
> > -            ff_shuffle_filter_coefficients(c, c->hLumFilterPos,
> c->hLumFilterSize, c->hLumFilter, dstW);
> > +            if ((ret = ff_shuffle_filter_coefficients(c,
> c->hLumFilterPos, c->hLumFilterSize, c->hLumFilter, dstW)) != 0)
> > +                goto nomem;
>
> This is confusing as ret is never used, also error codes are <0
>
> thx
>
> [...]
> --
> Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
>
> Those who are best at talking, realize last or never when they are wrong.
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2022-02-09  9:17 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-01-10 14:58 [FFmpeg-devel] [PATCH 1/4] libswscale: Re-factor ff_shuffle_filter_coefficients Alan Kelly
2022-01-10 14:58 ` [FFmpeg-devel] [PATCH 2/4] libswscale: Avx2 hscale can process any input of size which is a multiple of 4 Alan Kelly
2022-01-10 14:58 ` [FFmpeg-devel] [PATCH 3/4] libswscale: Enable hscale_avx2 for input sizes which ar emultiples " Alan Kelly
2022-01-10 14:58 ` [FFmpeg-devel] [PATCH 4/4] checkasm/sw_scale: hscale does not requires cpuflag test Alan Kelly
2022-02-02 12:21 ` [FFmpeg-devel] [PATCH 1/4] libswscale: Re-factor ff_shuffle_filter_coefficients Alan Kelly
2022-02-03 14:11 ` Michael Niedermayer
2022-02-09  9:17   ` Alan Kelly

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git