Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
* [FFmpeg-devel] [PATCHv3 1/4] lavc/h263dsp: add DCT dequantisation functions
@ 2024-06-09 16:23 Rémi Denis-Courmont
  2024-06-09 16:23 ` [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function Rémi Denis-Courmont
                   ` (2 more replies)
  0 siblings, 3 replies; 14+ messages in thread
From: Rémi Denis-Courmont @ 2024-06-09 16:23 UTC (permalink / raw)
  To: ffmpeg-devel

Note that optimised implementations of these functions will be taken
into actual use only if MpegEncContext.dct_unquantize_h263_{inter,intra}
are *not* overloaded by existing optimisations.

---
Compared to version 2, this separates inter and intra functions to
ease writing aligned-dependent optimisations.

---
 libavcodec/h263dsp.c | 24 ++++++++++++++++++++++++
 libavcodec/h263dsp.h |  4 ++++
 2 files changed, 28 insertions(+)

diff --git a/libavcodec/h263dsp.c b/libavcodec/h263dsp.c
index 6a13353499..1c6cf85a70 100644
--- a/libavcodec/h263dsp.c
+++ b/libavcodec/h263dsp.c
@@ -23,6 +23,28 @@
 #include "config.h"
 #include "h263dsp.h"
 
+static void h263_dct_unquantize_inter_c(int16_t *block, size_t len,
+                                        int qmul, int qadd)
+{
+    for (size_t i = 0; i < len; i++) {
+        int level = block[i];
+
+        if (level) {
+            if (level < 0)
+                level = level * qmul - qadd;
+            else
+                level = level * qmul + qadd;
+            block[i] = level;
+        }
+    }
+}
+
+static void h263_dct_unquantize_intra_c(int16_t *block, size_t len,
+                                        int qmul, int qadd)
+{
+    h263_dct_unquantize_inter_c(block + 1, len - 1, qmul, qadd);
+}
+
 const uint8_t ff_h263_loop_filter_strength[32] = {
     0, 1, 1, 2, 2, 3, 3,  4,  4,  4,  5,  5,  6,  6,  7, 7,
     7, 8, 8, 8, 9, 9, 9, 10, 10, 10, 11, 11, 11, 12, 12, 12
@@ -116,6 +138,8 @@ static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale)
 
 av_cold void ff_h263dsp_init(H263DSPContext *ctx)
 {
+    ctx->h263_dct_unquantize_intra = h263_dct_unquantize_intra_c;
+    ctx->h263_dct_unquantize_inter = h263_dct_unquantize_inter_c;
     ctx->h263_h_loop_filter = h263_h_loop_filter_c;
     ctx->h263_v_loop_filter = h263_v_loop_filter_c;
 
diff --git a/libavcodec/h263dsp.h b/libavcodec/h263dsp.h
index 2dccd23392..0ecbe83314 100644
--- a/libavcodec/h263dsp.h
+++ b/libavcodec/h263dsp.h
@@ -24,6 +24,10 @@
 extern const uint8_t ff_h263_loop_filter_strength[32];
 
 typedef struct H263DSPContext {
+    void (*h263_dct_unquantize_intra)(int16_t *block /* align 16 */,
+                                      size_t len, int mul, int add);
+    void (*h263_dct_unquantize_inter)(int16_t *block /* align 16 */,
+                                      size_t len, int mul, int add);
     void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale);
     void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale);
 } H263DSPContext;
-- 
2.45.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function
  2024-06-09 16:23 [FFmpeg-devel] [PATCHv3 1/4] lavc/h263dsp: add DCT dequantisation functions Rémi Denis-Courmont
@ 2024-06-09 16:23 ` Rémi Denis-Courmont
  2024-06-10 11:41   ` Michael Niedermayer
  2024-06-09 16:23 ` [FFmpeg-devel] [PATCH 3/4] checkasm/h263dsp: test dct_unquantize_{intra, inter} Rémi Denis-Courmont
  2024-06-09 16:23 ` [FFmpeg-devel] [PATCH 4/4] lavc/h263dsp: R-V V " Rémi Denis-Courmont
  2 siblings, 1 reply; 14+ messages in thread
From: Rémi Denis-Courmont @ 2024-06-09 16:23 UTC (permalink / raw)
  To: ffmpeg-devel

---
 libavcodec/mpegvideo.c | 44 ++++++++++--------------------------------
 1 file changed, 10 insertions(+), 34 deletions(-)

diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index 7af823b8bd..9be0fecc8d 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -201,13 +201,11 @@ static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
                                   int16_t *block, int n, int qscale)
 {
-    int i, level, qmul, qadd;
-    int nCoeffs;
+    int qmul = qscale << 1;
+    int qadd, nCoeffs;
 
     av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
 
-    qmul = qscale << 1;
-
     if (!s->h263_aic) {
         block[0] *= n < 4 ? s->y_dc_scale : s->c_dc_scale;
         qadd = (qscale - 1) | 1;
@@ -215,47 +213,24 @@ static void dct_unquantize_h263_intra_c(MpegEncContext *s,
         qadd = 0;
     }
     if(s->ac_pred)
-        nCoeffs=63;
+        nCoeffs = 64;
     else
-        nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
+        nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]] + 1;
 
-    for(i=1; i<=nCoeffs; i++) {
-        level = block[i];
-        if (level) {
-            if (level < 0) {
-                level = level * qmul - qadd;
-            } else {
-                level = level * qmul + qadd;
-            }
-            block[i] = level;
-        }
-    }
+    s->h263dsp.h263_dct_unquantize_intra(block, nCoeffs, qmul, qadd);
 }
 
 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
                                   int16_t *block, int n, int qscale)
 {
-    int i, level, qmul, qadd;
+    int qmul = qscale << 1;
+    int qadd = (qscale - 1) | 1;
     int nCoeffs;
 
     av_assert2(s->block_last_index[n]>=0);
 
-    qadd = (qscale - 1) | 1;
-    qmul = qscale << 1;
-
-    nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
-
-    for(i=0; i<=nCoeffs; i++) {
-        level = block[i];
-        if (level) {
-            if (level < 0) {
-                level = level * qmul - qadd;
-            } else {
-                level = level * qmul + qadd;
-            }
-            block[i] = level;
-        }
-    }
+    nCoeffs = s->inter_scantable.raster_end[s->block_last_index[n]] + 1;
+    s->h263dsp.h263_dct_unquantize_inter(block, nCoeffs, qmul, qadd);
 }
 
 
@@ -275,6 +250,7 @@ static void gray8(uint8_t *dst, const uint8_t *src, ptrdiff_t linesize, int h)
 static av_cold int dct_init(MpegEncContext *s)
 {
     ff_blockdsp_init(&s->bdsp);
+    ff_h263dsp_init(&s->h263dsp);
     ff_hpeldsp_init(&s->hdsp, s->avctx->flags);
     ff_videodsp_init(&s->vdsp, s->avctx->bits_per_raw_sample);
 
-- 
2.45.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [FFmpeg-devel] [PATCH 3/4] checkasm/h263dsp: test dct_unquantize_{intra, inter}
  2024-06-09 16:23 [FFmpeg-devel] [PATCHv3 1/4] lavc/h263dsp: add DCT dequantisation functions Rémi Denis-Courmont
  2024-06-09 16:23 ` [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function Rémi Denis-Courmont
@ 2024-06-09 16:23 ` Rémi Denis-Courmont
  2024-06-09 16:23 ` [FFmpeg-devel] [PATCH 4/4] lavc/h263dsp: R-V V " Rémi Denis-Courmont
  2 siblings, 0 replies; 14+ messages in thread
From: Rémi Denis-Courmont @ 2024-06-09 16:23 UTC (permalink / raw)
  To: ffmpeg-devel

---
 tests/checkasm/h263dsp.c | 47 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 46 insertions(+), 1 deletion(-)

diff --git a/tests/checkasm/h263dsp.c b/tests/checkasm/h263dsp.c
index 2d0957a90b..b21854d061 100644
--- a/tests/checkasm/h263dsp.c
+++ b/tests/checkasm/h263dsp.c
@@ -18,13 +18,55 @@
  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
+#include <stdbool.h>
 #include <string.h>
 
 #include "checkasm.h"
 
-#include "libavcodec/h263dsp.h"
+#include "libavutil/avassert.h"
 #include "libavutil/mem.h"
 #include "libavutil/mem_internal.h"
+#include "libavcodec/h263dsp.h"
+#include "libavcodec/mpegvideodata.h"
+
+static uint_fast8_t mpeg_qscale_rnd(void)
+{
+    int n = rnd(), q = (n >> 1) & 31;
+
+    if (n & 1)
+        return ff_mpeg2_non_linear_qscale[q];
+    else
+        return q << 1;
+}
+
+typedef void (*unquantizer)(int16_t *, size_t, int, int);
+
+static void check_dct_unquantize(unquantizer func, const char *name)
+{
+#define LEN 64
+    LOCAL_ALIGNED_16(int16_t, block0, [LEN]);
+    LOCAL_ALIGNED_16(int16_t, block1, [LEN]);
+    size_t len = rnd() % (LEN + 1);
+    const int qscale = mpeg_qscale_rnd();
+    const int qmul = qscale << 1;
+    const int qadd = (rnd() & 1) ? (qscale - 1) | 1 : 0;
+
+    declare_func(void, int16_t *, size_t, int, int);
+
+    for (size_t i = 0; i < LEN; i++)
+        block1[i] = block0[i] = (rnd() & 1) ? rnd() : 0;
+
+    if (check_func(func, "h263dsp.dct_unquantize_%s", name)) {
+        av_assert0(len <= LEN);
+        call_ref(block0, len, qmul, qadd);
+        call_new(block1, len, qmul, qadd);
+
+        if (memcmp(block0, block1, len * sizeof (int16_t)))
+            fail();
+
+        bench_new(block1, LEN, qmul, qadd);
+    }
+}
 
 typedef void (*filter)(uint8_t *src, int stride, int qscale);
 
@@ -56,6 +98,9 @@ void checkasm_check_h263dsp(void)
     H263DSPContext ctx;
 
     ff_h263dsp_init(&ctx);
+    check_dct_unquantize(ctx.h263_dct_unquantize_intra, "intra");
+    check_dct_unquantize(ctx.h263_dct_unquantize_inter, "inter");
+    report("dct_unquantize");
     check_loop_filter('h', ctx.h263_h_loop_filter);
     check_loop_filter('v', ctx.h263_v_loop_filter);
     report("loop_filter");
-- 
2.45.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [FFmpeg-devel] [PATCH 4/4] lavc/h263dsp: R-V V dct_unquantize_{intra, inter}
  2024-06-09 16:23 [FFmpeg-devel] [PATCHv3 1/4] lavc/h263dsp: add DCT dequantisation functions Rémi Denis-Courmont
  2024-06-09 16:23 ` [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function Rémi Denis-Courmont
  2024-06-09 16:23 ` [FFmpeg-devel] [PATCH 3/4] checkasm/h263dsp: test dct_unquantize_{intra, inter} Rémi Denis-Courmont
@ 2024-06-09 16:23 ` Rémi Denis-Courmont
  2 siblings, 0 replies; 14+ messages in thread
From: Rémi Denis-Courmont @ 2024-06-09 16:23 UTC (permalink / raw)
  To: ffmpeg-devel

T-Head C908:
h263dsp.dct_unquantize_inter_c:       3.7
h263dsp.dct_unquantize_inter_rvv_i32: 1.7
h263dsp.dct_unquantize_intra_c:       4.0
h263dsp.dct_unquantize_intra_rvv_i32: 2.0

SpacemiT X60:
h263dsp.dct_unquantize_inter_c:       3.5
h263dsp.dct_unquantize_inter_rvv_i32: 0.7
h263dsp.dct_unquantize_intra_c:       3.5
h263dsp.dct_unquantize_intra_rvv_i32: 0.7
---
 libavcodec/riscv/h263dsp_init.c | 15 ++++++++++++---
 libavcodec/riscv/h263dsp_rvv.S  | 24 ++++++++++++++++++++++++
 2 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/libavcodec/riscv/h263dsp_init.c b/libavcodec/riscv/h263dsp_init.c
index 21b536366c..8c5d92ef76 100644
--- a/libavcodec/riscv/h263dsp_init.c
+++ b/libavcodec/riscv/h263dsp_init.c
@@ -25,6 +25,8 @@
 #include "libavutil/riscv/cpu.h"
 #include "libavcodec/h263dsp.h"
 
+void ff_h263_dct_unquantize_intra_rvv(int16_t *, size_t len, int, int);
+void ff_h263_dct_unquantize_inter_rvv(int16_t *, size_t len, int, int);
 void ff_h263_h_loop_filter_rvv(uint8_t *src, int stride, int q);
 void ff_h263_v_loop_filter_rvv(uint8_t *src, int stride, int q);
 
@@ -33,9 +35,16 @@ av_cold void ff_h263dsp_init_riscv(H263DSPContext *c)
 #if HAVE_RVV
     int flags = av_get_cpu_flags();
 
-    if ((flags & AV_CPU_FLAG_RVV_I32) && ff_rv_vlen_least(128)) {
-        c->h263_h_loop_filter = ff_h263_h_loop_filter_rvv;
-        c->h263_v_loop_filter = ff_h263_v_loop_filter_rvv;
+    if (flags & AV_CPU_FLAG_RVV_I32) {
+        if (flags & AV_CPU_FLAG_RVB_ADDR) {
+            c->h263_dct_unquantize_intra = ff_h263_dct_unquantize_intra_rvv;
+            c->h263_dct_unquantize_inter = ff_h263_dct_unquantize_inter_rvv;
+        }
+
+        if (ff_rv_vlen_least(128)) {
+            c->h263_h_loop_filter = ff_h263_h_loop_filter_rvv;
+            c->h263_v_loop_filter = ff_h263_v_loop_filter_rvv;
+        }
     }
 #endif
 }
diff --git a/libavcodec/riscv/h263dsp_rvv.S b/libavcodec/riscv/h263dsp_rvv.S
index 97503d527c..d61cf2c747 100644
--- a/libavcodec/riscv/h263dsp_rvv.S
+++ b/libavcodec/riscv/h263dsp_rvv.S
@@ -20,6 +20,30 @@
 
 #include "libavutil/riscv/asm.S"
 
+func ff_h263_dct_unquantize_intra_rvv, zve32x
+        addi    a1, a1, -1
+        addi    a0, a0, 2
+        # fall through
+endfunc
+
+func ff_h263_dct_unquantize_inter_rvv, zve32x
+1:
+        vsetvli  t0, a1, e16, m4, ta, mu
+        vle16.v  v8, (a0)
+        sub      a1, a1, t0
+        vmv.v.x  v24, a3
+        vmslt.vi v0, v8, 0
+        vmul.vx  v16, v8, a2
+        vneg.v   v24, v24, v0.t
+        vmsne.vi v0, v8, 0
+        vadd.vv  v8, v16, v24, v0.t
+        vse16.v v8, (a0)
+        sh1add  a0, t0, a0
+        bnez    a1, 1b
+
+        ret
+endfunc
+
         .option push
         .option norelax
 func ff_h263_h_loop_filter_rvv, zve32x
-- 
2.45.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function
  2024-06-09 16:23 ` [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function Rémi Denis-Courmont
@ 2024-06-10 11:41   ` Michael Niedermayer
  2024-06-10 12:14     ` Rémi Denis-Courmont
  0 siblings, 1 reply; 14+ messages in thread
From: Michael Niedermayer @ 2024-06-10 11:41 UTC (permalink / raw)
  To: FFmpeg development discussions and patches


[-- Attachment #1.1: Type: text/plain, Size: 2959 bytes --]

On Sun, Jun 09, 2024 at 07:23:45PM +0300, Rémi Denis-Courmont wrote:
> ---
>  libavcodec/mpegvideo.c | 44 ++++++++++--------------------------------
>  1 file changed, 10 insertions(+), 34 deletions(-)
> 
> diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
> index 7af823b8bd..9be0fecc8d 100644
> --- a/libavcodec/mpegvideo.c
> +++ b/libavcodec/mpegvideo.c
> @@ -201,13 +201,11 @@ static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
>  static void dct_unquantize_h263_intra_c(MpegEncContext *s,
>                                    int16_t *block, int n, int qscale)
>  {
> -    int i, level, qmul, qadd;
> -    int nCoeffs;
> +    int qmul = qscale << 1;
> +    int qadd, nCoeffs;
>  
>      av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
>  
> -    qmul = qscale << 1;
> -
>      if (!s->h263_aic) {
>          block[0] *= n < 4 ? s->y_dc_scale : s->c_dc_scale;
>          qadd = (qscale - 1) | 1;
> @@ -215,47 +213,24 @@ static void dct_unquantize_h263_intra_c(MpegEncContext *s,
>          qadd = 0;
>      }
>      if(s->ac_pred)
> -        nCoeffs=63;
> +        nCoeffs = 64;
>      else
> -        nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
> +        nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]] + 1;
>  
> -    for(i=1; i<=nCoeffs; i++) {
> -        level = block[i];
> -        if (level) {
> -            if (level < 0) {
> -                level = level * qmul - qadd;
> -            } else {
> -                level = level * qmul + qadd;
> -            }
> -            block[i] = level;
> -        }
> -    }
> +    s->h263dsp.h263_dct_unquantize_intra(block, nCoeffs, qmul, qadd);
>  }
>  
>  static void dct_unquantize_h263_inter_c(MpegEncContext *s,
>                                    int16_t *block, int n, int qscale)
>  {
> -    int i, level, qmul, qadd;
> +    int qmul = qscale << 1;
> +    int qadd = (qscale - 1) | 1;
>      int nCoeffs;
>  
>      av_assert2(s->block_last_index[n]>=0);
>  
> -    qadd = (qscale - 1) | 1;
> -    qmul = qscale << 1;
> -
> -    nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
> -
> -    for(i=0; i<=nCoeffs; i++) {
> -        level = block[i];
> -        if (level) {
> -            if (level < 0) {
> -                level = level * qmul - qadd;
> -            } else {
> -                level = level * qmul + qadd;
> -            }
> -            block[i] = level;
> -        }
> -    }
> +    nCoeffs = s->inter_scantable.raster_end[s->block_last_index[n]] + 1;
> +    s->h263dsp.h263_dct_unquantize_inter(block, nCoeffs, qmul, qadd);

It would be better if the "+ 1" would not be needed and teh functions
would keep using "<="

thx

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

During times of universal deceit, telling the truth becomes a
revolutionary act. -- George Orwell

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function
  2024-06-10 11:41   ` Michael Niedermayer
@ 2024-06-10 12:14     ` Rémi Denis-Courmont
  2024-06-10 12:32       ` Michael Niedermayer
  0 siblings, 1 reply; 14+ messages in thread
From: Rémi Denis-Courmont @ 2024-06-10 12:14 UTC (permalink / raw)
  To: FFmpeg development discussions and patches



Le 10 juin 2024 14:41:31 GMT+03:00, Michael Niedermayer <michael@niedermayer.cc> a écrit :
>On Sun, Jun 09, 2024 at 07:23:45PM +0300, Rémi Denis-Courmont wrote:
>> ---
>>  libavcodec/mpegvideo.c | 44 ++++++++++--------------------------------
>>  1 file changed, 10 insertions(+), 34 deletions(-)
>> 
>> diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
>> index 7af823b8bd..9be0fecc8d 100644
>> --- a/libavcodec/mpegvideo.c
>> +++ b/libavcodec/mpegvideo.c
>> @@ -201,13 +201,11 @@ static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
>>  static void dct_unquantize_h263_intra_c(MpegEncContext *s,
>>                                    int16_t *block, int n, int qscale)
>>  {
>> -    int i, level, qmul, qadd;
>> -    int nCoeffs;
>> +    int qmul = qscale << 1;
>> +    int qadd, nCoeffs;
>>  
>>      av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
>>  
>> -    qmul = qscale << 1;
>> -
>>      if (!s->h263_aic) {
>>          block[0] *= n < 4 ? s->y_dc_scale : s->c_dc_scale;
>>          qadd = (qscale - 1) | 1;
>> @@ -215,47 +213,24 @@ static void dct_unquantize_h263_intra_c(MpegEncContext *s,
>>          qadd = 0;
>>      }
>>      if(s->ac_pred)
>> -        nCoeffs=63;
>> +        nCoeffs = 64;
>>      else
>> -        nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
>> +        nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]] + 1;
>>  
>> -    for(i=1; i<=nCoeffs; i++) {
>> -        level = block[i];
>> -        if (level) {
>> -            if (level < 0) {
>> -                level = level * qmul - qadd;
>> -            } else {
>> -                level = level * qmul + qadd;
>> -            }
>> -            block[i] = level;
>> -        }
>> -    }
>> +    s->h263dsp.h263_dct_unquantize_intra(block, nCoeffs, qmul, qadd);
>>  }
>>  
>>  static void dct_unquantize_h263_inter_c(MpegEncContext *s,
>>                                    int16_t *block, int n, int qscale)
>>  {
>> -    int i, level, qmul, qadd;
>> +    int qmul = qscale << 1;
>> +    int qadd = (qscale - 1) | 1;
>>      int nCoeffs;
>>  
>>      av_assert2(s->block_last_index[n]>=0);
>>  
>> -    qadd = (qscale - 1) | 1;
>> -    qmul = qscale << 1;
>> -
>> -    nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
>> -
>> -    for(i=0; i<=nCoeffs; i++) {
>> -        level = block[i];
>> -        if (level) {
>> -            if (level < 0) {
>> -                level = level * qmul - qadd;
>> -            } else {
>> -                level = level * qmul + qadd;
>> -            }
>> -            block[i] = level;
>> -        }
>> -    }
>> +    nCoeffs = s->inter_scantable.raster_end[s->block_last_index[n]] + 1;
>> +    s->h263dsp.h263_dct_unquantize_inter(block, nCoeffs, qmul, qadd);
>
>It would be better if the "+ 1" would not be needed and teh functions
>would keep using "<="

Why? AFAICT, that just makes the assembler more confusing by requiring an unusual boundary check.

And then for SVE and RVV, adding one is unavoidable possible, as we need the element count to predicate the vector ops. So we'd just end up having to add one in the assembler instead of C.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function
  2024-06-10 12:14     ` Rémi Denis-Courmont
@ 2024-06-10 12:32       ` Michael Niedermayer
  0 siblings, 0 replies; 14+ messages in thread
From: Michael Niedermayer @ 2024-06-10 12:32 UTC (permalink / raw)
  To: FFmpeg development discussions and patches


[-- Attachment #1.1: Type: text/plain, Size: 3789 bytes --]

On Mon, Jun 10, 2024 at 03:14:14PM +0300, Rémi Denis-Courmont wrote:
> 
> 
> Le 10 juin 2024 14:41:31 GMT+03:00, Michael Niedermayer <michael@niedermayer.cc> a écrit :
> >On Sun, Jun 09, 2024 at 07:23:45PM +0300, Rémi Denis-Courmont wrote:
> >> ---
> >>  libavcodec/mpegvideo.c | 44 ++++++++++--------------------------------
> >>  1 file changed, 10 insertions(+), 34 deletions(-)
> >> 
> >> diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
> >> index 7af823b8bd..9be0fecc8d 100644
> >> --- a/libavcodec/mpegvideo.c
> >> +++ b/libavcodec/mpegvideo.c
> >> @@ -201,13 +201,11 @@ static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
> >>  static void dct_unquantize_h263_intra_c(MpegEncContext *s,
> >>                                    int16_t *block, int n, int qscale)
> >>  {
> >> -    int i, level, qmul, qadd;
> >> -    int nCoeffs;
> >> +    int qmul = qscale << 1;
> >> +    int qadd, nCoeffs;
> >>  
> >>      av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
> >>  
> >> -    qmul = qscale << 1;
> >> -
> >>      if (!s->h263_aic) {
> >>          block[0] *= n < 4 ? s->y_dc_scale : s->c_dc_scale;
> >>          qadd = (qscale - 1) | 1;
> >> @@ -215,47 +213,24 @@ static void dct_unquantize_h263_intra_c(MpegEncContext *s,
> >>          qadd = 0;
> >>      }
> >>      if(s->ac_pred)
> >> -        nCoeffs=63;
> >> +        nCoeffs = 64;
> >>      else
> >> -        nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
> >> +        nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]] + 1;
> >>  
> >> -    for(i=1; i<=nCoeffs; i++) {
> >> -        level = block[i];
> >> -        if (level) {
> >> -            if (level < 0) {
> >> -                level = level * qmul - qadd;
> >> -            } else {
> >> -                level = level * qmul + qadd;
> >> -            }
> >> -            block[i] = level;
> >> -        }
> >> -    }
> >> +    s->h263dsp.h263_dct_unquantize_intra(block, nCoeffs, qmul, qadd);
> >>  }
> >>  
> >>  static void dct_unquantize_h263_inter_c(MpegEncContext *s,
> >>                                    int16_t *block, int n, int qscale)
> >>  {
> >> -    int i, level, qmul, qadd;
> >> +    int qmul = qscale << 1;
> >> +    int qadd = (qscale - 1) | 1;
> >>      int nCoeffs;
> >>  
> >>      av_assert2(s->block_last_index[n]>=0);
> >>  
> >> -    qadd = (qscale - 1) | 1;
> >> -    qmul = qscale << 1;
> >> -
> >> -    nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
> >> -
> >> -    for(i=0; i<=nCoeffs; i++) {
> >> -        level = block[i];
> >> -        if (level) {
> >> -            if (level < 0) {
> >> -                level = level * qmul - qadd;
> >> -            } else {
> >> -                level = level * qmul + qadd;
> >> -            }
> >> -            block[i] = level;
> >> -        }
> >> -    }
> >> +    nCoeffs = s->inter_scantable.raster_end[s->block_last_index[n]] + 1;
> >> +    s->h263dsp.h263_dct_unquantize_inter(block, nCoeffs, qmul, qadd);
> >
> >It would be better if the "+ 1" would not be needed and teh functions
> >would keep using "<="
> 
> Why?

because its 1 instruction less and the compieler cannot optimize it out.


> AFAICT, that just makes the assembler more confusing by requiring an unusual boundary check.
> 
> And then for SVE and RVV, adding one is unavoidable possible, as we need the element count to predicate the vector ops. So we'd just end up having to add one in the assembler instead of C.

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

If a bugfix only changes things apparently unrelated to the bug with no
further explanation, that is a good sign that the bugfix is wrong.

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [FFmpeg-devel] [PATCH 3/4] checkasm/h263dsp: test dct_unquantize_{intra, inter}
  2024-07-01 19:13 ` [FFmpeg-devel] [PATCH 3/4] checkasm/h263dsp: test dct_unquantize_{intra, inter} Rémi Denis-Courmont
@ 2024-07-06 19:10   ` Andreas Rheinhardt
  0 siblings, 0 replies; 14+ messages in thread
From: Andreas Rheinhardt @ 2024-07-06 19:10 UTC (permalink / raw)
  To: ffmpeg-devel

Rémi Denis-Courmont:
> ---
>  tests/checkasm/h263dsp.c | 57 +++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 56 insertions(+), 1 deletion(-)
> 
> diff --git a/tests/checkasm/h263dsp.c b/tests/checkasm/h263dsp.c
> index 2d0957a90b..26020211dc 100644
> --- a/tests/checkasm/h263dsp.c
> +++ b/tests/checkasm/h263dsp.c
> @@ -18,13 +18,65 @@
>   * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
>   */
>  
> +#include <stdbool.h>
>  #include <string.h>
>  
>  #include "checkasm.h"
>  
> -#include "libavcodec/h263dsp.h"
> +#include "libavutil/avassert.h"
>  #include "libavutil/mem.h"
>  #include "libavutil/mem_internal.h"
> +#include "libavcodec/h263dsp.h"
> +#include "libavcodec/mpegvideodata.h"
> +
> +static uint_fast8_t mpeg_qscale_rnd(void)
> +{
> +    int n = rnd(), q = (n >> 1) & 31;
> +
> +    if (n & 1)
> +        return ff_mpeg2_non_linear_qscale[q];
> +    else
> +        return q << 1;

This branch reminds me of the q_scale_type check from the
mpeg2-unquantize functions. Why is it here?
(For H.263, qscale is simply a value in 1..31. This code will test
values outside of this range.)

> +}
> +
> +typedef void (*unquantizer)(int16_t *, ptrdiff_t, int, int);
> +
> +static void check_dct_unquantize(unquantizer func, bool inter)
> +{
> +    const char *name = inter ? "inter" : "intra";
> +#define LEN 64
> +    LOCAL_ALIGNED_16(int16_t, block0, [LEN]);
> +    LOCAL_ALIGNED_16(int16_t, block1, [LEN]);
> +    size_t len = rnd() % LEN;
> +    const int qscale = mpeg_qscale_rnd();
> +    const int qmul = qscale << 1;
> +    const int qadd = (rnd() & 1) ? (qscale - 1) | 1 : 0;
> +
> +    declare_func(void, int16_t *, ptrdiff_t, int, int);
> +
> +    for (size_t i = 0; i < LEN; i++) {
> +        int r = rnd();
> +
> +        block1[i] = block0[i] = (r & 1) ? (r >> 1) : 0;

This will potentially set all elements to some nonzero value. In
reality, all elements not processed in the (current) C version are zero;
in other words, one could add

    for (; i <= 63; i++)
        av_assert0(!block[i]);

at the end of dct_unquantize_h263_(inter|intra)_c. This is what makes it
possible to switch to a do-while loop. It would also make it possible to
unroll the loop/omit special handling for the tail when vectorizing, so
this test is a hindrance to potential optimizations.

> +    }
> +
> +    if (check_func(func, "h263dsp.dct_unquantize_%s", name)) {
> +        call_ref(block0, 0, qmul, qadd);
> +        call_new(block1, 0, qmul, qadd);
> +
> +        if (memcmp(block0, block1, LEN * sizeof (int16_t)))
> +            fail();
> +
> +        av_assert0(len < LEN);
> +        call_ref(block0, len, qmul, qadd);
> +        call_new(block1, len, qmul, qadd);
> +
> +        if (memcmp(block0, block1, LEN * sizeof (int16_t)))
> +            fail();
> +
> +        bench_new(block1, LEN, qmul, qadd);
> +    }
> +}
>  
>  typedef void (*filter)(uint8_t *src, int stride, int qscale);
>  
> @@ -56,6 +108,9 @@ void checkasm_check_h263dsp(void)
>      H263DSPContext ctx;
>  
>      ff_h263dsp_init(&ctx);
> +    check_dct_unquantize(ctx.h263_dct_unquantize_intra, false);
> +    check_dct_unquantize(ctx.h263_dct_unquantize_inter, true);
> +    report("dct_unquantize");
>      check_loop_filter('h', ctx.h263_h_loop_filter);
>      check_loop_filter('v', ctx.h263_v_loop_filter);
>      report("loop_filter");

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [FFmpeg-devel] [PATCH 3/4] checkasm/h263dsp: test dct_unquantize_{intra, inter}
  2024-07-01 19:13 [FFmpeg-devel] [PATCH 1/4] lavc/h263dsp: add DCT dequantisation functions Rémi Denis-Courmont
@ 2024-07-01 19:13 ` Rémi Denis-Courmont
  2024-07-06 19:10   ` Andreas Rheinhardt
  0 siblings, 1 reply; 14+ messages in thread
From: Rémi Denis-Courmont @ 2024-07-01 19:13 UTC (permalink / raw)
  To: ffmpeg-devel

---
 tests/checkasm/h263dsp.c | 57 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 56 insertions(+), 1 deletion(-)

diff --git a/tests/checkasm/h263dsp.c b/tests/checkasm/h263dsp.c
index 2d0957a90b..26020211dc 100644
--- a/tests/checkasm/h263dsp.c
+++ b/tests/checkasm/h263dsp.c
@@ -18,13 +18,65 @@
  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
+#include <stdbool.h>
 #include <string.h>
 
 #include "checkasm.h"
 
-#include "libavcodec/h263dsp.h"
+#include "libavutil/avassert.h"
 #include "libavutil/mem.h"
 #include "libavutil/mem_internal.h"
+#include "libavcodec/h263dsp.h"
+#include "libavcodec/mpegvideodata.h"
+
+static uint_fast8_t mpeg_qscale_rnd(void)
+{
+    int n = rnd(), q = (n >> 1) & 31;
+
+    if (n & 1)
+        return ff_mpeg2_non_linear_qscale[q];
+    else
+        return q << 1;
+}
+
+typedef void (*unquantizer)(int16_t *, ptrdiff_t, int, int);
+
+static void check_dct_unquantize(unquantizer func, bool inter)
+{
+    const char *name = inter ? "inter" : "intra";
+#define LEN 64
+    LOCAL_ALIGNED_16(int16_t, block0, [LEN]);
+    LOCAL_ALIGNED_16(int16_t, block1, [LEN]);
+    size_t len = rnd() % LEN;
+    const int qscale = mpeg_qscale_rnd();
+    const int qmul = qscale << 1;
+    const int qadd = (rnd() & 1) ? (qscale - 1) | 1 : 0;
+
+    declare_func(void, int16_t *, ptrdiff_t, int, int);
+
+    for (size_t i = 0; i < LEN; i++) {
+        int r = rnd();
+
+        block1[i] = block0[i] = (r & 1) ? (r >> 1) : 0;
+    }
+
+    if (check_func(func, "h263dsp.dct_unquantize_%s", name)) {
+        call_ref(block0, 0, qmul, qadd);
+        call_new(block1, 0, qmul, qadd);
+
+        if (memcmp(block0, block1, LEN * sizeof (int16_t)))
+            fail();
+
+        av_assert0(len < LEN);
+        call_ref(block0, len, qmul, qadd);
+        call_new(block1, len, qmul, qadd);
+
+        if (memcmp(block0, block1, LEN * sizeof (int16_t)))
+            fail();
+
+        bench_new(block1, LEN, qmul, qadd);
+    }
+}
 
 typedef void (*filter)(uint8_t *src, int stride, int qscale);
 
@@ -56,6 +108,9 @@ void checkasm_check_h263dsp(void)
     H263DSPContext ctx;
 
     ff_h263dsp_init(&ctx);
+    check_dct_unquantize(ctx.h263_dct_unquantize_intra, false);
+    check_dct_unquantize(ctx.h263_dct_unquantize_inter, true);
+    report("dct_unquantize");
     check_loop_filter('h', ctx.h263_h_loop_filter);
     check_loop_filter('v', ctx.h263_v_loop_filter);
     report("loop_filter");
-- 
2.45.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [FFmpeg-devel] [PATCH 3/4] checkasm/h263dsp: test dct_unquantize_{intra, inter}
  2024-06-12 19:39       ` James Almer
@ 2024-06-12 19:52         ` Rémi Denis-Courmont
  0 siblings, 0 replies; 14+ messages in thread
From: Rémi Denis-Courmont @ 2024-06-12 19:52 UTC (permalink / raw)
  To: ffmpeg-devel

Le keskiviikkona 12. kesäkuuta 2024, 22.39.49 EEST James Almer a écrit :
> >> These are not padded, and unless I'm reading this wrong, an asm
> >> implementation loading say 16 bytes at a time will overread/write in
> >> dct_unquantize_intra (which offsets block by 1).
> > 
> > AFAIU, there is no padding per se, but the block buffer size is always
> > exactly 64 elements, regardless of the number of coeffs, hence this code.
> > The old NEON intrinsic code seems to assume the block is a multiple of 8
> > elements, and the tail can be overwritten safely (hence not checking in
> > memcmp()).
> > 
> > I have a feeling that I am not grasping the implications of you comment
> > here.
> An asm function loading 16 bytes at a time from block[1] onwards for
> intra may end up reading two bytes more than available at the end of the
> 128 byte wide buffer.

Wouldn't that be a bug in the assembler function? Do you mean that checkasm 
should add padding to check against overwrites?

The whole point of separating inter and intra was to preserve alignment for 
those instruction set extensions that want it (C and RVV couldn't care less).

-- 
レミ・デニ-クールモン
http://www.remlab.net/



_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [FFmpeg-devel] [PATCH 3/4] checkasm/h263dsp: test dct_unquantize_{intra, inter}
  2024-06-12 19:15     ` Rémi Denis-Courmont
@ 2024-06-12 19:39       ` James Almer
  2024-06-12 19:52         ` Rémi Denis-Courmont
  0 siblings, 1 reply; 14+ messages in thread
From: James Almer @ 2024-06-12 19:39 UTC (permalink / raw)
  To: ffmpeg-devel

On 6/12/2024 4:15 PM, Rémi Denis-Courmont wrote:
> Le keskiviikkona 12. kesäkuuta 2024, 21.39.12 EEST James Almer a écrit :
>> On 6/12/2024 1:47 AM, Rémi Denis-Courmont wrote:
>>> ---
>>>
>>>    tests/checkasm/h263dsp.c | 47 +++++++++++++++++++++++++++++++++++++++-
>>>    1 file changed, 46 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/tests/checkasm/h263dsp.c b/tests/checkasm/h263dsp.c
>>> index 2d0957a90b..8a2cdb34df 100644
>>> --- a/tests/checkasm/h263dsp.c
>>> +++ b/tests/checkasm/h263dsp.c
>>> @@ -18,13 +18,55 @@
>>>
>>>     * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
>>>     */
>>>
>>> +#include <stdbool.h>
>>>
>>>    #include <string.h>
>>>    
>>>    #include "checkasm.h"
>>>
>>> -#include "libavcodec/h263dsp.h"
>>> +#include "libavutil/avassert.h"
>>>
>>>    #include "libavutil/mem.h"
>>>    #include "libavutil/mem_internal.h"
>>>
>>> +#include "libavcodec/h263dsp.h"
>>> +#include "libavcodec/mpegvideodata.h"
>>> +
>>> +static uint_fast8_t mpeg_qscale_rnd(void)
>>> +{
>>> +    int n = rnd(), q = (n >> 1) & 31;
>>> +
>>> +    if (n & 1)
>>> +        return ff_mpeg2_non_linear_qscale[q];
>>> +    else
>>> +        return q << 1;
>>> +}
>>> +
>>> +typedef void (*unquantizer)(int16_t *, size_t, int, int);
>>> +
>>> +static void check_dct_unquantize(unquantizer func, const char *name)
>>> +{
>>> +#define LEN 64
>>> +    LOCAL_ALIGNED_16(int16_t, block0, [LEN]);
>>> +    LOCAL_ALIGNED_16(int16_t, block1, [LEN]);
>>
>> These are not padded, and unless I'm reading this wrong, an asm
>> implementation loading say 16 bytes at a time will overread/write in
>> dct_unquantize_intra (which offsets block by 1).
> 
> AFAIU, there is no padding per se, but the block buffer size is always exactly
> 64 elements, regardless of the number of coeffs, hence this code. The old NEON
> intrinsic code seems to assume the block is a multiple of 8 elements, and the
> tail can be overwritten safely (hence not checking in memcmp()).
> 
> I have a feeling that I am not grasping the implications of you comment here.

An asm function loading 16 bytes at a time from block[1] onwards for 
intra may end up reading two bytes more than available at the end of the 
128 byte wide buffer.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [FFmpeg-devel] [PATCH 3/4] checkasm/h263dsp: test dct_unquantize_{intra, inter}
  2024-06-12 18:39   ` James Almer
@ 2024-06-12 19:15     ` Rémi Denis-Courmont
  2024-06-12 19:39       ` James Almer
  0 siblings, 1 reply; 14+ messages in thread
From: Rémi Denis-Courmont @ 2024-06-12 19:15 UTC (permalink / raw)
  To: ffmpeg-devel

Le keskiviikkona 12. kesäkuuta 2024, 21.39.12 EEST James Almer a écrit :
> On 6/12/2024 1:47 AM, Rémi Denis-Courmont wrote:
> > ---
> > 
> >   tests/checkasm/h263dsp.c | 47 +++++++++++++++++++++++++++++++++++++++-
> >   1 file changed, 46 insertions(+), 1 deletion(-)
> > 
> > diff --git a/tests/checkasm/h263dsp.c b/tests/checkasm/h263dsp.c
> > index 2d0957a90b..8a2cdb34df 100644
> > --- a/tests/checkasm/h263dsp.c
> > +++ b/tests/checkasm/h263dsp.c
> > @@ -18,13 +18,55 @@
> > 
> >    * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
> >    */
> > 
> > +#include <stdbool.h>
> > 
> >   #include <string.h>
> >   
> >   #include "checkasm.h"
> > 
> > -#include "libavcodec/h263dsp.h"
> > +#include "libavutil/avassert.h"
> > 
> >   #include "libavutil/mem.h"
> >   #include "libavutil/mem_internal.h"
> > 
> > +#include "libavcodec/h263dsp.h"
> > +#include "libavcodec/mpegvideodata.h"
> > +
> > +static uint_fast8_t mpeg_qscale_rnd(void)
> > +{
> > +    int n = rnd(), q = (n >> 1) & 31;
> > +
> > +    if (n & 1)
> > +        return ff_mpeg2_non_linear_qscale[q];
> > +    else
> > +        return q << 1;
> > +}
> > +
> > +typedef void (*unquantizer)(int16_t *, size_t, int, int);
> > +
> > +static void check_dct_unquantize(unquantizer func, const char *name)
> > +{
> > +#define LEN 64
> > +    LOCAL_ALIGNED_16(int16_t, block0, [LEN]);
> > +    LOCAL_ALIGNED_16(int16_t, block1, [LEN]);
> 
> These are not padded, and unless I'm reading this wrong, an asm
> implementation loading say 16 bytes at a time will overread/write in
> dct_unquantize_intra (which offsets block by 1).

AFAIU, there is no padding per se, but the block buffer size is always exactly 
64 elements, regardless of the number of coeffs, hence this code. The old NEON 
intrinsic code seems to assume the block is a multiple of 8 elements, and the 
tail can be overwritten safely (hence not checking in memcmp()).

I have a feeling that I am not grasping the implications of you comment here.

-- 
Rémi Denis-Courmont
http://www.remlab.net/



_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [FFmpeg-devel] [PATCH 3/4] checkasm/h263dsp: test dct_unquantize_{intra, inter}
  2024-06-12  4:47 ` [FFmpeg-devel] [PATCH 3/4] checkasm/h263dsp: test dct_unquantize_{intra, inter} Rémi Denis-Courmont
@ 2024-06-12 18:39   ` James Almer
  2024-06-12 19:15     ` Rémi Denis-Courmont
  0 siblings, 1 reply; 14+ messages in thread
From: James Almer @ 2024-06-12 18:39 UTC (permalink / raw)
  To: ffmpeg-devel

On 6/12/2024 1:47 AM, Rémi Denis-Courmont wrote:
> ---
>   tests/checkasm/h263dsp.c | 47 +++++++++++++++++++++++++++++++++++++++-
>   1 file changed, 46 insertions(+), 1 deletion(-)
> 
> diff --git a/tests/checkasm/h263dsp.c b/tests/checkasm/h263dsp.c
> index 2d0957a90b..8a2cdb34df 100644
> --- a/tests/checkasm/h263dsp.c
> +++ b/tests/checkasm/h263dsp.c
> @@ -18,13 +18,55 @@
>    * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
>    */
>   
> +#include <stdbool.h>
>   #include <string.h>
>   
>   #include "checkasm.h"
>   
> -#include "libavcodec/h263dsp.h"
> +#include "libavutil/avassert.h"
>   #include "libavutil/mem.h"
>   #include "libavutil/mem_internal.h"
> +#include "libavcodec/h263dsp.h"
> +#include "libavcodec/mpegvideodata.h"
> +
> +static uint_fast8_t mpeg_qscale_rnd(void)
> +{
> +    int n = rnd(), q = (n >> 1) & 31;
> +
> +    if (n & 1)
> +        return ff_mpeg2_non_linear_qscale[q];
> +    else
> +        return q << 1;
> +}
> +
> +typedef void (*unquantizer)(int16_t *, size_t, int, int);
> +
> +static void check_dct_unquantize(unquantizer func, const char *name)
> +{
> +#define LEN 64
> +    LOCAL_ALIGNED_16(int16_t, block0, [LEN]);
> +    LOCAL_ALIGNED_16(int16_t, block1, [LEN]);

These are not padded, and unless I'm reading this wrong, an asm 
implementation loading say 16 bytes at a time will overread/write in 
dct_unquantize_intra (which offsets block by 1).

> +    size_t len = 1 + (rnd() & (LEN - 1));
> +    const int qscale = mpeg_qscale_rnd();
> +    const int qmul = qscale << 1;
> +    const int qadd = (rnd() & 1) ? (qscale - 1) | 1 : 0;
> +
> +    declare_func(void, int16_t *, size_t, int, int);
> +
> +    for (size_t i = 0; i < LEN; i++)
> +        block1[i] = block0[i] = (rnd() & 1) ? rnd() : 0;
> +
> +    if (check_func(func, "h263dsp.dct_unquantize_%s", name)) {
> +        av_assert0(len <= LEN);
> +        call_ref(block0, len, qmul, qadd);
> +        call_new(block1, len, qmul, qadd);
> +
> +        if (memcmp(block0, block1, len * sizeof (int16_t)))
> +            fail();
> +
> +        bench_new(block1, LEN, qmul, qadd);
> +    }
> +}
>   
>   typedef void (*filter)(uint8_t *src, int stride, int qscale);
>   
> @@ -56,6 +98,9 @@ void checkasm_check_h263dsp(void)
>       H263DSPContext ctx;
>   
>       ff_h263dsp_init(&ctx);
> +    check_dct_unquantize(ctx.h263_dct_unquantize_intra, "intra");
> +    check_dct_unquantize(ctx.h263_dct_unquantize_inter, "inter");
> +    report("dct_unquantize");
>       check_loop_filter('h', ctx.h263_h_loop_filter);
>       check_loop_filter('v', ctx.h263_v_loop_filter);
>       report("loop_filter");
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [FFmpeg-devel] [PATCH 3/4] checkasm/h263dsp: test dct_unquantize_{intra, inter}
  2024-06-12  4:47 [FFmpeg-devel] [PATCHv5 1/4] lavc/h263dsp: add DCT dequantisation functions Rémi Denis-Courmont
@ 2024-06-12  4:47 ` Rémi Denis-Courmont
  2024-06-12 18:39   ` James Almer
  0 siblings, 1 reply; 14+ messages in thread
From: Rémi Denis-Courmont @ 2024-06-12  4:47 UTC (permalink / raw)
  To: ffmpeg-devel

---
 tests/checkasm/h263dsp.c | 47 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 46 insertions(+), 1 deletion(-)

diff --git a/tests/checkasm/h263dsp.c b/tests/checkasm/h263dsp.c
index 2d0957a90b..8a2cdb34df 100644
--- a/tests/checkasm/h263dsp.c
+++ b/tests/checkasm/h263dsp.c
@@ -18,13 +18,55 @@
  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
+#include <stdbool.h>
 #include <string.h>
 
 #include "checkasm.h"
 
-#include "libavcodec/h263dsp.h"
+#include "libavutil/avassert.h"
 #include "libavutil/mem.h"
 #include "libavutil/mem_internal.h"
+#include "libavcodec/h263dsp.h"
+#include "libavcodec/mpegvideodata.h"
+
+static uint_fast8_t mpeg_qscale_rnd(void)
+{
+    int n = rnd(), q = (n >> 1) & 31;
+
+    if (n & 1)
+        return ff_mpeg2_non_linear_qscale[q];
+    else
+        return q << 1;
+}
+
+typedef void (*unquantizer)(int16_t *, size_t, int, int);
+
+static void check_dct_unquantize(unquantizer func, const char *name)
+{
+#define LEN 64
+    LOCAL_ALIGNED_16(int16_t, block0, [LEN]);
+    LOCAL_ALIGNED_16(int16_t, block1, [LEN]);
+    size_t len = 1 + (rnd() & (LEN - 1));
+    const int qscale = mpeg_qscale_rnd();
+    const int qmul = qscale << 1;
+    const int qadd = (rnd() & 1) ? (qscale - 1) | 1 : 0;
+
+    declare_func(void, int16_t *, size_t, int, int);
+
+    for (size_t i = 0; i < LEN; i++)
+        block1[i] = block0[i] = (rnd() & 1) ? rnd() : 0;
+
+    if (check_func(func, "h263dsp.dct_unquantize_%s", name)) {
+        av_assert0(len <= LEN);
+        call_ref(block0, len, qmul, qadd);
+        call_new(block1, len, qmul, qadd);
+
+        if (memcmp(block0, block1, len * sizeof (int16_t)))
+            fail();
+
+        bench_new(block1, LEN, qmul, qadd);
+    }
+}
 
 typedef void (*filter)(uint8_t *src, int stride, int qscale);
 
@@ -56,6 +98,9 @@ void checkasm_check_h263dsp(void)
     H263DSPContext ctx;
 
     ff_h263dsp_init(&ctx);
+    check_dct_unquantize(ctx.h263_dct_unquantize_intra, "intra");
+    check_dct_unquantize(ctx.h263_dct_unquantize_inter, "inter");
+    report("dct_unquantize");
     check_loop_filter('h', ctx.h263_h_loop_filter);
     check_loop_filter('v', ctx.h263_v_loop_filter);
     report("loop_filter");
-- 
2.45.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2024-07-06 19:10 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-06-09 16:23 [FFmpeg-devel] [PATCHv3 1/4] lavc/h263dsp: add DCT dequantisation functions Rémi Denis-Courmont
2024-06-09 16:23 ` [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function Rémi Denis-Courmont
2024-06-10 11:41   ` Michael Niedermayer
2024-06-10 12:14     ` Rémi Denis-Courmont
2024-06-10 12:32       ` Michael Niedermayer
2024-06-09 16:23 ` [FFmpeg-devel] [PATCH 3/4] checkasm/h263dsp: test dct_unquantize_{intra, inter} Rémi Denis-Courmont
2024-06-09 16:23 ` [FFmpeg-devel] [PATCH 4/4] lavc/h263dsp: R-V V " Rémi Denis-Courmont
2024-06-12  4:47 [FFmpeg-devel] [PATCHv5 1/4] lavc/h263dsp: add DCT dequantisation functions Rémi Denis-Courmont
2024-06-12  4:47 ` [FFmpeg-devel] [PATCH 3/4] checkasm/h263dsp: test dct_unquantize_{intra, inter} Rémi Denis-Courmont
2024-06-12 18:39   ` James Almer
2024-06-12 19:15     ` Rémi Denis-Courmont
2024-06-12 19:39       ` James Almer
2024-06-12 19:52         ` Rémi Denis-Courmont
2024-07-01 19:13 [FFmpeg-devel] [PATCH 1/4] lavc/h263dsp: add DCT dequantisation functions Rémi Denis-Courmont
2024-07-01 19:13 ` [FFmpeg-devel] [PATCH 3/4] checkasm/h263dsp: test dct_unquantize_{intra, inter} Rémi Denis-Courmont
2024-07-06 19:10   ` Andreas Rheinhardt

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git