Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
* [FFmpeg-devel] [PATCH 1/4] lavc/h263dsp: add DCT dequantisation functions
@ 2024-07-01 19:13 Rémi Denis-Courmont
  2024-07-01 19:13 ` [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function Rémi Denis-Courmont
                   ` (4 more replies)
  0 siblings, 5 replies; 22+ messages in thread
From: Rémi Denis-Courmont @ 2024-07-01 19:13 UTC (permalink / raw)
  To: ffmpeg-devel

Note that optimised implementations of these functions will be taken
into actual use only if MpegEncContext.dct_unquantize_h263_{inter,intra}
are *not* overloaded by existing optimisations.
---
 libavcodec/h263dsp.c | 25 +++++++++++++++++++++++++
 libavcodec/h263dsp.h |  4 ++++
 2 files changed, 29 insertions(+)

diff --git a/libavcodec/h263dsp.c b/libavcodec/h263dsp.c
index 6a13353499..dc146bf821 100644
--- a/libavcodec/h263dsp.c
+++ b/libavcodec/h263dsp.c
@@ -19,10 +19,33 @@
 #include <stdint.h>
 
 #include "libavutil/attributes.h"
+#include "libavutil/avassert.h"
 #include "libavutil/common.h"
 #include "config.h"
 #include "h263dsp.h"
 
+static void h263_dct_unquantize_inter_c(int16_t *block, ptrdiff_t len,
+                                        int qmul, int qadd)
+{
+    for (ptrdiff_t i = 0; i <= len; i++) {
+        int level = block[i];
+
+        if (level) {
+            if (level < 0)
+                level = level * qmul - qadd;
+            else
+                level = level * qmul + qadd;
+            block[i] = level;
+        }
+    }
+}
+
+static void h263_dct_unquantize_intra_c(int16_t *block, ptrdiff_t len,
+                                        int qmul, int qadd)
+{
+    h263_dct_unquantize_inter_c(block + 1, len - 1, qmul, qadd);
+}
+
 const uint8_t ff_h263_loop_filter_strength[32] = {
     0, 1, 1, 2, 2, 3, 3,  4,  4,  4,  5,  5,  6,  6,  7, 7,
     7, 8, 8, 8, 9, 9, 9, 10, 10, 10, 11, 11, 11, 12, 12, 12
@@ -116,6 +139,8 @@ static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale)
 
 av_cold void ff_h263dsp_init(H263DSPContext *ctx)
 {
+    ctx->h263_dct_unquantize_intra = h263_dct_unquantize_intra_c;
+    ctx->h263_dct_unquantize_inter = h263_dct_unquantize_inter_c;
     ctx->h263_h_loop_filter = h263_h_loop_filter_c;
     ctx->h263_v_loop_filter = h263_v_loop_filter_c;
 
diff --git a/libavcodec/h263dsp.h b/libavcodec/h263dsp.h
index 2dccd23392..d26498f491 100644
--- a/libavcodec/h263dsp.h
+++ b/libavcodec/h263dsp.h
@@ -24,6 +24,10 @@
 extern const uint8_t ff_h263_loop_filter_strength[32];
 
 typedef struct H263DSPContext {
+    void (*h263_dct_unquantize_intra)(int16_t *block /* align 16 */,
+                                      ptrdiff_t len, int mul, int add);
+    void (*h263_dct_unquantize_inter)(int16_t *block /* align 16 */,
+                                      ptrdiff_t len, int mul, int add);
     void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale);
     void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale);
 } H263DSPContext;
-- 
2.45.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function
  2024-07-01 19:13 [FFmpeg-devel] [PATCH 1/4] lavc/h263dsp: add DCT dequantisation functions Rémi Denis-Courmont
@ 2024-07-01 19:13 ` Rémi Denis-Courmont
  2024-07-06 15:23   ` Andreas Rheinhardt
  2024-07-01 19:13 ` [FFmpeg-devel] [PATCH 3/4] checkasm/h263dsp: test dct_unquantize_{intra, inter} Rémi Denis-Courmont
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 22+ messages in thread
From: Rémi Denis-Courmont @ 2024-07-01 19:13 UTC (permalink / raw)
  To: ffmpeg-devel

---
 configure              |  4 ++--
 libavcodec/mpegvideo.c | 40 +++++++++-------------------------------
 2 files changed, 11 insertions(+), 33 deletions(-)

diff --git a/configure b/configure
index fed4c44cd1..42b9a72d5a 100755
--- a/configure
+++ b/configure
@@ -2954,8 +2954,8 @@ ftr_decoder_select="adts_header"
 g2m_decoder_deps="zlib"
 g2m_decoder_select="blockdsp idctdsp jpegtables"
 g729_decoder_select="audiodsp"
-h261_decoder_select="mpegvideodec"
-h261_encoder_select="mpegvideoenc"
+h261_decoder_select="h263dsp mpegvideodec"
+h261_encoder_select="h263dsp mpegvideoenc"
 h263_decoder_select="h263_parser h263dsp mpegvideodec qpeldsp"
 h263_encoder_select="h263dsp mpegvideoenc"
 h263i_decoder_select="h263_decoder"
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index b9a0469335..c9094d1cce 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -202,13 +202,11 @@ static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
                                   int16_t *block, int n, int qscale)
 {
-    int i, level, qmul, qadd;
-    int nCoeffs;
+    int qmul = qscale << 1;
+    int qadd, nCoeffs;
 
     av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
 
-    qmul = qscale << 1;
-
     if (!s->h263_aic) {
         block[0] *= n < 4 ? s->y_dc_scale : s->c_dc_scale;
         qadd = (qscale - 1) | 1;
@@ -220,43 +218,20 @@ static void dct_unquantize_h263_intra_c(MpegEncContext *s,
     else
         nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
 
-    for(i=1; i<=nCoeffs; i++) {
-        level = block[i];
-        if (level) {
-            if (level < 0) {
-                level = level * qmul - qadd;
-            } else {
-                level = level * qmul + qadd;
-            }
-            block[i] = level;
-        }
-    }
+    s->h263dsp.h263_dct_unquantize_intra(block, nCoeffs, qmul, qadd);
 }
 
 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
                                   int16_t *block, int n, int qscale)
 {
-    int i, level, qmul, qadd;
+    int qmul = qscale << 1;
+    int qadd = (qscale - 1) | 1;
     int nCoeffs;
 
     av_assert2(s->block_last_index[n]>=0);
 
-    qadd = (qscale - 1) | 1;
-    qmul = qscale << 1;
-
     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
-
-    for(i=0; i<=nCoeffs; i++) {
-        level = block[i];
-        if (level) {
-            if (level < 0) {
-                level = level * qmul - qadd;
-            } else {
-                level = level * qmul + qadd;
-            }
-            block[i] = level;
-        }
-    }
+    s->h263dsp.h263_dct_unquantize_inter(block, nCoeffs, qmul, qadd);
 }
 
 
@@ -276,6 +251,9 @@ static void gray8(uint8_t *dst, const uint8_t *src, ptrdiff_t linesize, int h)
 static av_cold void dsp_init(MpegEncContext *s)
 {
     ff_blockdsp_init(&s->bdsp);
+#if CONFIG_H263DSP
+    ff_h263dsp_init(&s->h263dsp);
+#endif
     ff_hpeldsp_init(&s->hdsp, s->avctx->flags);
     ff_videodsp_init(&s->vdsp, s->avctx->bits_per_raw_sample);
 
-- 
2.45.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [FFmpeg-devel] [PATCH 3/4] checkasm/h263dsp: test dct_unquantize_{intra, inter}
  2024-07-01 19:13 [FFmpeg-devel] [PATCH 1/4] lavc/h263dsp: add DCT dequantisation functions Rémi Denis-Courmont
  2024-07-01 19:13 ` [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function Rémi Denis-Courmont
@ 2024-07-01 19:13 ` Rémi Denis-Courmont
  2024-07-06 19:10   ` Andreas Rheinhardt
  2024-07-01 19:13 ` [FFmpeg-devel] [PATCH 4/4] lavc/h263dsp: R-V V " Rémi Denis-Courmont
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 22+ messages in thread
From: Rémi Denis-Courmont @ 2024-07-01 19:13 UTC (permalink / raw)
  To: ffmpeg-devel

---
 tests/checkasm/h263dsp.c | 57 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 56 insertions(+), 1 deletion(-)

diff --git a/tests/checkasm/h263dsp.c b/tests/checkasm/h263dsp.c
index 2d0957a90b..26020211dc 100644
--- a/tests/checkasm/h263dsp.c
+++ b/tests/checkasm/h263dsp.c
@@ -18,13 +18,65 @@
  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
+#include <stdbool.h>
 #include <string.h>
 
 #include "checkasm.h"
 
-#include "libavcodec/h263dsp.h"
+#include "libavutil/avassert.h"
 #include "libavutil/mem.h"
 #include "libavutil/mem_internal.h"
+#include "libavcodec/h263dsp.h"
+#include "libavcodec/mpegvideodata.h"
+
+static uint_fast8_t mpeg_qscale_rnd(void)
+{
+    int n = rnd(), q = (n >> 1) & 31;
+
+    if (n & 1)
+        return ff_mpeg2_non_linear_qscale[q];
+    else
+        return q << 1;
+}
+
+typedef void (*unquantizer)(int16_t *, ptrdiff_t, int, int);
+
+static void check_dct_unquantize(unquantizer func, bool inter)
+{
+    const char *name = inter ? "inter" : "intra";
+#define LEN 64
+    LOCAL_ALIGNED_16(int16_t, block0, [LEN]);
+    LOCAL_ALIGNED_16(int16_t, block1, [LEN]);
+    size_t len = rnd() % LEN;
+    const int qscale = mpeg_qscale_rnd();
+    const int qmul = qscale << 1;
+    const int qadd = (rnd() & 1) ? (qscale - 1) | 1 : 0;
+
+    declare_func(void, int16_t *, ptrdiff_t, int, int);
+
+    for (size_t i = 0; i < LEN; i++) {
+        int r = rnd();
+
+        block1[i] = block0[i] = (r & 1) ? (r >> 1) : 0;
+    }
+
+    if (check_func(func, "h263dsp.dct_unquantize_%s", name)) {
+        call_ref(block0, 0, qmul, qadd);
+        call_new(block1, 0, qmul, qadd);
+
+        if (memcmp(block0, block1, LEN * sizeof (int16_t)))
+            fail();
+
+        av_assert0(len < LEN);
+        call_ref(block0, len, qmul, qadd);
+        call_new(block1, len, qmul, qadd);
+
+        if (memcmp(block0, block1, LEN * sizeof (int16_t)))
+            fail();
+
+        bench_new(block1, LEN, qmul, qadd);
+    }
+}
 
 typedef void (*filter)(uint8_t *src, int stride, int qscale);
 
@@ -56,6 +108,9 @@ void checkasm_check_h263dsp(void)
     H263DSPContext ctx;
 
     ff_h263dsp_init(&ctx);
+    check_dct_unquantize(ctx.h263_dct_unquantize_intra, false);
+    check_dct_unquantize(ctx.h263_dct_unquantize_inter, true);
+    report("dct_unquantize");
     check_loop_filter('h', ctx.h263_h_loop_filter);
     check_loop_filter('v', ctx.h263_v_loop_filter);
     report("loop_filter");
-- 
2.45.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [FFmpeg-devel] [PATCH 4/4] lavc/h263dsp: R-V V dct_unquantize_{intra, inter}
  2024-07-01 19:13 [FFmpeg-devel] [PATCH 1/4] lavc/h263dsp: add DCT dequantisation functions Rémi Denis-Courmont
  2024-07-01 19:13 ` [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function Rémi Denis-Courmont
  2024-07-01 19:13 ` [FFmpeg-devel] [PATCH 3/4] checkasm/h263dsp: test dct_unquantize_{intra, inter} Rémi Denis-Courmont
@ 2024-07-01 19:13 ` Rémi Denis-Courmont
  2024-07-06 10:54 ` [FFmpeg-devel] [PATCH 1/4] lavc/h263dsp: add DCT dequantisation functions Rémi Denis-Courmont
  2024-07-06 15:17 ` Andreas Rheinhardt
  4 siblings, 0 replies; 22+ messages in thread
From: Rémi Denis-Courmont @ 2024-07-01 19:13 UTC (permalink / raw)
  To: ffmpeg-devel

T-Head C908:
h263dsp.dct_unquantize_inter_c:       3.7
h263dsp.dct_unquantize_inter_rvv_i32: 1.7
h263dsp.dct_unquantize_intra_c:       4.0
h263dsp.dct_unquantize_intra_rvv_i32: 1.5

SpacemiT X60:
h263dsp.dct_unquantize_inter_c:       3.5
h263dsp.dct_unquantize_inter_rvv_i32: 1.2
h263dsp.dct_unquantize_intra_c:       3.5
h263dsp.dct_unquantize_intra_rvv_i32: 1.2
---
 libavcodec/riscv/h263dsp_init.c | 15 ++++++++++++---
 libavcodec/riscv/h263dsp_rvv.S  | 27 +++++++++++++++++++++++++++
 2 files changed, 39 insertions(+), 3 deletions(-)

diff --git a/libavcodec/riscv/h263dsp_init.c b/libavcodec/riscv/h263dsp_init.c
index 21b536366c..78d537d1e1 100644
--- a/libavcodec/riscv/h263dsp_init.c
+++ b/libavcodec/riscv/h263dsp_init.c
@@ -25,6 +25,8 @@
 #include "libavutil/riscv/cpu.h"
 #include "libavcodec/h263dsp.h"
 
+void ff_h263_dct_unquantize_intra_rvv(int16_t *, ptrdiff_t len, int, int);
+void ff_h263_dct_unquantize_inter_rvv(int16_t *, ptrdiff_t len, int, int);
 void ff_h263_h_loop_filter_rvv(uint8_t *src, int stride, int q);
 void ff_h263_v_loop_filter_rvv(uint8_t *src, int stride, int q);
 
@@ -33,9 +35,16 @@ av_cold void ff_h263dsp_init_riscv(H263DSPContext *c)
 #if HAVE_RVV
     int flags = av_get_cpu_flags();
 
-    if ((flags & AV_CPU_FLAG_RVV_I32) && ff_rv_vlen_least(128)) {
-        c->h263_h_loop_filter = ff_h263_h_loop_filter_rvv;
-        c->h263_v_loop_filter = ff_h263_v_loop_filter_rvv;
+    if (flags & AV_CPU_FLAG_RVV_I32) {
+        if (ff_rv_vlen_least(128) || (flags & AV_CPU_FLAG_RVB_ADDR)) {
+            c->h263_dct_unquantize_intra = ff_h263_dct_unquantize_intra_rvv;
+            c->h263_dct_unquantize_inter = ff_h263_dct_unquantize_inter_rvv;
+        }
+
+        if (ff_rv_vlen_least(128)) {
+            c->h263_h_loop_filter = ff_h263_h_loop_filter_rvv;
+            c->h263_v_loop_filter = ff_h263_v_loop_filter_rvv;
+        }
     }
 #endif
 }
diff --git a/libavcodec/riscv/h263dsp_rvv.S b/libavcodec/riscv/h263dsp_rvv.S
index 97503d527c..f2b7eb9a87 100644
--- a/libavcodec/riscv/h263dsp_rvv.S
+++ b/libavcodec/riscv/h263dsp_rvv.S
@@ -20,6 +20,33 @@
 
 #include "libavutil/riscv/asm.S"
 
+func ff_h263_dct_unquantize_intra_rvv, zve32x
+        addi    a0, a0, 2
+        j       1f
+endfunc
+
+func ff_h263_dct_unquantize_inter_rvv, zve32x
+        addi    a1, a1, 1
+1:
+        vsetvli  t0, a1, e16, m8, ta, mu
+        vle16.v  v8, (a0)
+#if defined(__riscv_v_min_vlen) && __riscv_v_min_vlen < 128
+        sub      a1, a1, t0
+#endif
+        vmv.v.x  v16, a3
+        vmslt.vi v0, v8, 0
+        vneg.v   v16, v16, v0.t
+        vmsne.vi v0, v8, 0
+        vmadd.vx v8, a2, v16, v0.t
+        vse16.v  v8, (a0)
+#if defined(__riscv_v_min_vlen) && __riscv_v_min_vlen < 128
+        sh1add   a0, t0, a0
+        bnez     a1, 1b
+#endif
+
+        ret
+endfunc
+
         .option push
         .option norelax
 func ff_h263_h_loop_filter_rvv, zve32x
-- 
2.45.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [FFmpeg-devel] [PATCH 1/4] lavc/h263dsp: add DCT dequantisation functions
  2024-07-01 19:13 [FFmpeg-devel] [PATCH 1/4] lavc/h263dsp: add DCT dequantisation functions Rémi Denis-Courmont
                   ` (2 preceding siblings ...)
  2024-07-01 19:13 ` [FFmpeg-devel] [PATCH 4/4] lavc/h263dsp: R-V V " Rémi Denis-Courmont
@ 2024-07-06 10:54 ` Rémi Denis-Courmont
  2024-07-06 15:17 ` Andreas Rheinhardt
  4 siblings, 0 replies; 22+ messages in thread
From: Rémi Denis-Courmont @ 2024-07-06 10:54 UTC (permalink / raw)
  To: ffmpeg-devel

Will merge soonish except for feedback.

-- 
Rémi Denis-Courmont
http://www.remlab.net/



_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [FFmpeg-devel] [PATCH 1/4] lavc/h263dsp: add DCT dequantisation functions
  2024-07-01 19:13 [FFmpeg-devel] [PATCH 1/4] lavc/h263dsp: add DCT dequantisation functions Rémi Denis-Courmont
                   ` (3 preceding siblings ...)
  2024-07-06 10:54 ` [FFmpeg-devel] [PATCH 1/4] lavc/h263dsp: add DCT dequantisation functions Rémi Denis-Courmont
@ 2024-07-06 15:17 ` Andreas Rheinhardt
  2024-07-06 16:10   ` Rémi Denis-Courmont
  4 siblings, 1 reply; 22+ messages in thread
From: Andreas Rheinhardt @ 2024-07-06 15:17 UTC (permalink / raw)
  To: ffmpeg-devel

Rémi Denis-Courmont:
> Note that optimised implementations of these functions will be taken
> into actual use only if MpegEncContext.dct_unquantize_h263_{inter,intra}
> are *not* overloaded by existing optimisations.
> ---
>  libavcodec/h263dsp.c | 25 +++++++++++++++++++++++++
>  libavcodec/h263dsp.h |  4 ++++
>  2 files changed, 29 insertions(+)
> 
> diff --git a/libavcodec/h263dsp.c b/libavcodec/h263dsp.c
> index 6a13353499..dc146bf821 100644
> --- a/libavcodec/h263dsp.c
> +++ b/libavcodec/h263dsp.c
> @@ -19,10 +19,33 @@
>  #include <stdint.h>
>  
>  #include "libavutil/attributes.h"
> +#include "libavutil/avassert.h"
>  #include "libavutil/common.h"
>  #include "config.h"
>  #include "h263dsp.h"
>  
> +static void h263_dct_unquantize_inter_c(int16_t *block, ptrdiff_t len,
> +                                        int qmul, int qadd)
> +{
> +    for (ptrdiff_t i = 0; i <= len; i++) {

In the current code, the unquantizing inter does not need an initial
check (the compiler can optimize the for-loop to a do-while-like loop).
This will be lost here.

> +        int level = block[i];
> +
> +        if (level) {
> +            if (level < 0)
> +                level = level * qmul - qadd;
> +            else
> +                level = level * qmul + qadd;
> +            block[i] = level;
> +        }
> +    }
> +}
> +
> +static void h263_dct_unquantize_intra_c(int16_t *block, ptrdiff_t len,
> +                                        int qmul, int qadd)
> +{
> +    h263_dct_unquantize_inter_c(block + 1, len - 1, qmul, qadd);
> +}
> +
>  const uint8_t ff_h263_loop_filter_strength[32] = {
>      0, 1, 1, 2, 2, 3, 3,  4,  4,  4,  5,  5,  6,  6,  7, 7,
>      7, 8, 8, 8, 9, 9, 9, 10, 10, 10, 11, 11, 11, 12, 12, 12
> @@ -116,6 +139,8 @@ static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale)
>  
>  av_cold void ff_h263dsp_init(H263DSPContext *ctx)
>  {
> +    ctx->h263_dct_unquantize_intra = h263_dct_unquantize_intra_c;
> +    ctx->h263_dct_unquantize_inter = h263_dct_unquantize_inter_c;
>      ctx->h263_h_loop_filter = h263_h_loop_filter_c;
>      ctx->h263_v_loop_filter = h263_v_loop_filter_c;
>  
> diff --git a/libavcodec/h263dsp.h b/libavcodec/h263dsp.h
> index 2dccd23392..d26498f491 100644
> --- a/libavcodec/h263dsp.h
> +++ b/libavcodec/h263dsp.h
> @@ -24,6 +24,10 @@
>  extern const uint8_t ff_h263_loop_filter_strength[32];
>  
>  typedef struct H263DSPContext {
> +    void (*h263_dct_unquantize_intra)(int16_t *block /* align 16 */,
> +                                      ptrdiff_t len, int mul, int add);
> +    void (*h263_dct_unquantize_inter)(int16_t *block /* align 16 */,
> +                                      ptrdiff_t len, int mul, int add);
>      void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale);
>      void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale);
>  } H263DSPContext;

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function
  2024-07-01 19:13 ` [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function Rémi Denis-Courmont
@ 2024-07-06 15:23   ` Andreas Rheinhardt
  2024-07-06 16:10     ` Rémi Denis-Courmont
  0 siblings, 1 reply; 22+ messages in thread
From: Andreas Rheinhardt @ 2024-07-06 15:23 UTC (permalink / raw)
  To: ffmpeg-devel

Rémi Denis-Courmont:
> ---
>  configure              |  4 ++--
>  libavcodec/mpegvideo.c | 40 +++++++++-------------------------------
>  2 files changed, 11 insertions(+), 33 deletions(-)
> 
> diff --git a/configure b/configure
> index fed4c44cd1..42b9a72d5a 100755
> --- a/configure
> +++ b/configure
> @@ -2954,8 +2954,8 @@ ftr_decoder_select="adts_header"
>  g2m_decoder_deps="zlib"
>  g2m_decoder_select="blockdsp idctdsp jpegtables"
>  g729_decoder_select="audiodsp"
> -h261_decoder_select="mpegvideodec"
> -h261_encoder_select="mpegvideoenc"
> +h261_decoder_select="h263dsp mpegvideodec"

Unnecessary since f793074784ae79dabc4f83b61710161b3fe3288c
(Btw: It was not only H.261 and H.263 using these; RV34 and VC-1 did so,
too, but only for the error-resilience code. But this is no longer an
issue since 7b539ca3e6bae701d88096ff8dc3db7f13b7318a.)

> +h261_encoder_select="h263dsp mpegvideoenc"
>  h263_decoder_select="h263_parser h263dsp mpegvideodec qpeldsp"
>  h263_encoder_select="h263dsp mpegvideoenc"
>  h263i_decoder_select="h263_decoder"
> diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
> index b9a0469335..c9094d1cce 100644
> --- a/libavcodec/mpegvideo.c
> +++ b/libavcodec/mpegvideo.c
> @@ -202,13 +202,11 @@ static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
>  static void dct_unquantize_h263_intra_c(MpegEncContext *s,
>                                    int16_t *block, int n, int qscale)
>  {
> -    int i, level, qmul, qadd;
> -    int nCoeffs;
> +    int qmul = qscale << 1;
> +    int qadd, nCoeffs;
>  
>      av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
>  
> -    qmul = qscale << 1;
> -
>      if (!s->h263_aic) {
>          block[0] *= n < 4 ? s->y_dc_scale : s->c_dc_scale;
>          qadd = (qscale - 1) | 1;
> @@ -220,43 +218,20 @@ static void dct_unquantize_h263_intra_c(MpegEncContext *s,
>      else
>          nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
>  
> -    for(i=1; i<=nCoeffs; i++) {
> -        level = block[i];
> -        if (level) {
> -            if (level < 0) {
> -                level = level * qmul - qadd;
> -            } else {
> -                level = level * qmul + qadd;
> -            }
> -            block[i] = level;
> -        }
> -    }
> +    s->h263dsp.h263_dct_unquantize_intra(block, nCoeffs, qmul, qadd);
>  }
>  
>  static void dct_unquantize_h263_inter_c(MpegEncContext *s,
>                                    int16_t *block, int n, int qscale)
>  {
> -    int i, level, qmul, qadd;
> +    int qmul = qscale << 1;
> +    int qadd = (qscale - 1) | 1;
>      int nCoeffs;
>  
>      av_assert2(s->block_last_index[n]>=0);
>  
> -    qadd = (qscale - 1) | 1;
> -    qmul = qscale << 1;
> -
>      nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
> -
> -    for(i=0; i<=nCoeffs; i++) {
> -        level = block[i];
> -        if (level) {
> -            if (level < 0) {
> -                level = level * qmul - qadd;
> -            } else {
> -                level = level * qmul + qadd;
> -            }
> -            block[i] = level;
> -        }
> -    }
> +    s->h263dsp.h263_dct_unquantize_inter(block, nCoeffs, qmul, qadd);

This adds an indirection. I have asked you to actually benchmark this
code (and not only the DSP function you add), but you never did.

>  }
>  
>  
> @@ -276,6 +251,9 @@ static void gray8(uint8_t *dst, const uint8_t *src, ptrdiff_t linesize, int h)
>  static av_cold void dsp_init(MpegEncContext *s)
>  {
>      ff_blockdsp_init(&s->bdsp);
> +#if CONFIG_H263DSP
> +    ff_h263dsp_init(&s->h263dsp);
> +#endif
>      ff_hpeldsp_init(&s->hdsp, s->avctx->flags);
>      ff_videodsp_init(&s->vdsp, s->avctx->bits_per_raw_sample);
>  

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [FFmpeg-devel] [PATCH 1/4] lavc/h263dsp: add DCT dequantisation functions
  2024-07-06 15:17 ` Andreas Rheinhardt
@ 2024-07-06 16:10   ` Rémi Denis-Courmont
  2024-07-06 16:19     ` Andreas Rheinhardt
  0 siblings, 1 reply; 22+ messages in thread
From: Rémi Denis-Courmont @ 2024-07-06 16:10 UTC (permalink / raw)
  To: ffmpeg-devel

Le lauantaina 6. heinäkuuta 2024, 18.17.37 EEST Andreas Rheinhardt a écrit :
> Rémi Denis-Courmont:
> > Note that optimised implementations of these functions will be taken
> > into actual use only if MpegEncContext.dct_unquantize_h263_{inter,intra}
> > are *not* overloaded by existing optimisations.
> > ---
> > 
> >  libavcodec/h263dsp.c | 25 +++++++++++++++++++++++++
> >  libavcodec/h263dsp.h |  4 ++++
> >  2 files changed, 29 insertions(+)
> > 
> > diff --git a/libavcodec/h263dsp.c b/libavcodec/h263dsp.c
> > index 6a13353499..dc146bf821 100644
> > --- a/libavcodec/h263dsp.c
> > +++ b/libavcodec/h263dsp.c
> > @@ -19,10 +19,33 @@
> > 
> >  #include <stdint.h>
> >  
> >  #include "libavutil/attributes.h"
> > 
> > +#include "libavutil/avassert.h"
> > 
> >  #include "libavutil/common.h"
> >  #include "config.h"
> >  #include "h263dsp.h"
> > 
> > +static void h263_dct_unquantize_inter_c(int16_t *block, ptrdiff_t len,
> > +                                        int qmul, int qadd)
> > +{
> > +    for (ptrdiff_t i = 0; i <= len; i++) {
> 
> In the current code, the unquantizing inter does not need an initial
> check (the compiler can optimize the for-loop to a do-while-like loop).
> This will be lost here.

If this not the consequence of your own actions...

You asked:
- to use a signed type there, and
- to avoid the separate initial branch in the intra case.

And that is exactly how this ends up. You can't have it both ways.

-- 
レミ・デニ-クールモン
http://www.remlab.net/



_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function
  2024-07-06 15:23   ` Andreas Rheinhardt
@ 2024-07-06 16:10     ` Rémi Denis-Courmont
  2024-07-06 16:20       ` Andreas Rheinhardt
  0 siblings, 1 reply; 22+ messages in thread
From: Rémi Denis-Courmont @ 2024-07-06 16:10 UTC (permalink / raw)
  To: ffmpeg-devel

Le lauantaina 6. heinäkuuta 2024, 18.23.00 EEST Andreas Rheinhardt a écrit :
> >  static void dct_unquantize_h263_inter_c(MpegEncContext *s,
> >  
> >                                    int16_t *block, int n, int qscale)
> >  
> >  {
> > 
> > -    int i, level, qmul, qadd;
> > +    int qmul = qscale << 1;
> > +    int qadd = (qscale - 1) | 1;
> > 
> >      int nCoeffs;
> >      
> >      av_assert2(s->block_last_index[n]>=0);
> > 
> > -    qadd = (qscale - 1) | 1;
> > -    qmul = qscale << 1;
> > -
> > 
> >      nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
> > 
> > -
> > -    for(i=0; i<=nCoeffs; i++) {
> > -        level = block[i];
> > -        if (level) {
> > -            if (level < 0) {
> > -                level = level * qmul - qadd;
> > -            } else {
> > -                level = level * qmul + qadd;
> > -            }
> > -            block[i] = level;
> > -        }
> > -    }
> > +    s->h263dsp.h263_dct_unquantize_inter(block, nCoeffs, qmul, qadd);
> 
> This adds an indirection. I have asked you to actually benchmark this
> code (and not only the DSP function you add), but you never did.

I already pointed out previously that this is the way this project does DSP 
code. Certainly it would be nice to hard-code the path when there is only one 
possible. This is often the case on Armv8 notably, and of course on platforms 
without optimisations.

But that's a general problem way beyond the scope of this patchset. We always 
add indirect function calls in this sort of situation, and I don't see why I 
would have duty to benchmark it, so I am going to ignore this.

-- 
雷米‧德尼-库尔蒙
http://www.remlab.net/



_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [FFmpeg-devel] [PATCH 1/4] lavc/h263dsp: add DCT dequantisation functions
  2024-07-06 16:10   ` Rémi Denis-Courmont
@ 2024-07-06 16:19     ` Andreas Rheinhardt
  2024-07-06 17:28       ` Rémi Denis-Courmont
  0 siblings, 1 reply; 22+ messages in thread
From: Andreas Rheinhardt @ 2024-07-06 16:19 UTC (permalink / raw)
  To: ffmpeg-devel

Rémi Denis-Courmont:
> Le lauantaina 6. heinäkuuta 2024, 18.17.37 EEST Andreas Rheinhardt a écrit :
>> Rémi Denis-Courmont:
>>> Note that optimised implementations of these functions will be taken
>>> into actual use only if MpegEncContext.dct_unquantize_h263_{inter,intra}
>>> are *not* overloaded by existing optimisations.
>>> ---
>>>
>>>  libavcodec/h263dsp.c | 25 +++++++++++++++++++++++++
>>>  libavcodec/h263dsp.h |  4 ++++
>>>  2 files changed, 29 insertions(+)
>>>
>>> diff --git a/libavcodec/h263dsp.c b/libavcodec/h263dsp.c
>>> index 6a13353499..dc146bf821 100644
>>> --- a/libavcodec/h263dsp.c
>>> +++ b/libavcodec/h263dsp.c
>>> @@ -19,10 +19,33 @@
>>>
>>>  #include <stdint.h>
>>>  
>>>  #include "libavutil/attributes.h"
>>>
>>> +#include "libavutil/avassert.h"
>>>
>>>  #include "libavutil/common.h"
>>>  #include "config.h"
>>>  #include "h263dsp.h"
>>>
>>> +static void h263_dct_unquantize_inter_c(int16_t *block, ptrdiff_t len,
>>> +                                        int qmul, int qadd)
>>> +{
>>> +    for (ptrdiff_t i = 0; i <= len; i++) {
>>
>> In the current code, the unquantizing inter does not need an initial
>> check (the compiler can optimize the for-loop to a do-while-like loop).
>> This will be lost here.
> 
> If this not the consequence of your own actions...
> 
> You asked:
> - to use a signed type there, and
> - to avoid the separate initial branch in the intra case.
> 
> And that is exactly how this ends up. You can't have it both ways.
> 

One can have it both ways: Use a do-while loop.

- Andreas

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function
  2024-07-06 16:10     ` Rémi Denis-Courmont
@ 2024-07-06 16:20       ` Andreas Rheinhardt
  2024-07-06 16:47         ` Rémi Denis-Courmont
  0 siblings, 1 reply; 22+ messages in thread
From: Andreas Rheinhardt @ 2024-07-06 16:20 UTC (permalink / raw)
  To: ffmpeg-devel

Rémi Denis-Courmont:
> Le lauantaina 6. heinäkuuta 2024, 18.23.00 EEST Andreas Rheinhardt a écrit :
>>>  static void dct_unquantize_h263_inter_c(MpegEncContext *s,
>>>  
>>>                                    int16_t *block, int n, int qscale)
>>>  
>>>  {
>>>
>>> -    int i, level, qmul, qadd;
>>> +    int qmul = qscale << 1;
>>> +    int qadd = (qscale - 1) | 1;
>>>
>>>      int nCoeffs;
>>>      
>>>      av_assert2(s->block_last_index[n]>=0);
>>>
>>> -    qadd = (qscale - 1) | 1;
>>> -    qmul = qscale << 1;
>>> -
>>>
>>>      nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
>>>
>>> -
>>> -    for(i=0; i<=nCoeffs; i++) {
>>> -        level = block[i];
>>> -        if (level) {
>>> -            if (level < 0) {
>>> -                level = level * qmul - qadd;
>>> -            } else {
>>> -                level = level * qmul + qadd;
>>> -            }
>>> -            block[i] = level;
>>> -        }
>>> -    }
>>> +    s->h263dsp.h263_dct_unquantize_inter(block, nCoeffs, qmul, qadd);
>>
>> This adds an indirection. I have asked you to actually benchmark this
>> code (and not only the DSP function you add), but you never did.
> 
> I already pointed out previously that this is the way this project does DSP 
> code. Certainly it would be nice to hard-code the path when there is only one 
> possible. This is often the case on Armv8 notably, and of course on platforms 
> without optimisations.
> 
> But that's a general problem way beyond the scope of this patchset. We always 
> add indirect function calls in this sort of situation, and I don't see why I 
> would have duty to benchmark it, so I am going to ignore this.

You have a duty to benchmark it because you add it where it wasn't before.

- Andreas

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function
  2024-07-06 16:20       ` Andreas Rheinhardt
@ 2024-07-06 16:47         ` Rémi Denis-Courmont
  2024-07-06 18:27           ` Andreas Rheinhardt
  0 siblings, 1 reply; 22+ messages in thread
From: Rémi Denis-Courmont @ 2024-07-06 16:47 UTC (permalink / raw)
  To: ffmpeg-devel

Le lauantaina 6. heinäkuuta 2024, 19.20.33 EEST Andreas Rheinhardt a écrit :
> Rémi Denis-Courmont:
> > Le lauantaina 6. heinäkuuta 2024, 18.23.00 EEST Andreas Rheinhardt a écrit 
:
> >>>  static void dct_unquantize_h263_inter_c(MpegEncContext *s,
> >>>  
> >>>                                    int16_t *block, int n, int qscale)
> >>>  
> >>>  {
> >>> 
> >>> -    int i, level, qmul, qadd;
> >>> +    int qmul = qscale << 1;
> >>> +    int qadd = (qscale - 1) | 1;
> >>> 
> >>>      int nCoeffs;
> >>>      
> >>>      av_assert2(s->block_last_index[n]>=0);
> >>> 
> >>> -    qadd = (qscale - 1) | 1;
> >>> -    qmul = qscale << 1;
> >>> -
> >>> 
> >>>      nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
> >>> 
> >>> -
> >>> -    for(i=0; i<=nCoeffs; i++) {
> >>> -        level = block[i];
> >>> -        if (level) {
> >>> -            if (level < 0) {
> >>> -                level = level * qmul - qadd;
> >>> -            } else {
> >>> -                level = level * qmul + qadd;
> >>> -            }
> >>> -            block[i] = level;
> >>> -        }
> >>> -    }
> >>> +    s->h263dsp.h263_dct_unquantize_inter(block, nCoeffs, qmul, qadd);
> >> 
> >> This adds an indirection. I have asked you to actually benchmark this
> >> code (and not only the DSP function you add), but you never did.
> > 
> > I already pointed out previously that this is the way this project does
> > DSP
> > code. Certainly it would be nice to hard-code the path when there is only
> > one possible. This is often the case on Armv8 notably, and of course on
> > platforms without optimisations.
> > 
> > But that's a general problem way beyond the scope of this patchset. We
> > always add indirect function calls in this sort of situation, and I don't
> > see why I would have duty to benchmark it, so I am going to ignore this.
> 
> You have a duty to benchmark it because you add it where it wasn't before.

I don't recall other people benchmarking the indirect branch they've added 
previously for other DSP code. Recent examples include VVC and FLAC. 
Rightfully so, because there is not really an alternative anyway. Even GNU 
IFUNCs and Glibc alternative libraries internally use an indirect branch 
(hidden in PLT/GOT), and FFmpeg can't self-patch at load-time like the Linux 
kernel does, nor can it generate dynamic PLT entries with direct branches.

Also if an indirect call is unacceptable, then how come the calling code is 
itself an indirect call and for abstraction rather than performance.

Your request is completely arbitrary here. Yes, there is already an indirect 
call close up, and so? I'm not trying to clean MpegEncContext here, only 
trying to add one function to checkasm, RVV and (with James' work) post-MMX 
x86.

Lastly, you don't even specify what benchmark to run. Comparing something 
against nothing is, as my manager would say, pointless, since the relative 
overhead ought to be an approximation of infinity (in practice, you end up 
measuring the overhead of the benchmarking code instead).

-- 
Rémi Denis-Courmont
http://www.remlab.net/



_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [FFmpeg-devel] [PATCH 1/4] lavc/h263dsp: add DCT dequantisation functions
  2024-07-06 16:19     ` Andreas Rheinhardt
@ 2024-07-06 17:28       ` Rémi Denis-Courmont
  2024-07-06 18:23         ` Andreas Rheinhardt
  0 siblings, 1 reply; 22+ messages in thread
From: Rémi Denis-Courmont @ 2024-07-06 17:28 UTC (permalink / raw)
  To: ffmpeg-devel

Le lauantaina 6. heinäkuuta 2024, 19.19.35 EEST Andreas Rheinhardt a écrit :
> Rémi Denis-Courmont:
> > Le lauantaina 6. heinäkuuta 2024, 18.17.37 EEST Andreas Rheinhardt a écrit 
:
> >> Rémi Denis-Courmont:
> >>> Note that optimised implementations of these functions will be taken
> >>> into actual use only if MpegEncContext.dct_unquantize_h263_{inter,intra}
> >>> are *not* overloaded by existing optimisations.
> >>> ---
> >>> 
> >>>  libavcodec/h263dsp.c | 25 +++++++++++++++++++++++++
> >>>  libavcodec/h263dsp.h |  4 ++++
> >>>  2 files changed, 29 insertions(+)
> >>> 
> >>> diff --git a/libavcodec/h263dsp.c b/libavcodec/h263dsp.c
> >>> index 6a13353499..dc146bf821 100644
> >>> --- a/libavcodec/h263dsp.c
> >>> +++ b/libavcodec/h263dsp.c
> >>> @@ -19,10 +19,33 @@
> >>> 
> >>>  #include <stdint.h>
> >>>  
> >>>  #include "libavutil/attributes.h"
> >>> 
> >>> +#include "libavutil/avassert.h"
> >>> 
> >>>  #include "libavutil/common.h"
> >>>  #include "config.h"
> >>>  #include "h263dsp.h"
> >>> 
> >>> +static void h263_dct_unquantize_inter_c(int16_t *block, ptrdiff_t len,
> >>> +                                        int qmul, int qadd)
> >>> +{
> >>> +    for (ptrdiff_t i = 0; i <= len; i++) {
> >> 
> >> In the current code, the unquantizing inter does not need an initial
> >> check (the compiler can optimize the for-loop to a do-while-like loop).
> >> This will be lost here.
> > 
> > If this not the consequence of your own actions...
> > 
> > You asked:
> > - to use a signed type there, and
> > - to avoid the separate initial branch in the intra case.
> > 
> > And that is exactly how this ends up. You can't have it both ways.
> 
> One can have it both ways: Use a do-while loop.

That is only possible if the otherwise identical intra/inter code are 
duplicated, or if putting back the initial check in the intra case. The very 
check which you asked to remove.

So no, you can't have it both ways. I don't really care either way, but the 
check for intra needs to be *somewhere*.

-- 
レミ・デニ-クールモン
http://www.remlab.net/



_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [FFmpeg-devel] [PATCH 1/4] lavc/h263dsp: add DCT dequantisation functions
  2024-07-06 17:28       ` Rémi Denis-Courmont
@ 2024-07-06 18:23         ` Andreas Rheinhardt
  0 siblings, 0 replies; 22+ messages in thread
From: Andreas Rheinhardt @ 2024-07-06 18:23 UTC (permalink / raw)
  To: ffmpeg-devel

Rémi Denis-Courmont:
> Le lauantaina 6. heinäkuuta 2024, 19.19.35 EEST Andreas Rheinhardt a écrit :
>> Rémi Denis-Courmont:
>>> Le lauantaina 6. heinäkuuta 2024, 18.17.37 EEST Andreas Rheinhardt a écrit 
> :
>>>> Rémi Denis-Courmont:
>>>>> Note that optimised implementations of these functions will be taken
>>>>> into actual use only if MpegEncContext.dct_unquantize_h263_{inter,intra}
>>>>> are *not* overloaded by existing optimisations.
>>>>> ---
>>>>>
>>>>>  libavcodec/h263dsp.c | 25 +++++++++++++++++++++++++
>>>>>  libavcodec/h263dsp.h |  4 ++++
>>>>>  2 files changed, 29 insertions(+)
>>>>>
>>>>> diff --git a/libavcodec/h263dsp.c b/libavcodec/h263dsp.c
>>>>> index 6a13353499..dc146bf821 100644
>>>>> --- a/libavcodec/h263dsp.c
>>>>> +++ b/libavcodec/h263dsp.c
>>>>> @@ -19,10 +19,33 @@
>>>>>
>>>>>  #include <stdint.h>
>>>>>  
>>>>>  #include "libavutil/attributes.h"
>>>>>
>>>>> +#include "libavutil/avassert.h"
>>>>>
>>>>>  #include "libavutil/common.h"
>>>>>  #include "config.h"
>>>>>  #include "h263dsp.h"
>>>>>
>>>>> +static void h263_dct_unquantize_inter_c(int16_t *block, ptrdiff_t len,
>>>>> +                                        int qmul, int qadd)
>>>>> +{
>>>>> +    for (ptrdiff_t i = 0; i <= len; i++) {
>>>>
>>>> In the current code, the unquantizing inter does not need an initial
>>>> check (the compiler can optimize the for-loop to a do-while-like loop).
>>>> This will be lost here.
>>>
>>> If this not the consequence of your own actions...
>>>
>>> You asked:
>>> - to use a signed type there, and
>>> - to avoid the separate initial branch in the intra case.
>>>
>>> And that is exactly how this ends up. You can't have it both ways.
>>
>> One can have it both ways: Use a do-while loop.
> 
> That is only possible if the otherwise identical intra/inter code are 
> duplicated, or if putting back the initial check in the intra case. The very 
> check which you asked to remove.
> 
> So no, you can't have it both ways. I don't really care either way, but the 
> check for intra needs to be *somewhere*.
> 

No, it need not exist. The point of raster_end etc. is purely to
shortcut this loop. It would be correct to always use 63 for nCoeffs. In
fact, it seems to me that dct_unquantize_h263_inter_mmx() already uses a
do-while-like loop.

- Andreas

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function
  2024-07-06 16:47         ` Rémi Denis-Courmont
@ 2024-07-06 18:27           ` Andreas Rheinhardt
  0 siblings, 0 replies; 22+ messages in thread
From: Andreas Rheinhardt @ 2024-07-06 18:27 UTC (permalink / raw)
  To: ffmpeg-devel

Rémi Denis-Courmont:
> Le lauantaina 6. heinäkuuta 2024, 19.20.33 EEST Andreas Rheinhardt a écrit :
>> Rémi Denis-Courmont:
>>> Le lauantaina 6. heinäkuuta 2024, 18.23.00 EEST Andreas Rheinhardt a écrit 
> :
>>>>>  static void dct_unquantize_h263_inter_c(MpegEncContext *s,
>>>>>  
>>>>>                                    int16_t *block, int n, int qscale)
>>>>>  
>>>>>  {
>>>>>
>>>>> -    int i, level, qmul, qadd;
>>>>> +    int qmul = qscale << 1;
>>>>> +    int qadd = (qscale - 1) | 1;
>>>>>
>>>>>      int nCoeffs;
>>>>>      
>>>>>      av_assert2(s->block_last_index[n]>=0);
>>>>>
>>>>> -    qadd = (qscale - 1) | 1;
>>>>> -    qmul = qscale << 1;
>>>>> -
>>>>>
>>>>>      nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
>>>>>
>>>>> -
>>>>> -    for(i=0; i<=nCoeffs; i++) {
>>>>> -        level = block[i];
>>>>> -        if (level) {
>>>>> -            if (level < 0) {
>>>>> -                level = level * qmul - qadd;
>>>>> -            } else {
>>>>> -                level = level * qmul + qadd;
>>>>> -            }
>>>>> -            block[i] = level;
>>>>> -        }
>>>>> -    }
>>>>> +    s->h263dsp.h263_dct_unquantize_inter(block, nCoeffs, qmul, qadd);
>>>>
>>>> This adds an indirection. I have asked you to actually benchmark this
>>>> code (and not only the DSP function you add), but you never did.
>>>
>>> I already pointed out previously that this is the way this project does
>>> DSP
>>> code. Certainly it would be nice to hard-code the path when there is only
>>> one possible. This is often the case on Armv8 notably, and of course on
>>> platforms without optimisations.
>>>
>>> But that's a general problem way beyond the scope of this patchset. We
>>> always add indirect function calls in this sort of situation, and I don't
>>> see why I would have duty to benchmark it, so I am going to ignore this.
>>
>> You have a duty to benchmark it because you add it where it wasn't before.
> 
> I don't recall other people benchmarking the indirect branch they've added 
> previously for other DSP code. Recent examples include VVC and FLAC. 
> Rightfully so, because there is not really an alternative anyway. Even GNU 
> IFUNCs and Glibc alternative libraries internally use an indirect branch 
> (hidden in PLT/GOT), and FFmpeg can't self-patch at load-time like the Linux 
> kernel does, nor can it generate dynamic PLT entries with direct branches.
> 
> Also if an indirect call is unacceptable, then how come the calling code is 
> itself an indirect call and for abstraction rather than performance.

I did not even say that it is unacceptable. Merely that it should be
benched.

> 
> Your request is completely arbitrary here. Yes, there is already an indirect 
> call close up, and so? I'm not trying to clean MpegEncContext here, only 
> trying to add one function to checkasm, RVV and (with James' work) post-MMX 
> x86.
> 
> Lastly, you don't even specify what benchmark to run. Comparing something 
> against nothing is, as my manager would say, pointless, since the relative 
> overhead ought to be an approximation of infinity (in practice, you end up 
> measuring the overhead of the benchmarking code instead).

You shall compare the function you are modifying, namely
dct_unquantize_h263_(intra|inter)_c.

- Andreas

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [FFmpeg-devel] [PATCH 3/4] checkasm/h263dsp: test dct_unquantize_{intra, inter}
  2024-07-01 19:13 ` [FFmpeg-devel] [PATCH 3/4] checkasm/h263dsp: test dct_unquantize_{intra, inter} Rémi Denis-Courmont
@ 2024-07-06 19:10   ` Andreas Rheinhardt
  0 siblings, 0 replies; 22+ messages in thread
From: Andreas Rheinhardt @ 2024-07-06 19:10 UTC (permalink / raw)
  To: ffmpeg-devel

Rémi Denis-Courmont:
> ---
>  tests/checkasm/h263dsp.c | 57 +++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 56 insertions(+), 1 deletion(-)
> 
> diff --git a/tests/checkasm/h263dsp.c b/tests/checkasm/h263dsp.c
> index 2d0957a90b..26020211dc 100644
> --- a/tests/checkasm/h263dsp.c
> +++ b/tests/checkasm/h263dsp.c
> @@ -18,13 +18,65 @@
>   * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
>   */
>  
> +#include <stdbool.h>
>  #include <string.h>
>  
>  #include "checkasm.h"
>  
> -#include "libavcodec/h263dsp.h"
> +#include "libavutil/avassert.h"
>  #include "libavutil/mem.h"
>  #include "libavutil/mem_internal.h"
> +#include "libavcodec/h263dsp.h"
> +#include "libavcodec/mpegvideodata.h"
> +
> +static uint_fast8_t mpeg_qscale_rnd(void)
> +{
> +    int n = rnd(), q = (n >> 1) & 31;
> +
> +    if (n & 1)
> +        return ff_mpeg2_non_linear_qscale[q];
> +    else
> +        return q << 1;

This branch reminds me of the q_scale_type check from the
mpeg2-unquantize functions. Why is it here?
(For H.263, qscale is simply a value in 1..31. This code will test
values outside of this range.)

> +}
> +
> +typedef void (*unquantizer)(int16_t *, ptrdiff_t, int, int);
> +
> +static void check_dct_unquantize(unquantizer func, bool inter)
> +{
> +    const char *name = inter ? "inter" : "intra";
> +#define LEN 64
> +    LOCAL_ALIGNED_16(int16_t, block0, [LEN]);
> +    LOCAL_ALIGNED_16(int16_t, block1, [LEN]);
> +    size_t len = rnd() % LEN;
> +    const int qscale = mpeg_qscale_rnd();
> +    const int qmul = qscale << 1;
> +    const int qadd = (rnd() & 1) ? (qscale - 1) | 1 : 0;
> +
> +    declare_func(void, int16_t *, ptrdiff_t, int, int);
> +
> +    for (size_t i = 0; i < LEN; i++) {
> +        int r = rnd();
> +
> +        block1[i] = block0[i] = (r & 1) ? (r >> 1) : 0;

This will potentially set all elements to some nonzero value. In
reality, all elements not processed in the (current) C version are zero;
in other words, one could add

    for (; i <= 63; i++)
        av_assert0(!block[i]);

at the end of dct_unquantize_h263_(inter|intra)_c. This is what makes it
possible to switch to a do-while loop. It would also make it possible to
unroll the loop/omit special handling for the tail when vectorizing, so
this test is a hindrance to potential optimizations.

> +    }
> +
> +    if (check_func(func, "h263dsp.dct_unquantize_%s", name)) {
> +        call_ref(block0, 0, qmul, qadd);
> +        call_new(block1, 0, qmul, qadd);
> +
> +        if (memcmp(block0, block1, LEN * sizeof (int16_t)))
> +            fail();
> +
> +        av_assert0(len < LEN);
> +        call_ref(block0, len, qmul, qadd);
> +        call_new(block1, len, qmul, qadd);
> +
> +        if (memcmp(block0, block1, LEN * sizeof (int16_t)))
> +            fail();
> +
> +        bench_new(block1, LEN, qmul, qadd);
> +    }
> +}
>  
>  typedef void (*filter)(uint8_t *src, int stride, int qscale);
>  
> @@ -56,6 +108,9 @@ void checkasm_check_h263dsp(void)
>      H263DSPContext ctx;
>  
>      ff_h263dsp_init(&ctx);
> +    check_dct_unquantize(ctx.h263_dct_unquantize_intra, false);
> +    check_dct_unquantize(ctx.h263_dct_unquantize_inter, true);
> +    report("dct_unquantize");
>      check_loop_filter('h', ctx.h263_h_loop_filter);
>      check_loop_filter('v', ctx.h263_v_loop_filter);
>      report("loop_filter");

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [FFmpeg-devel] [PATCH 3/4] checkasm/h263dsp: test dct_unquantize_{intra, inter}
  2024-06-12 19:39       ` James Almer
@ 2024-06-12 19:52         ` Rémi Denis-Courmont
  0 siblings, 0 replies; 22+ messages in thread
From: Rémi Denis-Courmont @ 2024-06-12 19:52 UTC (permalink / raw)
  To: ffmpeg-devel

Le keskiviikkona 12. kesäkuuta 2024, 22.39.49 EEST James Almer a écrit :
> >> These are not padded, and unless I'm reading this wrong, an asm
> >> implementation loading say 16 bytes at a time will overread/write in
> >> dct_unquantize_intra (which offsets block by 1).
> > 
> > AFAIU, there is no padding per se, but the block buffer size is always
> > exactly 64 elements, regardless of the number of coeffs, hence this code.
> > The old NEON intrinsic code seems to assume the block is a multiple of 8
> > elements, and the tail can be overwritten safely (hence not checking in
> > memcmp()).
> > 
> > I have a feeling that I am not grasping the implications of you comment
> > here.
> An asm function loading 16 bytes at a time from block[1] onwards for
> intra may end up reading two bytes more than available at the end of the
> 128 byte wide buffer.

Wouldn't that be a bug in the assembler function? Do you mean that checkasm 
should add padding to check against overwrites?

The whole point of separating inter and intra was to preserve alignment for 
those instruction set extensions that want it (C and RVV couldn't care less).

-- 
レミ・デニ-クールモン
http://www.remlab.net/



_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [FFmpeg-devel] [PATCH 3/4] checkasm/h263dsp: test dct_unquantize_{intra, inter}
  2024-06-12 19:15     ` Rémi Denis-Courmont
@ 2024-06-12 19:39       ` James Almer
  2024-06-12 19:52         ` Rémi Denis-Courmont
  0 siblings, 1 reply; 22+ messages in thread
From: James Almer @ 2024-06-12 19:39 UTC (permalink / raw)
  To: ffmpeg-devel

On 6/12/2024 4:15 PM, Rémi Denis-Courmont wrote:
> Le keskiviikkona 12. kesäkuuta 2024, 21.39.12 EEST James Almer a écrit :
>> On 6/12/2024 1:47 AM, Rémi Denis-Courmont wrote:
>>> ---
>>>
>>>    tests/checkasm/h263dsp.c | 47 +++++++++++++++++++++++++++++++++++++++-
>>>    1 file changed, 46 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/tests/checkasm/h263dsp.c b/tests/checkasm/h263dsp.c
>>> index 2d0957a90b..8a2cdb34df 100644
>>> --- a/tests/checkasm/h263dsp.c
>>> +++ b/tests/checkasm/h263dsp.c
>>> @@ -18,13 +18,55 @@
>>>
>>>     * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
>>>     */
>>>
>>> +#include <stdbool.h>
>>>
>>>    #include <string.h>
>>>    
>>>    #include "checkasm.h"
>>>
>>> -#include "libavcodec/h263dsp.h"
>>> +#include "libavutil/avassert.h"
>>>
>>>    #include "libavutil/mem.h"
>>>    #include "libavutil/mem_internal.h"
>>>
>>> +#include "libavcodec/h263dsp.h"
>>> +#include "libavcodec/mpegvideodata.h"
>>> +
>>> +static uint_fast8_t mpeg_qscale_rnd(void)
>>> +{
>>> +    int n = rnd(), q = (n >> 1) & 31;
>>> +
>>> +    if (n & 1)
>>> +        return ff_mpeg2_non_linear_qscale[q];
>>> +    else
>>> +        return q << 1;
>>> +}
>>> +
>>> +typedef void (*unquantizer)(int16_t *, size_t, int, int);
>>> +
>>> +static void check_dct_unquantize(unquantizer func, const char *name)
>>> +{
>>> +#define LEN 64
>>> +    LOCAL_ALIGNED_16(int16_t, block0, [LEN]);
>>> +    LOCAL_ALIGNED_16(int16_t, block1, [LEN]);
>>
>> These are not padded, and unless I'm reading this wrong, an asm
>> implementation loading say 16 bytes at a time will overread/write in
>> dct_unquantize_intra (which offsets block by 1).
> 
> AFAIU, there is no padding per se, but the block buffer size is always exactly
> 64 elements, regardless of the number of coeffs, hence this code. The old NEON
> intrinsic code seems to assume the block is a multiple of 8 elements, and the
> tail can be overwritten safely (hence not checking in memcmp()).
> 
> I have a feeling that I am not grasping the implications of you comment here.

An asm function loading 16 bytes at a time from block[1] onwards for 
intra may end up reading two bytes more than available at the end of the 
128 byte wide buffer.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [FFmpeg-devel] [PATCH 3/4] checkasm/h263dsp: test dct_unquantize_{intra, inter}
  2024-06-12 18:39   ` James Almer
@ 2024-06-12 19:15     ` Rémi Denis-Courmont
  2024-06-12 19:39       ` James Almer
  0 siblings, 1 reply; 22+ messages in thread
From: Rémi Denis-Courmont @ 2024-06-12 19:15 UTC (permalink / raw)
  To: ffmpeg-devel

Le keskiviikkona 12. kesäkuuta 2024, 21.39.12 EEST James Almer a écrit :
> On 6/12/2024 1:47 AM, Rémi Denis-Courmont wrote:
> > ---
> > 
> >   tests/checkasm/h263dsp.c | 47 +++++++++++++++++++++++++++++++++++++++-
> >   1 file changed, 46 insertions(+), 1 deletion(-)
> > 
> > diff --git a/tests/checkasm/h263dsp.c b/tests/checkasm/h263dsp.c
> > index 2d0957a90b..8a2cdb34df 100644
> > --- a/tests/checkasm/h263dsp.c
> > +++ b/tests/checkasm/h263dsp.c
> > @@ -18,13 +18,55 @@
> > 
> >    * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
> >    */
> > 
> > +#include <stdbool.h>
> > 
> >   #include <string.h>
> >   
> >   #include "checkasm.h"
> > 
> > -#include "libavcodec/h263dsp.h"
> > +#include "libavutil/avassert.h"
> > 
> >   #include "libavutil/mem.h"
> >   #include "libavutil/mem_internal.h"
> > 
> > +#include "libavcodec/h263dsp.h"
> > +#include "libavcodec/mpegvideodata.h"
> > +
> > +static uint_fast8_t mpeg_qscale_rnd(void)
> > +{
> > +    int n = rnd(), q = (n >> 1) & 31;
> > +
> > +    if (n & 1)
> > +        return ff_mpeg2_non_linear_qscale[q];
> > +    else
> > +        return q << 1;
> > +}
> > +
> > +typedef void (*unquantizer)(int16_t *, size_t, int, int);
> > +
> > +static void check_dct_unquantize(unquantizer func, const char *name)
> > +{
> > +#define LEN 64
> > +    LOCAL_ALIGNED_16(int16_t, block0, [LEN]);
> > +    LOCAL_ALIGNED_16(int16_t, block1, [LEN]);
> 
> These are not padded, and unless I'm reading this wrong, an asm
> implementation loading say 16 bytes at a time will overread/write in
> dct_unquantize_intra (which offsets block by 1).

AFAIU, there is no padding per se, but the block buffer size is always exactly 
64 elements, regardless of the number of coeffs, hence this code. The old NEON 
intrinsic code seems to assume the block is a multiple of 8 elements, and the 
tail can be overwritten safely (hence not checking in memcmp()).

I have a feeling that I am not grasping the implications of you comment here.

-- 
Rémi Denis-Courmont
http://www.remlab.net/



_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [FFmpeg-devel] [PATCH 3/4] checkasm/h263dsp: test dct_unquantize_{intra, inter}
  2024-06-12  4:47 ` [FFmpeg-devel] [PATCH 3/4] checkasm/h263dsp: test dct_unquantize_{intra, inter} Rémi Denis-Courmont
@ 2024-06-12 18:39   ` James Almer
  2024-06-12 19:15     ` Rémi Denis-Courmont
  0 siblings, 1 reply; 22+ messages in thread
From: James Almer @ 2024-06-12 18:39 UTC (permalink / raw)
  To: ffmpeg-devel

On 6/12/2024 1:47 AM, Rémi Denis-Courmont wrote:
> ---
>   tests/checkasm/h263dsp.c | 47 +++++++++++++++++++++++++++++++++++++++-
>   1 file changed, 46 insertions(+), 1 deletion(-)
> 
> diff --git a/tests/checkasm/h263dsp.c b/tests/checkasm/h263dsp.c
> index 2d0957a90b..8a2cdb34df 100644
> --- a/tests/checkasm/h263dsp.c
> +++ b/tests/checkasm/h263dsp.c
> @@ -18,13 +18,55 @@
>    * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
>    */
>   
> +#include <stdbool.h>
>   #include <string.h>
>   
>   #include "checkasm.h"
>   
> -#include "libavcodec/h263dsp.h"
> +#include "libavutil/avassert.h"
>   #include "libavutil/mem.h"
>   #include "libavutil/mem_internal.h"
> +#include "libavcodec/h263dsp.h"
> +#include "libavcodec/mpegvideodata.h"
> +
> +static uint_fast8_t mpeg_qscale_rnd(void)
> +{
> +    int n = rnd(), q = (n >> 1) & 31;
> +
> +    if (n & 1)
> +        return ff_mpeg2_non_linear_qscale[q];
> +    else
> +        return q << 1;
> +}
> +
> +typedef void (*unquantizer)(int16_t *, size_t, int, int);
> +
> +static void check_dct_unquantize(unquantizer func, const char *name)
> +{
> +#define LEN 64
> +    LOCAL_ALIGNED_16(int16_t, block0, [LEN]);
> +    LOCAL_ALIGNED_16(int16_t, block1, [LEN]);

These are not padded, and unless I'm reading this wrong, an asm 
implementation loading say 16 bytes at a time will overread/write in 
dct_unquantize_intra (which offsets block by 1).

> +    size_t len = 1 + (rnd() & (LEN - 1));
> +    const int qscale = mpeg_qscale_rnd();
> +    const int qmul = qscale << 1;
> +    const int qadd = (rnd() & 1) ? (qscale - 1) | 1 : 0;
> +
> +    declare_func(void, int16_t *, size_t, int, int);
> +
> +    for (size_t i = 0; i < LEN; i++)
> +        block1[i] = block0[i] = (rnd() & 1) ? rnd() : 0;
> +
> +    if (check_func(func, "h263dsp.dct_unquantize_%s", name)) {
> +        av_assert0(len <= LEN);
> +        call_ref(block0, len, qmul, qadd);
> +        call_new(block1, len, qmul, qadd);
> +
> +        if (memcmp(block0, block1, len * sizeof (int16_t)))
> +            fail();
> +
> +        bench_new(block1, LEN, qmul, qadd);
> +    }
> +}
>   
>   typedef void (*filter)(uint8_t *src, int stride, int qscale);
>   
> @@ -56,6 +98,9 @@ void checkasm_check_h263dsp(void)
>       H263DSPContext ctx;
>   
>       ff_h263dsp_init(&ctx);
> +    check_dct_unquantize(ctx.h263_dct_unquantize_intra, "intra");
> +    check_dct_unquantize(ctx.h263_dct_unquantize_inter, "inter");
> +    report("dct_unquantize");
>       check_loop_filter('h', ctx.h263_h_loop_filter);
>       check_loop_filter('v', ctx.h263_v_loop_filter);
>       report("loop_filter");
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [FFmpeg-devel] [PATCH 3/4] checkasm/h263dsp: test dct_unquantize_{intra, inter}
  2024-06-12  4:47 [FFmpeg-devel] [PATCHv5 " Rémi Denis-Courmont
@ 2024-06-12  4:47 ` Rémi Denis-Courmont
  2024-06-12 18:39   ` James Almer
  0 siblings, 1 reply; 22+ messages in thread
From: Rémi Denis-Courmont @ 2024-06-12  4:47 UTC (permalink / raw)
  To: ffmpeg-devel

---
 tests/checkasm/h263dsp.c | 47 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 46 insertions(+), 1 deletion(-)

diff --git a/tests/checkasm/h263dsp.c b/tests/checkasm/h263dsp.c
index 2d0957a90b..8a2cdb34df 100644
--- a/tests/checkasm/h263dsp.c
+++ b/tests/checkasm/h263dsp.c
@@ -18,13 +18,55 @@
  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
+#include <stdbool.h>
 #include <string.h>
 
 #include "checkasm.h"
 
-#include "libavcodec/h263dsp.h"
+#include "libavutil/avassert.h"
 #include "libavutil/mem.h"
 #include "libavutil/mem_internal.h"
+#include "libavcodec/h263dsp.h"
+#include "libavcodec/mpegvideodata.h"
+
+static uint_fast8_t mpeg_qscale_rnd(void)
+{
+    int n = rnd(), q = (n >> 1) & 31;
+
+    if (n & 1)
+        return ff_mpeg2_non_linear_qscale[q];
+    else
+        return q << 1;
+}
+
+typedef void (*unquantizer)(int16_t *, size_t, int, int);
+
+static void check_dct_unquantize(unquantizer func, const char *name)
+{
+#define LEN 64
+    LOCAL_ALIGNED_16(int16_t, block0, [LEN]);
+    LOCAL_ALIGNED_16(int16_t, block1, [LEN]);
+    size_t len = 1 + (rnd() & (LEN - 1));
+    const int qscale = mpeg_qscale_rnd();
+    const int qmul = qscale << 1;
+    const int qadd = (rnd() & 1) ? (qscale - 1) | 1 : 0;
+
+    declare_func(void, int16_t *, size_t, int, int);
+
+    for (size_t i = 0; i < LEN; i++)
+        block1[i] = block0[i] = (rnd() & 1) ? rnd() : 0;
+
+    if (check_func(func, "h263dsp.dct_unquantize_%s", name)) {
+        av_assert0(len <= LEN);
+        call_ref(block0, len, qmul, qadd);
+        call_new(block1, len, qmul, qadd);
+
+        if (memcmp(block0, block1, len * sizeof (int16_t)))
+            fail();
+
+        bench_new(block1, LEN, qmul, qadd);
+    }
+}
 
 typedef void (*filter)(uint8_t *src, int stride, int qscale);
 
@@ -56,6 +98,9 @@ void checkasm_check_h263dsp(void)
     H263DSPContext ctx;
 
     ff_h263dsp_init(&ctx);
+    check_dct_unquantize(ctx.h263_dct_unquantize_intra, "intra");
+    check_dct_unquantize(ctx.h263_dct_unquantize_inter, "inter");
+    report("dct_unquantize");
     check_loop_filter('h', ctx.h263_h_loop_filter);
     check_loop_filter('v', ctx.h263_v_loop_filter);
     report("loop_filter");
-- 
2.45.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [FFmpeg-devel] [PATCH 3/4] checkasm/h263dsp: test dct_unquantize_{intra, inter}
  2024-06-09 16:23 [FFmpeg-devel] [PATCHv3 1/4] lavc/h263dsp: add DCT dequantisation functions Rémi Denis-Courmont
@ 2024-06-09 16:23 ` Rémi Denis-Courmont
  0 siblings, 0 replies; 22+ messages in thread
From: Rémi Denis-Courmont @ 2024-06-09 16:23 UTC (permalink / raw)
  To: ffmpeg-devel

---
 tests/checkasm/h263dsp.c | 47 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 46 insertions(+), 1 deletion(-)

diff --git a/tests/checkasm/h263dsp.c b/tests/checkasm/h263dsp.c
index 2d0957a90b..b21854d061 100644
--- a/tests/checkasm/h263dsp.c
+++ b/tests/checkasm/h263dsp.c
@@ -18,13 +18,55 @@
  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
+#include <stdbool.h>
 #include <string.h>
 
 #include "checkasm.h"
 
-#include "libavcodec/h263dsp.h"
+#include "libavutil/avassert.h"
 #include "libavutil/mem.h"
 #include "libavutil/mem_internal.h"
+#include "libavcodec/h263dsp.h"
+#include "libavcodec/mpegvideodata.h"
+
+static uint_fast8_t mpeg_qscale_rnd(void)
+{
+    int n = rnd(), q = (n >> 1) & 31;
+
+    if (n & 1)
+        return ff_mpeg2_non_linear_qscale[q];
+    else
+        return q << 1;
+}
+
+typedef void (*unquantizer)(int16_t *, size_t, int, int);
+
+static void check_dct_unquantize(unquantizer func, const char *name)
+{
+#define LEN 64
+    LOCAL_ALIGNED_16(int16_t, block0, [LEN]);
+    LOCAL_ALIGNED_16(int16_t, block1, [LEN]);
+    size_t len = rnd() % (LEN + 1);
+    const int qscale = mpeg_qscale_rnd();
+    const int qmul = qscale << 1;
+    const int qadd = (rnd() & 1) ? (qscale - 1) | 1 : 0;
+
+    declare_func(void, int16_t *, size_t, int, int);
+
+    for (size_t i = 0; i < LEN; i++)
+        block1[i] = block0[i] = (rnd() & 1) ? rnd() : 0;
+
+    if (check_func(func, "h263dsp.dct_unquantize_%s", name)) {
+        av_assert0(len <= LEN);
+        call_ref(block0, len, qmul, qadd);
+        call_new(block1, len, qmul, qadd);
+
+        if (memcmp(block0, block1, len * sizeof (int16_t)))
+            fail();
+
+        bench_new(block1, LEN, qmul, qadd);
+    }
+}
 
 typedef void (*filter)(uint8_t *src, int stride, int qscale);
 
@@ -56,6 +98,9 @@ void checkasm_check_h263dsp(void)
     H263DSPContext ctx;
 
     ff_h263dsp_init(&ctx);
+    check_dct_unquantize(ctx.h263_dct_unquantize_intra, "intra");
+    check_dct_unquantize(ctx.h263_dct_unquantize_inter, "inter");
+    report("dct_unquantize");
     check_loop_filter('h', ctx.h263_h_loop_filter);
     check_loop_filter('v', ctx.h263_v_loop_filter);
     report("loop_filter");
-- 
2.45.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2024-07-06 19:10 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-07-01 19:13 [FFmpeg-devel] [PATCH 1/4] lavc/h263dsp: add DCT dequantisation functions Rémi Denis-Courmont
2024-07-01 19:13 ` [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function Rémi Denis-Courmont
2024-07-06 15:23   ` Andreas Rheinhardt
2024-07-06 16:10     ` Rémi Denis-Courmont
2024-07-06 16:20       ` Andreas Rheinhardt
2024-07-06 16:47         ` Rémi Denis-Courmont
2024-07-06 18:27           ` Andreas Rheinhardt
2024-07-01 19:13 ` [FFmpeg-devel] [PATCH 3/4] checkasm/h263dsp: test dct_unquantize_{intra, inter} Rémi Denis-Courmont
2024-07-06 19:10   ` Andreas Rheinhardt
2024-07-01 19:13 ` [FFmpeg-devel] [PATCH 4/4] lavc/h263dsp: R-V V " Rémi Denis-Courmont
2024-07-06 10:54 ` [FFmpeg-devel] [PATCH 1/4] lavc/h263dsp: add DCT dequantisation functions Rémi Denis-Courmont
2024-07-06 15:17 ` Andreas Rheinhardt
2024-07-06 16:10   ` Rémi Denis-Courmont
2024-07-06 16:19     ` Andreas Rheinhardt
2024-07-06 17:28       ` Rémi Denis-Courmont
2024-07-06 18:23         ` Andreas Rheinhardt
  -- strict thread matches above, loose matches on Subject: below --
2024-06-12  4:47 [FFmpeg-devel] [PATCHv5 " Rémi Denis-Courmont
2024-06-12  4:47 ` [FFmpeg-devel] [PATCH 3/4] checkasm/h263dsp: test dct_unquantize_{intra, inter} Rémi Denis-Courmont
2024-06-12 18:39   ` James Almer
2024-06-12 19:15     ` Rémi Denis-Courmont
2024-06-12 19:39       ` James Almer
2024-06-12 19:52         ` Rémi Denis-Courmont
2024-06-09 16:23 [FFmpeg-devel] [PATCHv3 1/4] lavc/h263dsp: add DCT dequantisation functions Rémi Denis-Courmont
2024-06-09 16:23 ` [FFmpeg-devel] [PATCH 3/4] checkasm/h263dsp: test dct_unquantize_{intra, inter} Rémi Denis-Courmont

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git