Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
* [FFmpeg-devel] [PATCH 1/4] lavc/h263dsp: add DCT dequantisation functions
@ 2024-07-01 19:13 Rémi Denis-Courmont
  2024-07-01 19:13 ` [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function Rémi Denis-Courmont
                   ` (4 more replies)
  0 siblings, 5 replies; 29+ messages in thread
From: Rémi Denis-Courmont @ 2024-07-01 19:13 UTC (permalink / raw)
  To: ffmpeg-devel

Note that optimised implementations of these functions will be taken
into actual use only if MpegEncContext.dct_unquantize_h263_{inter,intra}
are *not* overloaded by existing optimisations.
---
 libavcodec/h263dsp.c | 25 +++++++++++++++++++++++++
 libavcodec/h263dsp.h |  4 ++++
 2 files changed, 29 insertions(+)

diff --git a/libavcodec/h263dsp.c b/libavcodec/h263dsp.c
index 6a13353499..dc146bf821 100644
--- a/libavcodec/h263dsp.c
+++ b/libavcodec/h263dsp.c
@@ -19,10 +19,33 @@
 #include <stdint.h>
 
 #include "libavutil/attributes.h"
+#include "libavutil/avassert.h"
 #include "libavutil/common.h"
 #include "config.h"
 #include "h263dsp.h"
 
+static void h263_dct_unquantize_inter_c(int16_t *block, ptrdiff_t len,
+                                        int qmul, int qadd)
+{
+    for (ptrdiff_t i = 0; i <= len; i++) {
+        int level = block[i];
+
+        if (level) {
+            if (level < 0)
+                level = level * qmul - qadd;
+            else
+                level = level * qmul + qadd;
+            block[i] = level;
+        }
+    }
+}
+
+static void h263_dct_unquantize_intra_c(int16_t *block, ptrdiff_t len,
+                                        int qmul, int qadd)
+{
+    h263_dct_unquantize_inter_c(block + 1, len - 1, qmul, qadd);
+}
+
 const uint8_t ff_h263_loop_filter_strength[32] = {
     0, 1, 1, 2, 2, 3, 3,  4,  4,  4,  5,  5,  6,  6,  7, 7,
     7, 8, 8, 8, 9, 9, 9, 10, 10, 10, 11, 11, 11, 12, 12, 12
@@ -116,6 +139,8 @@ static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale)
 
 av_cold void ff_h263dsp_init(H263DSPContext *ctx)
 {
+    ctx->h263_dct_unquantize_intra = h263_dct_unquantize_intra_c;
+    ctx->h263_dct_unquantize_inter = h263_dct_unquantize_inter_c;
     ctx->h263_h_loop_filter = h263_h_loop_filter_c;
     ctx->h263_v_loop_filter = h263_v_loop_filter_c;
 
diff --git a/libavcodec/h263dsp.h b/libavcodec/h263dsp.h
index 2dccd23392..d26498f491 100644
--- a/libavcodec/h263dsp.h
+++ b/libavcodec/h263dsp.h
@@ -24,6 +24,10 @@
 extern const uint8_t ff_h263_loop_filter_strength[32];
 
 typedef struct H263DSPContext {
+    void (*h263_dct_unquantize_intra)(int16_t *block /* align 16 */,
+                                      ptrdiff_t len, int mul, int add);
+    void (*h263_dct_unquantize_inter)(int16_t *block /* align 16 */,
+                                      ptrdiff_t len, int mul, int add);
     void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale);
     void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale);
 } H263DSPContext;
-- 
2.45.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 29+ messages in thread

* [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function
  2024-07-01 19:13 [FFmpeg-devel] [PATCH 1/4] lavc/h263dsp: add DCT dequantisation functions Rémi Denis-Courmont
@ 2024-07-01 19:13 ` Rémi Denis-Courmont
  2024-07-06 15:23   ` Andreas Rheinhardt
  2024-07-01 19:13 ` [FFmpeg-devel] [PATCH 3/4] checkasm/h263dsp: test dct_unquantize_{intra, inter} Rémi Denis-Courmont
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 29+ messages in thread
From: Rémi Denis-Courmont @ 2024-07-01 19:13 UTC (permalink / raw)
  To: ffmpeg-devel

---
 configure              |  4 ++--
 libavcodec/mpegvideo.c | 40 +++++++++-------------------------------
 2 files changed, 11 insertions(+), 33 deletions(-)

diff --git a/configure b/configure
index fed4c44cd1..42b9a72d5a 100755
--- a/configure
+++ b/configure
@@ -2954,8 +2954,8 @@ ftr_decoder_select="adts_header"
 g2m_decoder_deps="zlib"
 g2m_decoder_select="blockdsp idctdsp jpegtables"
 g729_decoder_select="audiodsp"
-h261_decoder_select="mpegvideodec"
-h261_encoder_select="mpegvideoenc"
+h261_decoder_select="h263dsp mpegvideodec"
+h261_encoder_select="h263dsp mpegvideoenc"
 h263_decoder_select="h263_parser h263dsp mpegvideodec qpeldsp"
 h263_encoder_select="h263dsp mpegvideoenc"
 h263i_decoder_select="h263_decoder"
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index b9a0469335..c9094d1cce 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -202,13 +202,11 @@ static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
                                   int16_t *block, int n, int qscale)
 {
-    int i, level, qmul, qadd;
-    int nCoeffs;
+    int qmul = qscale << 1;
+    int qadd, nCoeffs;
 
     av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
 
-    qmul = qscale << 1;
-
     if (!s->h263_aic) {
         block[0] *= n < 4 ? s->y_dc_scale : s->c_dc_scale;
         qadd = (qscale - 1) | 1;
@@ -220,43 +218,20 @@ static void dct_unquantize_h263_intra_c(MpegEncContext *s,
     else
         nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
 
-    for(i=1; i<=nCoeffs; i++) {
-        level = block[i];
-        if (level) {
-            if (level < 0) {
-                level = level * qmul - qadd;
-            } else {
-                level = level * qmul + qadd;
-            }
-            block[i] = level;
-        }
-    }
+    s->h263dsp.h263_dct_unquantize_intra(block, nCoeffs, qmul, qadd);
 }
 
 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
                                   int16_t *block, int n, int qscale)
 {
-    int i, level, qmul, qadd;
+    int qmul = qscale << 1;
+    int qadd = (qscale - 1) | 1;
     int nCoeffs;
 
     av_assert2(s->block_last_index[n]>=0);
 
-    qadd = (qscale - 1) | 1;
-    qmul = qscale << 1;
-
     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
-
-    for(i=0; i<=nCoeffs; i++) {
-        level = block[i];
-        if (level) {
-            if (level < 0) {
-                level = level * qmul - qadd;
-            } else {
-                level = level * qmul + qadd;
-            }
-            block[i] = level;
-        }
-    }
+    s->h263dsp.h263_dct_unquantize_inter(block, nCoeffs, qmul, qadd);
 }
 
 
@@ -276,6 +251,9 @@ static void gray8(uint8_t *dst, const uint8_t *src, ptrdiff_t linesize, int h)
 static av_cold void dsp_init(MpegEncContext *s)
 {
     ff_blockdsp_init(&s->bdsp);
+#if CONFIG_H263DSP
+    ff_h263dsp_init(&s->h263dsp);
+#endif
     ff_hpeldsp_init(&s->hdsp, s->avctx->flags);
     ff_videodsp_init(&s->vdsp, s->avctx->bits_per_raw_sample);
 
-- 
2.45.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 29+ messages in thread

* [FFmpeg-devel] [PATCH 3/4] checkasm/h263dsp: test dct_unquantize_{intra, inter}
  2024-07-01 19:13 [FFmpeg-devel] [PATCH 1/4] lavc/h263dsp: add DCT dequantisation functions Rémi Denis-Courmont
  2024-07-01 19:13 ` [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function Rémi Denis-Courmont
@ 2024-07-01 19:13 ` Rémi Denis-Courmont
  2024-07-06 19:10   ` Andreas Rheinhardt
  2024-07-01 19:13 ` [FFmpeg-devel] [PATCH 4/4] lavc/h263dsp: R-V V " Rémi Denis-Courmont
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 29+ messages in thread
From: Rémi Denis-Courmont @ 2024-07-01 19:13 UTC (permalink / raw)
  To: ffmpeg-devel

---
 tests/checkasm/h263dsp.c | 57 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 56 insertions(+), 1 deletion(-)

diff --git a/tests/checkasm/h263dsp.c b/tests/checkasm/h263dsp.c
index 2d0957a90b..26020211dc 100644
--- a/tests/checkasm/h263dsp.c
+++ b/tests/checkasm/h263dsp.c
@@ -18,13 +18,65 @@
  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
+#include <stdbool.h>
 #include <string.h>
 
 #include "checkasm.h"
 
-#include "libavcodec/h263dsp.h"
+#include "libavutil/avassert.h"
 #include "libavutil/mem.h"
 #include "libavutil/mem_internal.h"
+#include "libavcodec/h263dsp.h"
+#include "libavcodec/mpegvideodata.h"
+
+static uint_fast8_t mpeg_qscale_rnd(void)
+{
+    int n = rnd(), q = (n >> 1) & 31;
+
+    if (n & 1)
+        return ff_mpeg2_non_linear_qscale[q];
+    else
+        return q << 1;
+}
+
+typedef void (*unquantizer)(int16_t *, ptrdiff_t, int, int);
+
+static void check_dct_unquantize(unquantizer func, bool inter)
+{
+    const char *name = inter ? "inter" : "intra";
+#define LEN 64
+    LOCAL_ALIGNED_16(int16_t, block0, [LEN]);
+    LOCAL_ALIGNED_16(int16_t, block1, [LEN]);
+    size_t len = rnd() % LEN;
+    const int qscale = mpeg_qscale_rnd();
+    const int qmul = qscale << 1;
+    const int qadd = (rnd() & 1) ? (qscale - 1) | 1 : 0;
+
+    declare_func(void, int16_t *, ptrdiff_t, int, int);
+
+    for (size_t i = 0; i < LEN; i++) {
+        int r = rnd();
+
+        block1[i] = block0[i] = (r & 1) ? (r >> 1) : 0;
+    }
+
+    if (check_func(func, "h263dsp.dct_unquantize_%s", name)) {
+        call_ref(block0, 0, qmul, qadd);
+        call_new(block1, 0, qmul, qadd);
+
+        if (memcmp(block0, block1, LEN * sizeof (int16_t)))
+            fail();
+
+        av_assert0(len < LEN);
+        call_ref(block0, len, qmul, qadd);
+        call_new(block1, len, qmul, qadd);
+
+        if (memcmp(block0, block1, LEN * sizeof (int16_t)))
+            fail();
+
+        bench_new(block1, LEN, qmul, qadd);
+    }
+}
 
 typedef void (*filter)(uint8_t *src, int stride, int qscale);
 
@@ -56,6 +108,9 @@ void checkasm_check_h263dsp(void)
     H263DSPContext ctx;
 
     ff_h263dsp_init(&ctx);
+    check_dct_unquantize(ctx.h263_dct_unquantize_intra, false);
+    check_dct_unquantize(ctx.h263_dct_unquantize_inter, true);
+    report("dct_unquantize");
     check_loop_filter('h', ctx.h263_h_loop_filter);
     check_loop_filter('v', ctx.h263_v_loop_filter);
     report("loop_filter");
-- 
2.45.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 29+ messages in thread

* [FFmpeg-devel] [PATCH 4/4] lavc/h263dsp: R-V V dct_unquantize_{intra, inter}
  2024-07-01 19:13 [FFmpeg-devel] [PATCH 1/4] lavc/h263dsp: add DCT dequantisation functions Rémi Denis-Courmont
  2024-07-01 19:13 ` [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function Rémi Denis-Courmont
  2024-07-01 19:13 ` [FFmpeg-devel] [PATCH 3/4] checkasm/h263dsp: test dct_unquantize_{intra, inter} Rémi Denis-Courmont
@ 2024-07-01 19:13 ` Rémi Denis-Courmont
  2024-07-06 10:54 ` [FFmpeg-devel] [PATCH 1/4] lavc/h263dsp: add DCT dequantisation functions Rémi Denis-Courmont
  2024-07-06 15:17 ` Andreas Rheinhardt
  4 siblings, 0 replies; 29+ messages in thread
From: Rémi Denis-Courmont @ 2024-07-01 19:13 UTC (permalink / raw)
  To: ffmpeg-devel

T-Head C908:
h263dsp.dct_unquantize_inter_c:       3.7
h263dsp.dct_unquantize_inter_rvv_i32: 1.7
h263dsp.dct_unquantize_intra_c:       4.0
h263dsp.dct_unquantize_intra_rvv_i32: 1.5

SpacemiT X60:
h263dsp.dct_unquantize_inter_c:       3.5
h263dsp.dct_unquantize_inter_rvv_i32: 1.2
h263dsp.dct_unquantize_intra_c:       3.5
h263dsp.dct_unquantize_intra_rvv_i32: 1.2
---
 libavcodec/riscv/h263dsp_init.c | 15 ++++++++++++---
 libavcodec/riscv/h263dsp_rvv.S  | 27 +++++++++++++++++++++++++++
 2 files changed, 39 insertions(+), 3 deletions(-)

diff --git a/libavcodec/riscv/h263dsp_init.c b/libavcodec/riscv/h263dsp_init.c
index 21b536366c..78d537d1e1 100644
--- a/libavcodec/riscv/h263dsp_init.c
+++ b/libavcodec/riscv/h263dsp_init.c
@@ -25,6 +25,8 @@
 #include "libavutil/riscv/cpu.h"
 #include "libavcodec/h263dsp.h"
 
+void ff_h263_dct_unquantize_intra_rvv(int16_t *, ptrdiff_t len, int, int);
+void ff_h263_dct_unquantize_inter_rvv(int16_t *, ptrdiff_t len, int, int);
 void ff_h263_h_loop_filter_rvv(uint8_t *src, int stride, int q);
 void ff_h263_v_loop_filter_rvv(uint8_t *src, int stride, int q);
 
@@ -33,9 +35,16 @@ av_cold void ff_h263dsp_init_riscv(H263DSPContext *c)
 #if HAVE_RVV
     int flags = av_get_cpu_flags();
 
-    if ((flags & AV_CPU_FLAG_RVV_I32) && ff_rv_vlen_least(128)) {
-        c->h263_h_loop_filter = ff_h263_h_loop_filter_rvv;
-        c->h263_v_loop_filter = ff_h263_v_loop_filter_rvv;
+    if (flags & AV_CPU_FLAG_RVV_I32) {
+        if (ff_rv_vlen_least(128) || (flags & AV_CPU_FLAG_RVB_ADDR)) {
+            c->h263_dct_unquantize_intra = ff_h263_dct_unquantize_intra_rvv;
+            c->h263_dct_unquantize_inter = ff_h263_dct_unquantize_inter_rvv;
+        }
+
+        if (ff_rv_vlen_least(128)) {
+            c->h263_h_loop_filter = ff_h263_h_loop_filter_rvv;
+            c->h263_v_loop_filter = ff_h263_v_loop_filter_rvv;
+        }
     }
 #endif
 }
diff --git a/libavcodec/riscv/h263dsp_rvv.S b/libavcodec/riscv/h263dsp_rvv.S
index 97503d527c..f2b7eb9a87 100644
--- a/libavcodec/riscv/h263dsp_rvv.S
+++ b/libavcodec/riscv/h263dsp_rvv.S
@@ -20,6 +20,33 @@
 
 #include "libavutil/riscv/asm.S"
 
+func ff_h263_dct_unquantize_intra_rvv, zve32x
+        addi    a0, a0, 2
+        j       1f
+endfunc
+
+func ff_h263_dct_unquantize_inter_rvv, zve32x
+        addi    a1, a1, 1
+1:
+        vsetvli  t0, a1, e16, m8, ta, mu
+        vle16.v  v8, (a0)
+#if defined(__riscv_v_min_vlen) && __riscv_v_min_vlen < 128
+        sub      a1, a1, t0
+#endif
+        vmv.v.x  v16, a3
+        vmslt.vi v0, v8, 0
+        vneg.v   v16, v16, v0.t
+        vmsne.vi v0, v8, 0
+        vmadd.vx v8, a2, v16, v0.t
+        vse16.v  v8, (a0)
+#if defined(__riscv_v_min_vlen) && __riscv_v_min_vlen < 128
+        sh1add   a0, t0, a0
+        bnez     a1, 1b
+#endif
+
+        ret
+endfunc
+
         .option push
         .option norelax
 func ff_h263_h_loop_filter_rvv, zve32x
-- 
2.45.2

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [FFmpeg-devel] [PATCH 1/4] lavc/h263dsp: add DCT dequantisation functions
  2024-07-01 19:13 [FFmpeg-devel] [PATCH 1/4] lavc/h263dsp: add DCT dequantisation functions Rémi Denis-Courmont
                   ` (2 preceding siblings ...)
  2024-07-01 19:13 ` [FFmpeg-devel] [PATCH 4/4] lavc/h263dsp: R-V V " Rémi Denis-Courmont
@ 2024-07-06 10:54 ` Rémi Denis-Courmont
  2024-07-06 15:17 ` Andreas Rheinhardt
  4 siblings, 0 replies; 29+ messages in thread
From: Rémi Denis-Courmont @ 2024-07-06 10:54 UTC (permalink / raw)
  To: ffmpeg-devel

Will merge soonish except for feedback.

-- 
Rémi Denis-Courmont
http://www.remlab.net/



_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [FFmpeg-devel] [PATCH 1/4] lavc/h263dsp: add DCT dequantisation functions
  2024-07-01 19:13 [FFmpeg-devel] [PATCH 1/4] lavc/h263dsp: add DCT dequantisation functions Rémi Denis-Courmont
                   ` (3 preceding siblings ...)
  2024-07-06 10:54 ` [FFmpeg-devel] [PATCH 1/4] lavc/h263dsp: add DCT dequantisation functions Rémi Denis-Courmont
@ 2024-07-06 15:17 ` Andreas Rheinhardt
  2024-07-06 16:10   ` Rémi Denis-Courmont
  4 siblings, 1 reply; 29+ messages in thread
From: Andreas Rheinhardt @ 2024-07-06 15:17 UTC (permalink / raw)
  To: ffmpeg-devel

Rémi Denis-Courmont:
> Note that optimised implementations of these functions will be taken
> into actual use only if MpegEncContext.dct_unquantize_h263_{inter,intra}
> are *not* overloaded by existing optimisations.
> ---
>  libavcodec/h263dsp.c | 25 +++++++++++++++++++++++++
>  libavcodec/h263dsp.h |  4 ++++
>  2 files changed, 29 insertions(+)
> 
> diff --git a/libavcodec/h263dsp.c b/libavcodec/h263dsp.c
> index 6a13353499..dc146bf821 100644
> --- a/libavcodec/h263dsp.c
> +++ b/libavcodec/h263dsp.c
> @@ -19,10 +19,33 @@
>  #include <stdint.h>
>  
>  #include "libavutil/attributes.h"
> +#include "libavutil/avassert.h"
>  #include "libavutil/common.h"
>  #include "config.h"
>  #include "h263dsp.h"
>  
> +static void h263_dct_unquantize_inter_c(int16_t *block, ptrdiff_t len,
> +                                        int qmul, int qadd)
> +{
> +    for (ptrdiff_t i = 0; i <= len; i++) {

In the current code, the unquantizing inter does not need an initial
check (the compiler can optimize the for-loop to a do-while-like loop).
This will be lost here.

> +        int level = block[i];
> +
> +        if (level) {
> +            if (level < 0)
> +                level = level * qmul - qadd;
> +            else
> +                level = level * qmul + qadd;
> +            block[i] = level;
> +        }
> +    }
> +}
> +
> +static void h263_dct_unquantize_intra_c(int16_t *block, ptrdiff_t len,
> +                                        int qmul, int qadd)
> +{
> +    h263_dct_unquantize_inter_c(block + 1, len - 1, qmul, qadd);
> +}
> +
>  const uint8_t ff_h263_loop_filter_strength[32] = {
>      0, 1, 1, 2, 2, 3, 3,  4,  4,  4,  5,  5,  6,  6,  7, 7,
>      7, 8, 8, 8, 9, 9, 9, 10, 10, 10, 11, 11, 11, 12, 12, 12
> @@ -116,6 +139,8 @@ static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale)
>  
>  av_cold void ff_h263dsp_init(H263DSPContext *ctx)
>  {
> +    ctx->h263_dct_unquantize_intra = h263_dct_unquantize_intra_c;
> +    ctx->h263_dct_unquantize_inter = h263_dct_unquantize_inter_c;
>      ctx->h263_h_loop_filter = h263_h_loop_filter_c;
>      ctx->h263_v_loop_filter = h263_v_loop_filter_c;
>  
> diff --git a/libavcodec/h263dsp.h b/libavcodec/h263dsp.h
> index 2dccd23392..d26498f491 100644
> --- a/libavcodec/h263dsp.h
> +++ b/libavcodec/h263dsp.h
> @@ -24,6 +24,10 @@
>  extern const uint8_t ff_h263_loop_filter_strength[32];
>  
>  typedef struct H263DSPContext {
> +    void (*h263_dct_unquantize_intra)(int16_t *block /* align 16 */,
> +                                      ptrdiff_t len, int mul, int add);
> +    void (*h263_dct_unquantize_inter)(int16_t *block /* align 16 */,
> +                                      ptrdiff_t len, int mul, int add);
>      void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale);
>      void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale);
>  } H263DSPContext;

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function
  2024-07-01 19:13 ` [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function Rémi Denis-Courmont
@ 2024-07-06 15:23   ` Andreas Rheinhardt
  2024-07-06 16:10     ` Rémi Denis-Courmont
  0 siblings, 1 reply; 29+ messages in thread
From: Andreas Rheinhardt @ 2024-07-06 15:23 UTC (permalink / raw)
  To: ffmpeg-devel

Rémi Denis-Courmont:
> ---
>  configure              |  4 ++--
>  libavcodec/mpegvideo.c | 40 +++++++++-------------------------------
>  2 files changed, 11 insertions(+), 33 deletions(-)
> 
> diff --git a/configure b/configure
> index fed4c44cd1..42b9a72d5a 100755
> --- a/configure
> +++ b/configure
> @@ -2954,8 +2954,8 @@ ftr_decoder_select="adts_header"
>  g2m_decoder_deps="zlib"
>  g2m_decoder_select="blockdsp idctdsp jpegtables"
>  g729_decoder_select="audiodsp"
> -h261_decoder_select="mpegvideodec"
> -h261_encoder_select="mpegvideoenc"
> +h261_decoder_select="h263dsp mpegvideodec"

Unnecessary since f793074784ae79dabc4f83b61710161b3fe3288c
(Btw: It was not only H.261 and H.263 using these; RV34 and VC-1 did so,
too, but only for the error-resilience code. But this is no longer an
issue since 7b539ca3e6bae701d88096ff8dc3db7f13b7318a.)

> +h261_encoder_select="h263dsp mpegvideoenc"
>  h263_decoder_select="h263_parser h263dsp mpegvideodec qpeldsp"
>  h263_encoder_select="h263dsp mpegvideoenc"
>  h263i_decoder_select="h263_decoder"
> diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
> index b9a0469335..c9094d1cce 100644
> --- a/libavcodec/mpegvideo.c
> +++ b/libavcodec/mpegvideo.c
> @@ -202,13 +202,11 @@ static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
>  static void dct_unquantize_h263_intra_c(MpegEncContext *s,
>                                    int16_t *block, int n, int qscale)
>  {
> -    int i, level, qmul, qadd;
> -    int nCoeffs;
> +    int qmul = qscale << 1;
> +    int qadd, nCoeffs;
>  
>      av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
>  
> -    qmul = qscale << 1;
> -
>      if (!s->h263_aic) {
>          block[0] *= n < 4 ? s->y_dc_scale : s->c_dc_scale;
>          qadd = (qscale - 1) | 1;
> @@ -220,43 +218,20 @@ static void dct_unquantize_h263_intra_c(MpegEncContext *s,
>      else
>          nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
>  
> -    for(i=1; i<=nCoeffs; i++) {
> -        level = block[i];
> -        if (level) {
> -            if (level < 0) {
> -                level = level * qmul - qadd;
> -            } else {
> -                level = level * qmul + qadd;
> -            }
> -            block[i] = level;
> -        }
> -    }
> +    s->h263dsp.h263_dct_unquantize_intra(block, nCoeffs, qmul, qadd);
>  }
>  
>  static void dct_unquantize_h263_inter_c(MpegEncContext *s,
>                                    int16_t *block, int n, int qscale)
>  {
> -    int i, level, qmul, qadd;
> +    int qmul = qscale << 1;
> +    int qadd = (qscale - 1) | 1;
>      int nCoeffs;
>  
>      av_assert2(s->block_last_index[n]>=0);
>  
> -    qadd = (qscale - 1) | 1;
> -    qmul = qscale << 1;
> -
>      nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
> -
> -    for(i=0; i<=nCoeffs; i++) {
> -        level = block[i];
> -        if (level) {
> -            if (level < 0) {
> -                level = level * qmul - qadd;
> -            } else {
> -                level = level * qmul + qadd;
> -            }
> -            block[i] = level;
> -        }
> -    }
> +    s->h263dsp.h263_dct_unquantize_inter(block, nCoeffs, qmul, qadd);

This adds an indirection. I have asked you to actually benchmark this
code (and not only the DSP function you add), but you never did.

>  }
>  
>  
> @@ -276,6 +251,9 @@ static void gray8(uint8_t *dst, const uint8_t *src, ptrdiff_t linesize, int h)
>  static av_cold void dsp_init(MpegEncContext *s)
>  {
>      ff_blockdsp_init(&s->bdsp);
> +#if CONFIG_H263DSP
> +    ff_h263dsp_init(&s->h263dsp);
> +#endif
>      ff_hpeldsp_init(&s->hdsp, s->avctx->flags);
>      ff_videodsp_init(&s->vdsp, s->avctx->bits_per_raw_sample);
>  

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [FFmpeg-devel] [PATCH 1/4] lavc/h263dsp: add DCT dequantisation functions
  2024-07-06 15:17 ` Andreas Rheinhardt
@ 2024-07-06 16:10   ` Rémi Denis-Courmont
  2024-07-06 16:19     ` Andreas Rheinhardt
  0 siblings, 1 reply; 29+ messages in thread
From: Rémi Denis-Courmont @ 2024-07-06 16:10 UTC (permalink / raw)
  To: ffmpeg-devel

Le lauantaina 6. heinäkuuta 2024, 18.17.37 EEST Andreas Rheinhardt a écrit :
> Rémi Denis-Courmont:
> > Note that optimised implementations of these functions will be taken
> > into actual use only if MpegEncContext.dct_unquantize_h263_{inter,intra}
> > are *not* overloaded by existing optimisations.
> > ---
> > 
> >  libavcodec/h263dsp.c | 25 +++++++++++++++++++++++++
> >  libavcodec/h263dsp.h |  4 ++++
> >  2 files changed, 29 insertions(+)
> > 
> > diff --git a/libavcodec/h263dsp.c b/libavcodec/h263dsp.c
> > index 6a13353499..dc146bf821 100644
> > --- a/libavcodec/h263dsp.c
> > +++ b/libavcodec/h263dsp.c
> > @@ -19,10 +19,33 @@
> > 
> >  #include <stdint.h>
> >  
> >  #include "libavutil/attributes.h"
> > 
> > +#include "libavutil/avassert.h"
> > 
> >  #include "libavutil/common.h"
> >  #include "config.h"
> >  #include "h263dsp.h"
> > 
> > +static void h263_dct_unquantize_inter_c(int16_t *block, ptrdiff_t len,
> > +                                        int qmul, int qadd)
> > +{
> > +    for (ptrdiff_t i = 0; i <= len; i++) {
> 
> In the current code, the unquantizing inter does not need an initial
> check (the compiler can optimize the for-loop to a do-while-like loop).
> This will be lost here.

If this not the consequence of your own actions...

You asked:
- to use a signed type there, and
- to avoid the separate initial branch in the intra case.

And that is exactly how this ends up. You can't have it both ways.

-- 
レミ・デニ-クールモン
http://www.remlab.net/



_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function
  2024-07-06 15:23   ` Andreas Rheinhardt
@ 2024-07-06 16:10     ` Rémi Denis-Courmont
  2024-07-06 16:20       ` Andreas Rheinhardt
  0 siblings, 1 reply; 29+ messages in thread
From: Rémi Denis-Courmont @ 2024-07-06 16:10 UTC (permalink / raw)
  To: ffmpeg-devel

Le lauantaina 6. heinäkuuta 2024, 18.23.00 EEST Andreas Rheinhardt a écrit :
> >  static void dct_unquantize_h263_inter_c(MpegEncContext *s,
> >  
> >                                    int16_t *block, int n, int qscale)
> >  
> >  {
> > 
> > -    int i, level, qmul, qadd;
> > +    int qmul = qscale << 1;
> > +    int qadd = (qscale - 1) | 1;
> > 
> >      int nCoeffs;
> >      
> >      av_assert2(s->block_last_index[n]>=0);
> > 
> > -    qadd = (qscale - 1) | 1;
> > -    qmul = qscale << 1;
> > -
> > 
> >      nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
> > 
> > -
> > -    for(i=0; i<=nCoeffs; i++) {
> > -        level = block[i];
> > -        if (level) {
> > -            if (level < 0) {
> > -                level = level * qmul - qadd;
> > -            } else {
> > -                level = level * qmul + qadd;
> > -            }
> > -            block[i] = level;
> > -        }
> > -    }
> > +    s->h263dsp.h263_dct_unquantize_inter(block, nCoeffs, qmul, qadd);
> 
> This adds an indirection. I have asked you to actually benchmark this
> code (and not only the DSP function you add), but you never did.

I already pointed out previously that this is the way this project does DSP 
code. Certainly it would be nice to hard-code the path when there is only one 
possible. This is often the case on Armv8 notably, and of course on platforms 
without optimisations.

But that's a general problem way beyond the scope of this patchset. We always 
add indirect function calls in this sort of situation, and I don't see why I 
would have duty to benchmark it, so I am going to ignore this.

-- 
雷米‧德尼-库尔蒙
http://www.remlab.net/



_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [FFmpeg-devel] [PATCH 1/4] lavc/h263dsp: add DCT dequantisation functions
  2024-07-06 16:10   ` Rémi Denis-Courmont
@ 2024-07-06 16:19     ` Andreas Rheinhardt
  2024-07-06 17:28       ` Rémi Denis-Courmont
  0 siblings, 1 reply; 29+ messages in thread
From: Andreas Rheinhardt @ 2024-07-06 16:19 UTC (permalink / raw)
  To: ffmpeg-devel

Rémi Denis-Courmont:
> Le lauantaina 6. heinäkuuta 2024, 18.17.37 EEST Andreas Rheinhardt a écrit :
>> Rémi Denis-Courmont:
>>> Note that optimised implementations of these functions will be taken
>>> into actual use only if MpegEncContext.dct_unquantize_h263_{inter,intra}
>>> are *not* overloaded by existing optimisations.
>>> ---
>>>
>>>  libavcodec/h263dsp.c | 25 +++++++++++++++++++++++++
>>>  libavcodec/h263dsp.h |  4 ++++
>>>  2 files changed, 29 insertions(+)
>>>
>>> diff --git a/libavcodec/h263dsp.c b/libavcodec/h263dsp.c
>>> index 6a13353499..dc146bf821 100644
>>> --- a/libavcodec/h263dsp.c
>>> +++ b/libavcodec/h263dsp.c
>>> @@ -19,10 +19,33 @@
>>>
>>>  #include <stdint.h>
>>>  
>>>  #include "libavutil/attributes.h"
>>>
>>> +#include "libavutil/avassert.h"
>>>
>>>  #include "libavutil/common.h"
>>>  #include "config.h"
>>>  #include "h263dsp.h"
>>>
>>> +static void h263_dct_unquantize_inter_c(int16_t *block, ptrdiff_t len,
>>> +                                        int qmul, int qadd)
>>> +{
>>> +    for (ptrdiff_t i = 0; i <= len; i++) {
>>
>> In the current code, the unquantizing inter does not need an initial
>> check (the compiler can optimize the for-loop to a do-while-like loop).
>> This will be lost here.
> 
> If this not the consequence of your own actions...
> 
> You asked:
> - to use a signed type there, and
> - to avoid the separate initial branch in the intra case.
> 
> And that is exactly how this ends up. You can't have it both ways.
> 

One can have it both ways: Use a do-while loop.

- Andreas

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function
  2024-07-06 16:10     ` Rémi Denis-Courmont
@ 2024-07-06 16:20       ` Andreas Rheinhardt
  2024-07-06 16:47         ` Rémi Denis-Courmont
  0 siblings, 1 reply; 29+ messages in thread
From: Andreas Rheinhardt @ 2024-07-06 16:20 UTC (permalink / raw)
  To: ffmpeg-devel

Rémi Denis-Courmont:
> Le lauantaina 6. heinäkuuta 2024, 18.23.00 EEST Andreas Rheinhardt a écrit :
>>>  static void dct_unquantize_h263_inter_c(MpegEncContext *s,
>>>  
>>>                                    int16_t *block, int n, int qscale)
>>>  
>>>  {
>>>
>>> -    int i, level, qmul, qadd;
>>> +    int qmul = qscale << 1;
>>> +    int qadd = (qscale - 1) | 1;
>>>
>>>      int nCoeffs;
>>>      
>>>      av_assert2(s->block_last_index[n]>=0);
>>>
>>> -    qadd = (qscale - 1) | 1;
>>> -    qmul = qscale << 1;
>>> -
>>>
>>>      nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
>>>
>>> -
>>> -    for(i=0; i<=nCoeffs; i++) {
>>> -        level = block[i];
>>> -        if (level) {
>>> -            if (level < 0) {
>>> -                level = level * qmul - qadd;
>>> -            } else {
>>> -                level = level * qmul + qadd;
>>> -            }
>>> -            block[i] = level;
>>> -        }
>>> -    }
>>> +    s->h263dsp.h263_dct_unquantize_inter(block, nCoeffs, qmul, qadd);
>>
>> This adds an indirection. I have asked you to actually benchmark this
>> code (and not only the DSP function you add), but you never did.
> 
> I already pointed out previously that this is the way this project does DSP 
> code. Certainly it would be nice to hard-code the path when there is only one 
> possible. This is often the case on Armv8 notably, and of course on platforms 
> without optimisations.
> 
> But that's a general problem way beyond the scope of this patchset. We always 
> add indirect function calls in this sort of situation, and I don't see why I 
> would have duty to benchmark it, so I am going to ignore this.

You have a duty to benchmark it because you add it where it wasn't before.

- Andreas

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function
  2024-07-06 16:20       ` Andreas Rheinhardt
@ 2024-07-06 16:47         ` Rémi Denis-Courmont
  2024-07-06 18:27           ` Andreas Rheinhardt
  0 siblings, 1 reply; 29+ messages in thread
From: Rémi Denis-Courmont @ 2024-07-06 16:47 UTC (permalink / raw)
  To: ffmpeg-devel

Le lauantaina 6. heinäkuuta 2024, 19.20.33 EEST Andreas Rheinhardt a écrit :
> Rémi Denis-Courmont:
> > Le lauantaina 6. heinäkuuta 2024, 18.23.00 EEST Andreas Rheinhardt a écrit 
:
> >>>  static void dct_unquantize_h263_inter_c(MpegEncContext *s,
> >>>  
> >>>                                    int16_t *block, int n, int qscale)
> >>>  
> >>>  {
> >>> 
> >>> -    int i, level, qmul, qadd;
> >>> +    int qmul = qscale << 1;
> >>> +    int qadd = (qscale - 1) | 1;
> >>> 
> >>>      int nCoeffs;
> >>>      
> >>>      av_assert2(s->block_last_index[n]>=0);
> >>> 
> >>> -    qadd = (qscale - 1) | 1;
> >>> -    qmul = qscale << 1;
> >>> -
> >>> 
> >>>      nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
> >>> 
> >>> -
> >>> -    for(i=0; i<=nCoeffs; i++) {
> >>> -        level = block[i];
> >>> -        if (level) {
> >>> -            if (level < 0) {
> >>> -                level = level * qmul - qadd;
> >>> -            } else {
> >>> -                level = level * qmul + qadd;
> >>> -            }
> >>> -            block[i] = level;
> >>> -        }
> >>> -    }
> >>> +    s->h263dsp.h263_dct_unquantize_inter(block, nCoeffs, qmul, qadd);
> >> 
> >> This adds an indirection. I have asked you to actually benchmark this
> >> code (and not only the DSP function you add), but you never did.
> > 
> > I already pointed out previously that this is the way this project does
> > DSP
> > code. Certainly it would be nice to hard-code the path when there is only
> > one possible. This is often the case on Armv8 notably, and of course on
> > platforms without optimisations.
> > 
> > But that's a general problem way beyond the scope of this patchset. We
> > always add indirect function calls in this sort of situation, and I don't
> > see why I would have duty to benchmark it, so I am going to ignore this.
> 
> You have a duty to benchmark it because you add it where it wasn't before.

I don't recall other people benchmarking the indirect branch they've added 
previously for other DSP code. Recent examples include VVC and FLAC. 
Rightfully so, because there is not really an alternative anyway. Even GNU 
IFUNCs and Glibc alternative libraries internally use an indirect branch 
(hidden in PLT/GOT), and FFmpeg can't self-patch at load-time like the Linux 
kernel does, nor can it generate dynamic PLT entries with direct branches.

Also if an indirect call is unacceptable, then how come the calling code is 
itself an indirect call and for abstraction rather than performance.

Your request is completely arbitrary here. Yes, there is already an indirect 
call close up, and so? I'm not trying to clean MpegEncContext here, only 
trying to add one function to checkasm, RVV and (with James' work) post-MMX 
x86.

Lastly, you don't even specify what benchmark to run. Comparing something 
against nothing is, as my manager would say, pointless, since the relative 
overhead ought to be an approximation of infinity (in practice, you end up 
measuring the overhead of the benchmarking code instead).

-- 
Rémi Denis-Courmont
http://www.remlab.net/



_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [FFmpeg-devel] [PATCH 1/4] lavc/h263dsp: add DCT dequantisation functions
  2024-07-06 16:19     ` Andreas Rheinhardt
@ 2024-07-06 17:28       ` Rémi Denis-Courmont
  2024-07-06 18:23         ` Andreas Rheinhardt
  0 siblings, 1 reply; 29+ messages in thread
From: Rémi Denis-Courmont @ 2024-07-06 17:28 UTC (permalink / raw)
  To: ffmpeg-devel

Le lauantaina 6. heinäkuuta 2024, 19.19.35 EEST Andreas Rheinhardt a écrit :
> Rémi Denis-Courmont:
> > Le lauantaina 6. heinäkuuta 2024, 18.17.37 EEST Andreas Rheinhardt a écrit 
:
> >> Rémi Denis-Courmont:
> >>> Note that optimised implementations of these functions will be taken
> >>> into actual use only if MpegEncContext.dct_unquantize_h263_{inter,intra}
> >>> are *not* overloaded by existing optimisations.
> >>> ---
> >>> 
> >>>  libavcodec/h263dsp.c | 25 +++++++++++++++++++++++++
> >>>  libavcodec/h263dsp.h |  4 ++++
> >>>  2 files changed, 29 insertions(+)
> >>> 
> >>> diff --git a/libavcodec/h263dsp.c b/libavcodec/h263dsp.c
> >>> index 6a13353499..dc146bf821 100644
> >>> --- a/libavcodec/h263dsp.c
> >>> +++ b/libavcodec/h263dsp.c
> >>> @@ -19,10 +19,33 @@
> >>> 
> >>>  #include <stdint.h>
> >>>  
> >>>  #include "libavutil/attributes.h"
> >>> 
> >>> +#include "libavutil/avassert.h"
> >>> 
> >>>  #include "libavutil/common.h"
> >>>  #include "config.h"
> >>>  #include "h263dsp.h"
> >>> 
> >>> +static void h263_dct_unquantize_inter_c(int16_t *block, ptrdiff_t len,
> >>> +                                        int qmul, int qadd)
> >>> +{
> >>> +    for (ptrdiff_t i = 0; i <= len; i++) {
> >> 
> >> In the current code, the unquantizing inter does not need an initial
> >> check (the compiler can optimize the for-loop to a do-while-like loop).
> >> This will be lost here.
> > 
> > If this not the consequence of your own actions...
> > 
> > You asked:
> > - to use a signed type there, and
> > - to avoid the separate initial branch in the intra case.
> > 
> > And that is exactly how this ends up. You can't have it both ways.
> 
> One can have it both ways: Use a do-while loop.

That is only possible if the otherwise identical intra/inter code are 
duplicated, or if putting back the initial check in the intra case. The very 
check which you asked to remove.

So no, you can't have it both ways. I don't really care either way, but the 
check for intra needs to be *somewhere*.

-- 
レミ・デニ-クールモン
http://www.remlab.net/



_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [FFmpeg-devel] [PATCH 1/4] lavc/h263dsp: add DCT dequantisation functions
  2024-07-06 17:28       ` Rémi Denis-Courmont
@ 2024-07-06 18:23         ` Andreas Rheinhardt
  0 siblings, 0 replies; 29+ messages in thread
From: Andreas Rheinhardt @ 2024-07-06 18:23 UTC (permalink / raw)
  To: ffmpeg-devel

Rémi Denis-Courmont:
> Le lauantaina 6. heinäkuuta 2024, 19.19.35 EEST Andreas Rheinhardt a écrit :
>> Rémi Denis-Courmont:
>>> Le lauantaina 6. heinäkuuta 2024, 18.17.37 EEST Andreas Rheinhardt a écrit 
> :
>>>> Rémi Denis-Courmont:
>>>>> Note that optimised implementations of these functions will be taken
>>>>> into actual use only if MpegEncContext.dct_unquantize_h263_{inter,intra}
>>>>> are *not* overloaded by existing optimisations.
>>>>> ---
>>>>>
>>>>>  libavcodec/h263dsp.c | 25 +++++++++++++++++++++++++
>>>>>  libavcodec/h263dsp.h |  4 ++++
>>>>>  2 files changed, 29 insertions(+)
>>>>>
>>>>> diff --git a/libavcodec/h263dsp.c b/libavcodec/h263dsp.c
>>>>> index 6a13353499..dc146bf821 100644
>>>>> --- a/libavcodec/h263dsp.c
>>>>> +++ b/libavcodec/h263dsp.c
>>>>> @@ -19,10 +19,33 @@
>>>>>
>>>>>  #include <stdint.h>
>>>>>  
>>>>>  #include "libavutil/attributes.h"
>>>>>
>>>>> +#include "libavutil/avassert.h"
>>>>>
>>>>>  #include "libavutil/common.h"
>>>>>  #include "config.h"
>>>>>  #include "h263dsp.h"
>>>>>
>>>>> +static void h263_dct_unquantize_inter_c(int16_t *block, ptrdiff_t len,
>>>>> +                                        int qmul, int qadd)
>>>>> +{
>>>>> +    for (ptrdiff_t i = 0; i <= len; i++) {
>>>>
>>>> In the current code, the unquantizing inter does not need an initial
>>>> check (the compiler can optimize the for-loop to a do-while-like loop).
>>>> This will be lost here.
>>>
>>> If this not the consequence of your own actions...
>>>
>>> You asked:
>>> - to use a signed type there, and
>>> - to avoid the separate initial branch in the intra case.
>>>
>>> And that is exactly how this ends up. You can't have it both ways.
>>
>> One can have it both ways: Use a do-while loop.
> 
> That is only possible if the otherwise identical intra/inter code are 
> duplicated, or if putting back the initial check in the intra case. The very 
> check which you asked to remove.
> 
> So no, you can't have it both ways. I don't really care either way, but the 
> check for intra needs to be *somewhere*.
> 

No, it need not exist. The point of raster_end etc. is purely to
shortcut this loop. It would be correct to always use 63 for nCoeffs. In
fact, it seems to me that dct_unquantize_h263_inter_mmx() already uses a
do-while-like loop.

- Andreas

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function
  2024-07-06 16:47         ` Rémi Denis-Courmont
@ 2024-07-06 18:27           ` Andreas Rheinhardt
  0 siblings, 0 replies; 29+ messages in thread
From: Andreas Rheinhardt @ 2024-07-06 18:27 UTC (permalink / raw)
  To: ffmpeg-devel

Rémi Denis-Courmont:
> Le lauantaina 6. heinäkuuta 2024, 19.20.33 EEST Andreas Rheinhardt a écrit :
>> Rémi Denis-Courmont:
>>> Le lauantaina 6. heinäkuuta 2024, 18.23.00 EEST Andreas Rheinhardt a écrit 
> :
>>>>>  static void dct_unquantize_h263_inter_c(MpegEncContext *s,
>>>>>  
>>>>>                                    int16_t *block, int n, int qscale)
>>>>>  
>>>>>  {
>>>>>
>>>>> -    int i, level, qmul, qadd;
>>>>> +    int qmul = qscale << 1;
>>>>> +    int qadd = (qscale - 1) | 1;
>>>>>
>>>>>      int nCoeffs;
>>>>>      
>>>>>      av_assert2(s->block_last_index[n]>=0);
>>>>>
>>>>> -    qadd = (qscale - 1) | 1;
>>>>> -    qmul = qscale << 1;
>>>>> -
>>>>>
>>>>>      nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
>>>>>
>>>>> -
>>>>> -    for(i=0; i<=nCoeffs; i++) {
>>>>> -        level = block[i];
>>>>> -        if (level) {
>>>>> -            if (level < 0) {
>>>>> -                level = level * qmul - qadd;
>>>>> -            } else {
>>>>> -                level = level * qmul + qadd;
>>>>> -            }
>>>>> -            block[i] = level;
>>>>> -        }
>>>>> -    }
>>>>> +    s->h263dsp.h263_dct_unquantize_inter(block, nCoeffs, qmul, qadd);
>>>>
>>>> This adds an indirection. I have asked you to actually benchmark this
>>>> code (and not only the DSP function you add), but you never did.
>>>
>>> I already pointed out previously that this is the way this project does
>>> DSP
>>> code. Certainly it would be nice to hard-code the path when there is only
>>> one possible. This is often the case on Armv8 notably, and of course on
>>> platforms without optimisations.
>>>
>>> But that's a general problem way beyond the scope of this patchset. We
>>> always add indirect function calls in this sort of situation, and I don't
>>> see why I would have duty to benchmark it, so I am going to ignore this.
>>
>> You have a duty to benchmark it because you add it where it wasn't before.
> 
> I don't recall other people benchmarking the indirect branch they've added 
> previously for other DSP code. Recent examples include VVC and FLAC. 
> Rightfully so, because there is not really an alternative anyway. Even GNU 
> IFUNCs and Glibc alternative libraries internally use an indirect branch 
> (hidden in PLT/GOT), and FFmpeg can't self-patch at load-time like the Linux 
> kernel does, nor can it generate dynamic PLT entries with direct branches.
> 
> Also if an indirect call is unacceptable, then how come the calling code is 
> itself an indirect call and for abstraction rather than performance.

I did not even say that it is unacceptable. Merely that it should be
benched.

> 
> Your request is completely arbitrary here. Yes, there is already an indirect 
> call close up, and so? I'm not trying to clean MpegEncContext here, only 
> trying to add one function to checkasm, RVV and (with James' work) post-MMX 
> x86.
> 
> Lastly, you don't even specify what benchmark to run. Comparing something 
> against nothing is, as my manager would say, pointless, since the relative 
> overhead ought to be an approximation of infinity (in practice, you end up 
> measuring the overhead of the benchmarking code instead).

You shall compare the function you are modifying, namely
dct_unquantize_h263_(intra|inter)_c.

- Andreas

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [FFmpeg-devel] [PATCH 3/4] checkasm/h263dsp: test dct_unquantize_{intra, inter}
  2024-07-01 19:13 ` [FFmpeg-devel] [PATCH 3/4] checkasm/h263dsp: test dct_unquantize_{intra, inter} Rémi Denis-Courmont
@ 2024-07-06 19:10   ` Andreas Rheinhardt
  0 siblings, 0 replies; 29+ messages in thread
From: Andreas Rheinhardt @ 2024-07-06 19:10 UTC (permalink / raw)
  To: ffmpeg-devel

Rémi Denis-Courmont:
> ---
>  tests/checkasm/h263dsp.c | 57 +++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 56 insertions(+), 1 deletion(-)
> 
> diff --git a/tests/checkasm/h263dsp.c b/tests/checkasm/h263dsp.c
> index 2d0957a90b..26020211dc 100644
> --- a/tests/checkasm/h263dsp.c
> +++ b/tests/checkasm/h263dsp.c
> @@ -18,13 +18,65 @@
>   * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
>   */
>  
> +#include <stdbool.h>
>  #include <string.h>
>  
>  #include "checkasm.h"
>  
> -#include "libavcodec/h263dsp.h"
> +#include "libavutil/avassert.h"
>  #include "libavutil/mem.h"
>  #include "libavutil/mem_internal.h"
> +#include "libavcodec/h263dsp.h"
> +#include "libavcodec/mpegvideodata.h"
> +
> +static uint_fast8_t mpeg_qscale_rnd(void)
> +{
> +    int n = rnd(), q = (n >> 1) & 31;
> +
> +    if (n & 1)
> +        return ff_mpeg2_non_linear_qscale[q];
> +    else
> +        return q << 1;

This branch reminds me of the q_scale_type check from the
mpeg2-unquantize functions. Why is it here?
(For H.263, qscale is simply a value in 1..31. This code will test
values outside of this range.)

> +}
> +
> +typedef void (*unquantizer)(int16_t *, ptrdiff_t, int, int);
> +
> +static void check_dct_unquantize(unquantizer func, bool inter)
> +{
> +    const char *name = inter ? "inter" : "intra";
> +#define LEN 64
> +    LOCAL_ALIGNED_16(int16_t, block0, [LEN]);
> +    LOCAL_ALIGNED_16(int16_t, block1, [LEN]);
> +    size_t len = rnd() % LEN;
> +    const int qscale = mpeg_qscale_rnd();
> +    const int qmul = qscale << 1;
> +    const int qadd = (rnd() & 1) ? (qscale - 1) | 1 : 0;
> +
> +    declare_func(void, int16_t *, ptrdiff_t, int, int);
> +
> +    for (size_t i = 0; i < LEN; i++) {
> +        int r = rnd();
> +
> +        block1[i] = block0[i] = (r & 1) ? (r >> 1) : 0;

This will potentially set all elements to some nonzero value. In
reality, all elements not processed in the (current) C version are zero;
in other words, one could add

    for (; i <= 63; i++)
        av_assert0(!block[i]);

at the end of dct_unquantize_h263_(inter|intra)_c. This is what makes it
possible to switch to a do-while loop. It would also make it possible to
unroll the loop/omit special handling for the tail when vectorizing, so
this test is a hindrance to potential optimizations.

> +    }
> +
> +    if (check_func(func, "h263dsp.dct_unquantize_%s", name)) {
> +        call_ref(block0, 0, qmul, qadd);
> +        call_new(block1, 0, qmul, qadd);
> +
> +        if (memcmp(block0, block1, LEN * sizeof (int16_t)))
> +            fail();
> +
> +        av_assert0(len < LEN);
> +        call_ref(block0, len, qmul, qadd);
> +        call_new(block1, len, qmul, qadd);
> +
> +        if (memcmp(block0, block1, LEN * sizeof (int16_t)))
> +            fail();
> +
> +        bench_new(block1, LEN, qmul, qadd);
> +    }
> +}
>  
>  typedef void (*filter)(uint8_t *src, int stride, int qscale);
>  
> @@ -56,6 +108,9 @@ void checkasm_check_h263dsp(void)
>      H263DSPContext ctx;
>  
>      ff_h263dsp_init(&ctx);
> +    check_dct_unquantize(ctx.h263_dct_unquantize_intra, false);
> +    check_dct_unquantize(ctx.h263_dct_unquantize_inter, true);
> +    report("dct_unquantize");
>      check_loop_filter('h', ctx.h263_h_loop_filter);
>      check_loop_filter('v', ctx.h263_v_loop_filter);
>      report("loop_filter");

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function
  2024-06-14 15:41         ` James Almer
@ 2024-06-14 19:03           ` Rémi Denis-Courmont
  0 siblings, 0 replies; 29+ messages in thread
From: Rémi Denis-Courmont @ 2024-06-14 19:03 UTC (permalink / raw)
  To: ffmpeg-devel

Le perjantaina 14. kesäkuuta 2024, 18.41.48 EEST James Almer a écrit :
> On 6/14/2024 12:11 PM, Rémi Denis-Courmont wrote:
> > Le perjantaina 14. kesäkuuta 2024, 17.45.50 EEST Rémi Denis-Courmont a 
écrit :
> >> And x86 only has MMX.
> > 
> > And the x86 code uses inline assembler. That's not viable on any ISA with
> > sane conventions such as Arm or RISC-V. The compiler will reject our
> > vector clobbers, unless the Vector extension is included in the
> > compilation target. But then that breaks run-time detection, and will be
> > rejected by all Linux distros.
> 
> I was thinking on removing that inline version and probably replacing it
> with sse2, using the dsp prototypes you introduce in this set.

I saw that, but we can't have the cake and eat it. Those DSP callbacks can't 
exist without adding a layer of indirection.

-- 
雷米‧德尼-库尔蒙
http://www.remlab.net/



_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function
  2024-06-14 15:11       ` Rémi Denis-Courmont
@ 2024-06-14 15:41         ` James Almer
  2024-06-14 19:03           ` Rémi Denis-Courmont
  0 siblings, 1 reply; 29+ messages in thread
From: James Almer @ 2024-06-14 15:41 UTC (permalink / raw)
  To: ffmpeg-devel

On 6/14/2024 12:11 PM, Rémi Denis-Courmont wrote:
> Le perjantaina 14. kesäkuuta 2024, 17.45.50 EEST Rémi Denis-Courmont a écrit :
>> And x86 only has MMX.
> 
> And the x86 code uses inline assembler. That's not viable on any ISA with sane
> conventions such as Arm or RISC-V. The compiler will reject our vector
> clobbers, unless the Vector extension is included in the compilation target.
> But then that breaks run-time detection, and will be rejected by all Linux
> distros.

I was thinking on removing that inline version and probably replacing it 
with sse2, using the dsp prototypes you introduce in this set.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function
  2024-06-14 14:45     ` Rémi Denis-Courmont
@ 2024-06-14 15:11       ` Rémi Denis-Courmont
  2024-06-14 15:41         ` James Almer
  0 siblings, 1 reply; 29+ messages in thread
From: Rémi Denis-Courmont @ 2024-06-14 15:11 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

Le perjantaina 14. kesäkuuta 2024, 17.45.50 EEST Rémi Denis-Courmont a écrit :
> And x86 only has MMX.

And the x86 code uses inline assembler. That's not viable on any ISA with sane 
conventions such as Arm or RISC-V. The compiler will reject our vector 
clobbers, unless the Vector extension is included in the compilation target. 
But then that breaks run-time detection, and will be rejected by all Linux 
distros.

-- 
Rémi Denis-Courmont
http://www.remlab.net/



_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function
  2024-06-14 14:33   ` James Almer
@ 2024-06-14 14:45     ` Rémi Denis-Courmont
  2024-06-14 15:11       ` Rémi Denis-Courmont
  0 siblings, 1 reply; 29+ messages in thread
From: Rémi Denis-Courmont @ 2024-06-14 14:45 UTC (permalink / raw)
  To: FFmpeg development discussions and patches



Le 14 juin 2024 17:33:16 GMT+03:00, James Almer <jamrial@gmail.com> a écrit :
>On 6/12/2024 1:47 AM, Rémi Denis-Courmont wrote:
>> ---
>>   configure              |  4 ++--
>>   libavcodec/mpegvideo.c | 46 +++++++++++-------------------------------
>>   2 files changed, 14 insertions(+), 36 deletions(-)
>> 
>> diff --git a/configure b/configure
>> index 6baa9b0646..eb9d1b1f5d 100755
>> --- a/configure
>> +++ b/configure
>> @@ -2957,8 +2957,8 @@ ftr_decoder_select="adts_header"
>>   g2m_decoder_deps="zlib"
>>   g2m_decoder_select="blockdsp idctdsp jpegtables"
>>   g729_decoder_select="audiodsp"
>> -h261_decoder_select="mpegvideodec"
>> -h261_encoder_select="mpegvideoenc"
>> +h261_decoder_select="h263dsp mpegvideodec"
>> +h261_encoder_select="h263dsp mpegvideoenc"
>>   h263_decoder_select="h263_parser h263dsp mpegvideodec qpeldsp"
>>   h263_encoder_select="h263dsp mpegvideoenc"
>>   h263i_decoder_select="h263_decoder"
>> diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
>> index 7af823b8bd..b35fd37083 100644
>> --- a/libavcodec/mpegvideo.c
>> +++ b/libavcodec/mpegvideo.c
>> @@ -201,13 +201,11 @@ static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
>>   static void dct_unquantize_h263_intra_c(MpegEncContext *s,
>>                                     int16_t *block, int n, int qscale)
>>   {
>> -    int i, level, qmul, qadd;
>> -    int nCoeffs;
>> +    int qmul = qscale << 1;
>> +    int qadd, nCoeffs;
>>         av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
>>   -    qmul = qscale << 1;
>> -
>>       if (!s->h263_aic) {
>>           block[0] *= n < 4 ? s->y_dc_scale : s->c_dc_scale;
>>           qadd = (qscale - 1) | 1;
>> @@ -215,47 +213,24 @@ static void dct_unquantize_h263_intra_c(MpegEncContext *s,
>>           qadd = 0;
>>       }
>>       if(s->ac_pred)
>> -        nCoeffs=63;
>> +        nCoeffs = 64;
>>       else
>> -        nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
>> +        nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]] + 1;
>>   -    for(i=1; i<=nCoeffs; i++) {
>> -        level = block[i];
>> -        if (level) {
>> -            if (level < 0) {
>> -                level = level * qmul - qadd;
>> -            } else {
>> -                level = level * qmul + qadd;
>> -            }
>> -            block[i] = level;
>> -        }
>> -    }
>> +    s->h263dsp.h263_dct_unquantize_intra(block, nCoeffs, qmul, qadd);
>
>Looking further into this, you're adding a function pointer call in a function that's already called from a function pointer. And both x86 and arm have asm optimized versions of this entire method, which includes all the setup before the loop.

I am not interested in copying existing bad design. Note that the Arm code uses intrinsics. I don't want to use RVV intrinsics especially just for this. And x86 only has MMX.

>Can't you do the same for riscv?

Sure, RV can address fixed offsets up to +/-2 KiB. If someone *else* adds a assembler-friendly header file that defines the offsets to the relevant context fields, and rewrites the checkasm code to match, that is.

> Is there anything preventing you from accessing fields at specific offsets within MpegEncContext?

My strong belief that that would be technically wrong, yes.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function
  2024-06-12  4:47 ` [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function Rémi Denis-Courmont
@ 2024-06-14 14:33   ` James Almer
  2024-06-14 14:45     ` Rémi Denis-Courmont
  0 siblings, 1 reply; 29+ messages in thread
From: James Almer @ 2024-06-14 14:33 UTC (permalink / raw)
  To: ffmpeg-devel

On 6/12/2024 1:47 AM, Rémi Denis-Courmont wrote:
> ---
>   configure              |  4 ++--
>   libavcodec/mpegvideo.c | 46 +++++++++++-------------------------------
>   2 files changed, 14 insertions(+), 36 deletions(-)
> 
> diff --git a/configure b/configure
> index 6baa9b0646..eb9d1b1f5d 100755
> --- a/configure
> +++ b/configure
> @@ -2957,8 +2957,8 @@ ftr_decoder_select="adts_header"
>   g2m_decoder_deps="zlib"
>   g2m_decoder_select="blockdsp idctdsp jpegtables"
>   g729_decoder_select="audiodsp"
> -h261_decoder_select="mpegvideodec"
> -h261_encoder_select="mpegvideoenc"
> +h261_decoder_select="h263dsp mpegvideodec"
> +h261_encoder_select="h263dsp mpegvideoenc"
>   h263_decoder_select="h263_parser h263dsp mpegvideodec qpeldsp"
>   h263_encoder_select="h263dsp mpegvideoenc"
>   h263i_decoder_select="h263_decoder"
> diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
> index 7af823b8bd..b35fd37083 100644
> --- a/libavcodec/mpegvideo.c
> +++ b/libavcodec/mpegvideo.c
> @@ -201,13 +201,11 @@ static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
>   static void dct_unquantize_h263_intra_c(MpegEncContext *s,
>                                     int16_t *block, int n, int qscale)
>   {
> -    int i, level, qmul, qadd;
> -    int nCoeffs;
> +    int qmul = qscale << 1;
> +    int qadd, nCoeffs;
>   
>       av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
>   
> -    qmul = qscale << 1;
> -
>       if (!s->h263_aic) {
>           block[0] *= n < 4 ? s->y_dc_scale : s->c_dc_scale;
>           qadd = (qscale - 1) | 1;
> @@ -215,47 +213,24 @@ static void dct_unquantize_h263_intra_c(MpegEncContext *s,
>           qadd = 0;
>       }
>       if(s->ac_pred)
> -        nCoeffs=63;
> +        nCoeffs = 64;
>       else
> -        nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
> +        nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]] + 1;
>   
> -    for(i=1; i<=nCoeffs; i++) {
> -        level = block[i];
> -        if (level) {
> -            if (level < 0) {
> -                level = level * qmul - qadd;
> -            } else {
> -                level = level * qmul + qadd;
> -            }
> -            block[i] = level;
> -        }
> -    }
> +    s->h263dsp.h263_dct_unquantize_intra(block, nCoeffs, qmul, qadd);

Looking further into this, you're adding a function pointer call in a 
function that's already called from a function pointer. And both x86 and 
arm have asm optimized versions of this entire method, which includes 
all the setup before the loop.

Can't you do the same for riscv? Is there anything preventing you from 
accessing fields at specific offsets within MpegEncContext?

>   }
>   
>   static void dct_unquantize_h263_inter_c(MpegEncContext *s,
>                                     int16_t *block, int n, int qscale)
>   {
> -    int i, level, qmul, qadd;
> +    int qmul = qscale << 1;
> +    int qadd = (qscale - 1) | 1;
>       int nCoeffs;
>   
>       av_assert2(s->block_last_index[n]>=0);
>   
> -    qadd = (qscale - 1) | 1;
> -    qmul = qscale << 1;
> -
> -    nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
> -
> -    for(i=0; i<=nCoeffs; i++) {
> -        level = block[i];
> -        if (level) {
> -            if (level < 0) {
> -                level = level * qmul - qadd;
> -            } else {
> -                level = level * qmul + qadd;
> -            }
> -            block[i] = level;
> -        }
> -    }
> +    nCoeffs = s->inter_scantable.raster_end[s->block_last_index[n]] + 1;
> +    s->h263dsp.h263_dct_unquantize_inter(block, nCoeffs, qmul, qadd);
>   }
>   
>   
> @@ -275,6 +250,9 @@ static void gray8(uint8_t *dst, const uint8_t *src, ptrdiff_t linesize, int h)
>   static av_cold int dct_init(MpegEncContext *s)
>   {
>       ff_blockdsp_init(&s->bdsp);
> +#if CONFIG_H263DSP
> +    ff_h263dsp_init(&s->h263dsp);
> +#endif
>       ff_hpeldsp_init(&s->hdsp, s->avctx->flags);
>       ff_videodsp_init(&s->vdsp, s->avctx->bits_per_raw_sample);
>   
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 29+ messages in thread

* [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function
  2024-06-12  4:47 [FFmpeg-devel] [PATCHv5 " Rémi Denis-Courmont
@ 2024-06-12  4:47 ` Rémi Denis-Courmont
  2024-06-14 14:33   ` James Almer
  0 siblings, 1 reply; 29+ messages in thread
From: Rémi Denis-Courmont @ 2024-06-12  4:47 UTC (permalink / raw)
  To: ffmpeg-devel

---
 configure              |  4 ++--
 libavcodec/mpegvideo.c | 46 +++++++++++-------------------------------
 2 files changed, 14 insertions(+), 36 deletions(-)

diff --git a/configure b/configure
index 6baa9b0646..eb9d1b1f5d 100755
--- a/configure
+++ b/configure
@@ -2957,8 +2957,8 @@ ftr_decoder_select="adts_header"
 g2m_decoder_deps="zlib"
 g2m_decoder_select="blockdsp idctdsp jpegtables"
 g729_decoder_select="audiodsp"
-h261_decoder_select="mpegvideodec"
-h261_encoder_select="mpegvideoenc"
+h261_decoder_select="h263dsp mpegvideodec"
+h261_encoder_select="h263dsp mpegvideoenc"
 h263_decoder_select="h263_parser h263dsp mpegvideodec qpeldsp"
 h263_encoder_select="h263dsp mpegvideoenc"
 h263i_decoder_select="h263_decoder"
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index 7af823b8bd..b35fd37083 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -201,13 +201,11 @@ static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
                                   int16_t *block, int n, int qscale)
 {
-    int i, level, qmul, qadd;
-    int nCoeffs;
+    int qmul = qscale << 1;
+    int qadd, nCoeffs;
 
     av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
 
-    qmul = qscale << 1;
-
     if (!s->h263_aic) {
         block[0] *= n < 4 ? s->y_dc_scale : s->c_dc_scale;
         qadd = (qscale - 1) | 1;
@@ -215,47 +213,24 @@ static void dct_unquantize_h263_intra_c(MpegEncContext *s,
         qadd = 0;
     }
     if(s->ac_pred)
-        nCoeffs=63;
+        nCoeffs = 64;
     else
-        nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
+        nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]] + 1;
 
-    for(i=1; i<=nCoeffs; i++) {
-        level = block[i];
-        if (level) {
-            if (level < 0) {
-                level = level * qmul - qadd;
-            } else {
-                level = level * qmul + qadd;
-            }
-            block[i] = level;
-        }
-    }
+    s->h263dsp.h263_dct_unquantize_intra(block, nCoeffs, qmul, qadd);
 }
 
 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
                                   int16_t *block, int n, int qscale)
 {
-    int i, level, qmul, qadd;
+    int qmul = qscale << 1;
+    int qadd = (qscale - 1) | 1;
     int nCoeffs;
 
     av_assert2(s->block_last_index[n]>=0);
 
-    qadd = (qscale - 1) | 1;
-    qmul = qscale << 1;
-
-    nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
-
-    for(i=0; i<=nCoeffs; i++) {
-        level = block[i];
-        if (level) {
-            if (level < 0) {
-                level = level * qmul - qadd;
-            } else {
-                level = level * qmul + qadd;
-            }
-            block[i] = level;
-        }
-    }
+    nCoeffs = s->inter_scantable.raster_end[s->block_last_index[n]] + 1;
+    s->h263dsp.h263_dct_unquantize_inter(block, nCoeffs, qmul, qadd);
 }
 
 
@@ -275,6 +250,9 @@ static void gray8(uint8_t *dst, const uint8_t *src, ptrdiff_t linesize, int h)
 static av_cold int dct_init(MpegEncContext *s)
 {
     ff_blockdsp_init(&s->bdsp);
+#if CONFIG_H263DSP
+    ff_h263dsp_init(&s->h263dsp);
+#endif
     ff_hpeldsp_init(&s->hdsp, s->avctx->flags);
     ff_videodsp_init(&s->vdsp, s->avctx->bits_per_raw_sample);
 
-- 
2.45.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function
  2024-06-10 12:14     ` Rémi Denis-Courmont
@ 2024-06-10 12:32       ` Michael Niedermayer
  0 siblings, 0 replies; 29+ messages in thread
From: Michael Niedermayer @ 2024-06-10 12:32 UTC (permalink / raw)
  To: FFmpeg development discussions and patches


[-- Attachment #1.1: Type: text/plain, Size: 3789 bytes --]

On Mon, Jun 10, 2024 at 03:14:14PM +0300, Rémi Denis-Courmont wrote:
> 
> 
> Le 10 juin 2024 14:41:31 GMT+03:00, Michael Niedermayer <michael@niedermayer.cc> a écrit :
> >On Sun, Jun 09, 2024 at 07:23:45PM +0300, Rémi Denis-Courmont wrote:
> >> ---
> >>  libavcodec/mpegvideo.c | 44 ++++++++++--------------------------------
> >>  1 file changed, 10 insertions(+), 34 deletions(-)
> >> 
> >> diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
> >> index 7af823b8bd..9be0fecc8d 100644
> >> --- a/libavcodec/mpegvideo.c
> >> +++ b/libavcodec/mpegvideo.c
> >> @@ -201,13 +201,11 @@ static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
> >>  static void dct_unquantize_h263_intra_c(MpegEncContext *s,
> >>                                    int16_t *block, int n, int qscale)
> >>  {
> >> -    int i, level, qmul, qadd;
> >> -    int nCoeffs;
> >> +    int qmul = qscale << 1;
> >> +    int qadd, nCoeffs;
> >>  
> >>      av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
> >>  
> >> -    qmul = qscale << 1;
> >> -
> >>      if (!s->h263_aic) {
> >>          block[0] *= n < 4 ? s->y_dc_scale : s->c_dc_scale;
> >>          qadd = (qscale - 1) | 1;
> >> @@ -215,47 +213,24 @@ static void dct_unquantize_h263_intra_c(MpegEncContext *s,
> >>          qadd = 0;
> >>      }
> >>      if(s->ac_pred)
> >> -        nCoeffs=63;
> >> +        nCoeffs = 64;
> >>      else
> >> -        nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
> >> +        nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]] + 1;
> >>  
> >> -    for(i=1; i<=nCoeffs; i++) {
> >> -        level = block[i];
> >> -        if (level) {
> >> -            if (level < 0) {
> >> -                level = level * qmul - qadd;
> >> -            } else {
> >> -                level = level * qmul + qadd;
> >> -            }
> >> -            block[i] = level;
> >> -        }
> >> -    }
> >> +    s->h263dsp.h263_dct_unquantize_intra(block, nCoeffs, qmul, qadd);
> >>  }
> >>  
> >>  static void dct_unquantize_h263_inter_c(MpegEncContext *s,
> >>                                    int16_t *block, int n, int qscale)
> >>  {
> >> -    int i, level, qmul, qadd;
> >> +    int qmul = qscale << 1;
> >> +    int qadd = (qscale - 1) | 1;
> >>      int nCoeffs;
> >>  
> >>      av_assert2(s->block_last_index[n]>=0);
> >>  
> >> -    qadd = (qscale - 1) | 1;
> >> -    qmul = qscale << 1;
> >> -
> >> -    nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
> >> -
> >> -    for(i=0; i<=nCoeffs; i++) {
> >> -        level = block[i];
> >> -        if (level) {
> >> -            if (level < 0) {
> >> -                level = level * qmul - qadd;
> >> -            } else {
> >> -                level = level * qmul + qadd;
> >> -            }
> >> -            block[i] = level;
> >> -        }
> >> -    }
> >> +    nCoeffs = s->inter_scantable.raster_end[s->block_last_index[n]] + 1;
> >> +    s->h263dsp.h263_dct_unquantize_inter(block, nCoeffs, qmul, qadd);
> >
> >It would be better if the "+ 1" would not be needed and teh functions
> >would keep using "<="
> 
> Why?

because its 1 instruction less and the compieler cannot optimize it out.


> AFAICT, that just makes the assembler more confusing by requiring an unusual boundary check.
> 
> And then for SVE and RVV, adding one is unavoidable possible, as we need the element count to predicate the vector ops. So we'd just end up having to add one in the assembler instead of C.

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

If a bugfix only changes things apparently unrelated to the bug with no
further explanation, that is a good sign that the bugfix is wrong.

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function
  2024-06-10 11:41   ` Michael Niedermayer
@ 2024-06-10 12:14     ` Rémi Denis-Courmont
  2024-06-10 12:32       ` Michael Niedermayer
  0 siblings, 1 reply; 29+ messages in thread
From: Rémi Denis-Courmont @ 2024-06-10 12:14 UTC (permalink / raw)
  To: FFmpeg development discussions and patches



Le 10 juin 2024 14:41:31 GMT+03:00, Michael Niedermayer <michael@niedermayer.cc> a écrit :
>On Sun, Jun 09, 2024 at 07:23:45PM +0300, Rémi Denis-Courmont wrote:
>> ---
>>  libavcodec/mpegvideo.c | 44 ++++++++++--------------------------------
>>  1 file changed, 10 insertions(+), 34 deletions(-)
>> 
>> diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
>> index 7af823b8bd..9be0fecc8d 100644
>> --- a/libavcodec/mpegvideo.c
>> +++ b/libavcodec/mpegvideo.c
>> @@ -201,13 +201,11 @@ static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
>>  static void dct_unquantize_h263_intra_c(MpegEncContext *s,
>>                                    int16_t *block, int n, int qscale)
>>  {
>> -    int i, level, qmul, qadd;
>> -    int nCoeffs;
>> +    int qmul = qscale << 1;
>> +    int qadd, nCoeffs;
>>  
>>      av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
>>  
>> -    qmul = qscale << 1;
>> -
>>      if (!s->h263_aic) {
>>          block[0] *= n < 4 ? s->y_dc_scale : s->c_dc_scale;
>>          qadd = (qscale - 1) | 1;
>> @@ -215,47 +213,24 @@ static void dct_unquantize_h263_intra_c(MpegEncContext *s,
>>          qadd = 0;
>>      }
>>      if(s->ac_pred)
>> -        nCoeffs=63;
>> +        nCoeffs = 64;
>>      else
>> -        nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
>> +        nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]] + 1;
>>  
>> -    for(i=1; i<=nCoeffs; i++) {
>> -        level = block[i];
>> -        if (level) {
>> -            if (level < 0) {
>> -                level = level * qmul - qadd;
>> -            } else {
>> -                level = level * qmul + qadd;
>> -            }
>> -            block[i] = level;
>> -        }
>> -    }
>> +    s->h263dsp.h263_dct_unquantize_intra(block, nCoeffs, qmul, qadd);
>>  }
>>  
>>  static void dct_unquantize_h263_inter_c(MpegEncContext *s,
>>                                    int16_t *block, int n, int qscale)
>>  {
>> -    int i, level, qmul, qadd;
>> +    int qmul = qscale << 1;
>> +    int qadd = (qscale - 1) | 1;
>>      int nCoeffs;
>>  
>>      av_assert2(s->block_last_index[n]>=0);
>>  
>> -    qadd = (qscale - 1) | 1;
>> -    qmul = qscale << 1;
>> -
>> -    nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
>> -
>> -    for(i=0; i<=nCoeffs; i++) {
>> -        level = block[i];
>> -        if (level) {
>> -            if (level < 0) {
>> -                level = level * qmul - qadd;
>> -            } else {
>> -                level = level * qmul + qadd;
>> -            }
>> -            block[i] = level;
>> -        }
>> -    }
>> +    nCoeffs = s->inter_scantable.raster_end[s->block_last_index[n]] + 1;
>> +    s->h263dsp.h263_dct_unquantize_inter(block, nCoeffs, qmul, qadd);
>
>It would be better if the "+ 1" would not be needed and teh functions
>would keep using "<="

Why? AFAICT, that just makes the assembler more confusing by requiring an unusual boundary check.

And then for SVE and RVV, adding one is unavoidable possible, as we need the element count to predicate the vector ops. So we'd just end up having to add one in the assembler instead of C.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function
  2024-06-09 16:23 ` [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function Rémi Denis-Courmont
@ 2024-06-10 11:41   ` Michael Niedermayer
  2024-06-10 12:14     ` Rémi Denis-Courmont
  0 siblings, 1 reply; 29+ messages in thread
From: Michael Niedermayer @ 2024-06-10 11:41 UTC (permalink / raw)
  To: FFmpeg development discussions and patches


[-- Attachment #1.1: Type: text/plain, Size: 2959 bytes --]

On Sun, Jun 09, 2024 at 07:23:45PM +0300, Rémi Denis-Courmont wrote:
> ---
>  libavcodec/mpegvideo.c | 44 ++++++++++--------------------------------
>  1 file changed, 10 insertions(+), 34 deletions(-)
> 
> diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
> index 7af823b8bd..9be0fecc8d 100644
> --- a/libavcodec/mpegvideo.c
> +++ b/libavcodec/mpegvideo.c
> @@ -201,13 +201,11 @@ static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
>  static void dct_unquantize_h263_intra_c(MpegEncContext *s,
>                                    int16_t *block, int n, int qscale)
>  {
> -    int i, level, qmul, qadd;
> -    int nCoeffs;
> +    int qmul = qscale << 1;
> +    int qadd, nCoeffs;
>  
>      av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
>  
> -    qmul = qscale << 1;
> -
>      if (!s->h263_aic) {
>          block[0] *= n < 4 ? s->y_dc_scale : s->c_dc_scale;
>          qadd = (qscale - 1) | 1;
> @@ -215,47 +213,24 @@ static void dct_unquantize_h263_intra_c(MpegEncContext *s,
>          qadd = 0;
>      }
>      if(s->ac_pred)
> -        nCoeffs=63;
> +        nCoeffs = 64;
>      else
> -        nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
> +        nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]] + 1;
>  
> -    for(i=1; i<=nCoeffs; i++) {
> -        level = block[i];
> -        if (level) {
> -            if (level < 0) {
> -                level = level * qmul - qadd;
> -            } else {
> -                level = level * qmul + qadd;
> -            }
> -            block[i] = level;
> -        }
> -    }
> +    s->h263dsp.h263_dct_unquantize_intra(block, nCoeffs, qmul, qadd);
>  }
>  
>  static void dct_unquantize_h263_inter_c(MpegEncContext *s,
>                                    int16_t *block, int n, int qscale)
>  {
> -    int i, level, qmul, qadd;
> +    int qmul = qscale << 1;
> +    int qadd = (qscale - 1) | 1;
>      int nCoeffs;
>  
>      av_assert2(s->block_last_index[n]>=0);
>  
> -    qadd = (qscale - 1) | 1;
> -    qmul = qscale << 1;
> -
> -    nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
> -
> -    for(i=0; i<=nCoeffs; i++) {
> -        level = block[i];
> -        if (level) {
> -            if (level < 0) {
> -                level = level * qmul - qadd;
> -            } else {
> -                level = level * qmul + qadd;
> -            }
> -            block[i] = level;
> -        }
> -    }
> +    nCoeffs = s->inter_scantable.raster_end[s->block_last_index[n]] + 1;
> +    s->h263dsp.h263_dct_unquantize_inter(block, nCoeffs, qmul, qadd);

It would be better if the "+ 1" would not be needed and teh functions
would keep using "<="

thx

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

During times of universal deceit, telling the truth becomes a
revolutionary act. -- George Orwell

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function
  2024-06-09 16:13   ` Andreas Rheinhardt
@ 2024-06-09 16:39     ` Rémi Denis-Courmont
  0 siblings, 0 replies; 29+ messages in thread
From: Rémi Denis-Courmont @ 2024-06-09 16:39 UTC (permalink / raw)
  To: ffmpeg-devel

Le sunnuntaina 9. kesäkuuta 2024, 19.13.54 EEST Andreas Rheinhardt a écrit :
> Rémi Denis-Courmont:
> > ---
> > 
> >  libavcodec/mpegvideo.c | 30 +++++-------------------------
> >  1 file changed, 5 insertions(+), 25 deletions(-)
> > 
> > diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
> > index 7af823b8bd..fa25d14970 100644
> > --- a/libavcodec/mpegvideo.c
> > +++ b/libavcodec/mpegvideo.c
> > @@ -201,7 +201,7 @@ static void
> > dct_unquantize_mpeg2_inter_c(MpegEncContext *s,> 
> >  static void dct_unquantize_h263_intra_c(MpegEncContext *s,
> >  
> >                                    int16_t *block, int n, int qscale)
> >  
> >  {
> > 
> > -    int i, level, qmul, qadd;
> > +    int qmul, qadd;
> > 
> >      int nCoeffs;
> >      
> >      av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
> > 
> > @@ -219,23 +219,13 @@ static void
> > dct_unquantize_h263_intra_c(MpegEncContext *s,> 
> >      else
> >      
> >          nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
> > 
> > -    for(i=1; i<=nCoeffs; i++) {
> > -        level = block[i];
> > -        if (level) {
> > -            if (level < 0) {
> > -                level = level * qmul - qadd;
> > -            } else {
> > -                level = level * qmul + qadd;
> > -            }
> > -            block[i] = level;
> > -        }
> > -    }
> > +    s->h263dsp.h263_dct_unquantize(block, 1, nCoeffs, qmul, qadd);
> > 
> >  }
> >  
> >  static void dct_unquantize_h263_inter_c(MpegEncContext *s,
> >  
> >                                    int16_t *block, int n, int qscale)
> >  
> >  {
> > 
> > -    int i, level, qmul, qadd;
> > +    int qmul, qadd;
> > 
> >      int nCoeffs;
> >      
> >      av_assert2(s->block_last_index[n]>=0);
> > 
> > @@ -244,18 +234,7 @@ static void
> > dct_unquantize_h263_inter_c(MpegEncContext *s,> 
> >      qmul = qscale << 1;
> >      
> >      nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
> > 
> > -
> > -    for(i=0; i<=nCoeffs; i++) {
> > -        level = block[i];
> > -        if (level) {
> > -            if (level < 0) {
> > -                level = level * qmul - qadd;
> > -            } else {
> > -                level = level * qmul + qadd;
> > -            }
> > -            block[i] = level;
> > -        }
> > -    }
> > +    s->h263dsp.h263_dct_unquantize(block, 0, nCoeffs, qmul, qadd);
> > 
> >  }
> > 
> > @@ -275,6 +254,7 @@ static void gray8(uint8_t *dst, const uint8_t *src,
> > ptrdiff_t linesize, int h)> 
> >  static av_cold int dct_init(MpegEncContext *s)
> >  {
> >  
> >      ff_blockdsp_init(&s->bdsp);
> > 
> > +    ff_h263dsp_init(&s->h263dsp);
> > 
> >      ff_hpeldsp_init(&s->hdsp, s->avctx->flags);
> >      ff_videodsp_init(&s->vdsp, s->avctx->bits_per_raw_sample);
> 
> This approach will make H.261 use a h263dsp function which is a misnomer

In reality, that is an existing problem: the *existing* pair of DCT functions 
for H.263 is provisioned regardless of the codec. Fixing the god object and 
spaghetti anti-patterns in MpegEncContext is way outside the scope of this MR.

> and will lead to undefined references if no H.263 decoder or encoder is
> enabled.

Fair enough, *that* is easy to fix.

-- 
Rémi Denis-Courmont
http://www.remlab.net/



_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 29+ messages in thread

* [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function
  2024-06-09 16:23 [FFmpeg-devel] [PATCHv3 1/4] lavc/h263dsp: add DCT dequantisation functions Rémi Denis-Courmont
@ 2024-06-09 16:23 ` Rémi Denis-Courmont
  2024-06-10 11:41   ` Michael Niedermayer
  0 siblings, 1 reply; 29+ messages in thread
From: Rémi Denis-Courmont @ 2024-06-09 16:23 UTC (permalink / raw)
  To: ffmpeg-devel

---
 libavcodec/mpegvideo.c | 44 ++++++++++--------------------------------
 1 file changed, 10 insertions(+), 34 deletions(-)

diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index 7af823b8bd..9be0fecc8d 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -201,13 +201,11 @@ static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
                                   int16_t *block, int n, int qscale)
 {
-    int i, level, qmul, qadd;
-    int nCoeffs;
+    int qmul = qscale << 1;
+    int qadd, nCoeffs;
 
     av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
 
-    qmul = qscale << 1;
-
     if (!s->h263_aic) {
         block[0] *= n < 4 ? s->y_dc_scale : s->c_dc_scale;
         qadd = (qscale - 1) | 1;
@@ -215,47 +213,24 @@ static void dct_unquantize_h263_intra_c(MpegEncContext *s,
         qadd = 0;
     }
     if(s->ac_pred)
-        nCoeffs=63;
+        nCoeffs = 64;
     else
-        nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
+        nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]] + 1;
 
-    for(i=1; i<=nCoeffs; i++) {
-        level = block[i];
-        if (level) {
-            if (level < 0) {
-                level = level * qmul - qadd;
-            } else {
-                level = level * qmul + qadd;
-            }
-            block[i] = level;
-        }
-    }
+    s->h263dsp.h263_dct_unquantize_intra(block, nCoeffs, qmul, qadd);
 }
 
 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
                                   int16_t *block, int n, int qscale)
 {
-    int i, level, qmul, qadd;
+    int qmul = qscale << 1;
+    int qadd = (qscale - 1) | 1;
     int nCoeffs;
 
     av_assert2(s->block_last_index[n]>=0);
 
-    qadd = (qscale - 1) | 1;
-    qmul = qscale << 1;
-
-    nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
-
-    for(i=0; i<=nCoeffs; i++) {
-        level = block[i];
-        if (level) {
-            if (level < 0) {
-                level = level * qmul - qadd;
-            } else {
-                level = level * qmul + qadd;
-            }
-            block[i] = level;
-        }
-    }
+    nCoeffs = s->inter_scantable.raster_end[s->block_last_index[n]] + 1;
+    s->h263dsp.h263_dct_unquantize_inter(block, nCoeffs, qmul, qadd);
 }
 
 
@@ -275,6 +250,7 @@ static void gray8(uint8_t *dst, const uint8_t *src, ptrdiff_t linesize, int h)
 static av_cold int dct_init(MpegEncContext *s)
 {
     ff_blockdsp_init(&s->bdsp);
+    ff_h263dsp_init(&s->h263dsp);
     ff_hpeldsp_init(&s->hdsp, s->avctx->flags);
     ff_videodsp_init(&s->vdsp, s->avctx->bits_per_raw_sample);
 
-- 
2.45.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function
  2024-06-09  9:27 ` [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function Rémi Denis-Courmont
@ 2024-06-09 16:13   ` Andreas Rheinhardt
  2024-06-09 16:39     ` Rémi Denis-Courmont
  0 siblings, 1 reply; 29+ messages in thread
From: Andreas Rheinhardt @ 2024-06-09 16:13 UTC (permalink / raw)
  To: ffmpeg-devel

Rémi Denis-Courmont:
> ---
>  libavcodec/mpegvideo.c | 30 +++++-------------------------
>  1 file changed, 5 insertions(+), 25 deletions(-)
> 
> diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
> index 7af823b8bd..fa25d14970 100644
> --- a/libavcodec/mpegvideo.c
> +++ b/libavcodec/mpegvideo.c
> @@ -201,7 +201,7 @@ static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
>  static void dct_unquantize_h263_intra_c(MpegEncContext *s,
>                                    int16_t *block, int n, int qscale)
>  {
> -    int i, level, qmul, qadd;
> +    int qmul, qadd;
>      int nCoeffs;
>  
>      av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
> @@ -219,23 +219,13 @@ static void dct_unquantize_h263_intra_c(MpegEncContext *s,
>      else
>          nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
>  
> -    for(i=1; i<=nCoeffs; i++) {
> -        level = block[i];
> -        if (level) {
> -            if (level < 0) {
> -                level = level * qmul - qadd;
> -            } else {
> -                level = level * qmul + qadd;
> -            }
> -            block[i] = level;
> -        }
> -    }
> +    s->h263dsp.h263_dct_unquantize(block, 1, nCoeffs, qmul, qadd);
>  }
>  
>  static void dct_unquantize_h263_inter_c(MpegEncContext *s,
>                                    int16_t *block, int n, int qscale)
>  {
> -    int i, level, qmul, qadd;
> +    int qmul, qadd;
>      int nCoeffs;
>  
>      av_assert2(s->block_last_index[n]>=0);
> @@ -244,18 +234,7 @@ static void dct_unquantize_h263_inter_c(MpegEncContext *s,
>      qmul = qscale << 1;
>  
>      nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
> -
> -    for(i=0; i<=nCoeffs; i++) {
> -        level = block[i];
> -        if (level) {
> -            if (level < 0) {
> -                level = level * qmul - qadd;
> -            } else {
> -                level = level * qmul + qadd;
> -            }
> -            block[i] = level;
> -        }
> -    }
> +    s->h263dsp.h263_dct_unquantize(block, 0, nCoeffs, qmul, qadd);
>  }
>  
>  
> @@ -275,6 +254,7 @@ static void gray8(uint8_t *dst, const uint8_t *src, ptrdiff_t linesize, int h)
>  static av_cold int dct_init(MpegEncContext *s)
>  {
>      ff_blockdsp_init(&s->bdsp);
> +    ff_h263dsp_init(&s->h263dsp);
>      ff_hpeldsp_init(&s->hdsp, s->avctx->flags);
>      ff_videodsp_init(&s->vdsp, s->avctx->bits_per_raw_sample);
>  

This approach will make H.261 use a h263dsp function which is a misnomer
and will lead to undefined references if no H.263 decoder or encoder is
enabled.

- Andreas

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 29+ messages in thread

* [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function
  2024-06-09  9:27 [FFmpeg-devel] [PATCHv2 1/4] lavc/h263dsp: add DCT dequantisation function Rémi Denis-Courmont
@ 2024-06-09  9:27 ` Rémi Denis-Courmont
  2024-06-09 16:13   ` Andreas Rheinhardt
  0 siblings, 1 reply; 29+ messages in thread
From: Rémi Denis-Courmont @ 2024-06-09  9:27 UTC (permalink / raw)
  To: ffmpeg-devel

---
 libavcodec/mpegvideo.c | 30 +++++-------------------------
 1 file changed, 5 insertions(+), 25 deletions(-)

diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index 7af823b8bd..fa25d14970 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -201,7 +201,7 @@ static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
 static void dct_unquantize_h263_intra_c(MpegEncContext *s,
                                   int16_t *block, int n, int qscale)
 {
-    int i, level, qmul, qadd;
+    int qmul, qadd;
     int nCoeffs;
 
     av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
@@ -219,23 +219,13 @@ static void dct_unquantize_h263_intra_c(MpegEncContext *s,
     else
         nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
 
-    for(i=1; i<=nCoeffs; i++) {
-        level = block[i];
-        if (level) {
-            if (level < 0) {
-                level = level * qmul - qadd;
-            } else {
-                level = level * qmul + qadd;
-            }
-            block[i] = level;
-        }
-    }
+    s->h263dsp.h263_dct_unquantize(block, 1, nCoeffs, qmul, qadd);
 }
 
 static void dct_unquantize_h263_inter_c(MpegEncContext *s,
                                   int16_t *block, int n, int qscale)
 {
-    int i, level, qmul, qadd;
+    int qmul, qadd;
     int nCoeffs;
 
     av_assert2(s->block_last_index[n]>=0);
@@ -244,18 +234,7 @@ static void dct_unquantize_h263_inter_c(MpegEncContext *s,
     qmul = qscale << 1;
 
     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
-
-    for(i=0; i<=nCoeffs; i++) {
-        level = block[i];
-        if (level) {
-            if (level < 0) {
-                level = level * qmul - qadd;
-            } else {
-                level = level * qmul + qadd;
-            }
-            block[i] = level;
-        }
-    }
+    s->h263dsp.h263_dct_unquantize(block, 0, nCoeffs, qmul, qadd);
 }
 
 
@@ -275,6 +254,7 @@ static void gray8(uint8_t *dst, const uint8_t *src, ptrdiff_t linesize, int h)
 static av_cold int dct_init(MpegEncContext *s)
 {
     ff_blockdsp_init(&s->bdsp);
+    ff_h263dsp_init(&s->h263dsp);
     ff_hpeldsp_init(&s->hdsp, s->avctx->flags);
     ff_videodsp_init(&s->vdsp, s->avctx->bits_per_raw_sample);
 
-- 
2.45.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 29+ messages in thread

end of thread, other threads:[~2024-07-06 19:10 UTC | newest]

Thread overview: 29+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-07-01 19:13 [FFmpeg-devel] [PATCH 1/4] lavc/h263dsp: add DCT dequantisation functions Rémi Denis-Courmont
2024-07-01 19:13 ` [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function Rémi Denis-Courmont
2024-07-06 15:23   ` Andreas Rheinhardt
2024-07-06 16:10     ` Rémi Denis-Courmont
2024-07-06 16:20       ` Andreas Rheinhardt
2024-07-06 16:47         ` Rémi Denis-Courmont
2024-07-06 18:27           ` Andreas Rheinhardt
2024-07-01 19:13 ` [FFmpeg-devel] [PATCH 3/4] checkasm/h263dsp: test dct_unquantize_{intra, inter} Rémi Denis-Courmont
2024-07-06 19:10   ` Andreas Rheinhardt
2024-07-01 19:13 ` [FFmpeg-devel] [PATCH 4/4] lavc/h263dsp: R-V V " Rémi Denis-Courmont
2024-07-06 10:54 ` [FFmpeg-devel] [PATCH 1/4] lavc/h263dsp: add DCT dequantisation functions Rémi Denis-Courmont
2024-07-06 15:17 ` Andreas Rheinhardt
2024-07-06 16:10   ` Rémi Denis-Courmont
2024-07-06 16:19     ` Andreas Rheinhardt
2024-07-06 17:28       ` Rémi Denis-Courmont
2024-07-06 18:23         ` Andreas Rheinhardt
  -- strict thread matches above, loose matches on Subject: below --
2024-06-12  4:47 [FFmpeg-devel] [PATCHv5 " Rémi Denis-Courmont
2024-06-12  4:47 ` [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function Rémi Denis-Courmont
2024-06-14 14:33   ` James Almer
2024-06-14 14:45     ` Rémi Denis-Courmont
2024-06-14 15:11       ` Rémi Denis-Courmont
2024-06-14 15:41         ` James Almer
2024-06-14 19:03           ` Rémi Denis-Courmont
2024-06-09 16:23 [FFmpeg-devel] [PATCHv3 1/4] lavc/h263dsp: add DCT dequantisation functions Rémi Denis-Courmont
2024-06-09 16:23 ` [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function Rémi Denis-Courmont
2024-06-10 11:41   ` Michael Niedermayer
2024-06-10 12:14     ` Rémi Denis-Courmont
2024-06-10 12:32       ` Michael Niedermayer
2024-06-09  9:27 [FFmpeg-devel] [PATCHv2 1/4] lavc/h263dsp: add DCT dequantisation function Rémi Denis-Courmont
2024-06-09  9:27 ` [FFmpeg-devel] [PATCH 2/4] lavc/mpegvideo: use H263DSP dequant function Rémi Denis-Courmont
2024-06-09 16:13   ` Andreas Rheinhardt
2024-06-09 16:39     ` Rémi Denis-Courmont

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git