[FFmpeg-devel] [PATCH 1/9] avcodec/proresdec: Don't use LONG_BITSTREAM_READER

From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>
Subject: [FFmpeg-devel] [PATCH 1/9] avcodec/proresdec: Don't use LONG_BITSTREAM_READER
Date: Mon, 24 Feb 2025 15:29:57 +0100
Message-ID: <AS8P250MB074498967AEE0435206019928FC02@AS8P250MB0744.EURP250.PROD.OUTLOOK.COM> (raw)

[-- Attachment #1: Type: text/plain, Size: 16 bytes --]

Patches attached

[-- Attachment #2: 0001-avcodec-proresdec-Don-t-use-LONG_BITSTREAM_READER.patch --]
[-- Type: text/x-patch, Size: 2712 bytes --]

From d94e2667b7f3b907a127f93c8075f864071a52df Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Sun, 23 Feb 2025 23:21:00 +0100
Subject: [PATCH 1/9] avcodec/proresdec: Don't use LONG_BITSTREAM_READER

Using LONG_BITSTREAM_READER means that every get_bits() call
uses an AV_RB64() to ensure that the cache always contains 32 valid
bits (as opposed to the ordinary 25 guaranteed by reading 32 bits);
yet this is unnecessary when unpacking alpha. So only use these
64-bit reads where necessary.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
 libavcodec/proresdec.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/libavcodec/proresdec.c b/libavcodec/proresdec.c
index 6a256107b4..b2517bd5a4 100644
--- a/libavcodec/proresdec.c
+++ b/libavcodec/proresdec.c
@@ -26,8 +26,6 @@
 
 //#define DEBUG
 
-#define LONG_BITSTREAM_READER
-
 #include "config_components.h"
 
 #include "libavutil/internal.h"
@@ -428,7 +426,7 @@ static int decode_picture_header(AVCodecContext *avctx, const uint8_t *buf, cons
         unsigned int rice_order, exp_order, switch_bits;                \
         unsigned int q, buf, bits;                                      \
                                                                         \
-        UPDATE_CACHE(re, gb);                                           \
+        UPDATE_CACHE_32(re, gb); /* We really need 32 bits */           \
         buf = GET_CACHE(re, gb);                                        \
                                                                         \
         /* number of bits to switch between rice and exp golomb */      \
@@ -440,7 +438,7 @@ static int decode_picture_header(AVCodecContext *avctx, const uint8_t *buf, cons
                                                                         \
         if (q > switch_bits) { /* exp golomb */                         \
             bits = exp_order - switch_bits + (q<<1);                    \
-            if (bits > FFMIN(MIN_CACHE_BITS, 31))                       \
+            if (bits > 31)                                              \
                 return AVERROR_INVALIDDATA;                             \
             val = SHOW_UBITS(re, gb, bits) - (1 << exp_order) +         \
                 ((switch_bits + 1) << rice_order);                      \
@@ -502,7 +500,7 @@ static av_always_inline int decode_ac_coeffs(AVCodecContext *avctx, GetBitContex
     int log2_block_count = av_log2(blocks_per_slice);
 
     OPEN_READER(re, gb);
-    UPDATE_CACHE(re, gb);                                           \
+    UPDATE_CACHE_32(re, gb);
     run   = 4;
     level = 2;
 
-- 
2.45.2


[-- Attachment #3: 0002-avcodec-get_bits-Remove-LONG_BITSTREAM_READER.patch --]
[-- Type: text/x-patch, Size: 1740 bytes --]

From 4f28ef1c165a9ca65b3132065ee8db1e72a5f63b Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Sun, 23 Feb 2025 23:26:33 +0100
Subject: [PATCH 2/9] avcodec/get_bits: Remove LONG_BITSTREAM_READER

No longer used anywhere.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
 libavcodec/get_bits.h | 19 +------------------
 1 file changed, 1 insertion(+), 18 deletions(-)

diff --git a/libavcodec/get_bits.h b/libavcodec/get_bits.h
index fe2f6378b4..39d8e5bc1e 100644
--- a/libavcodec/get_bits.h
+++ b/libavcodec/get_bits.h
@@ -163,11 +163,7 @@ static inline unsigned int show_bits(GetBitContext *s, int n);
  * For examples see get_bits, show_bits, skip_bits, get_vlc.
  */
 
-#if defined LONG_BITSTREAM_READER
-#   define MIN_CACHE_BITS 32
-#else
-#   define MIN_CACHE_BITS 25
-#endif
+#define MIN_CACHE_BITS 25
 
 #define OPEN_READER_NOSIZE(name, gb)            \
     unsigned int name ## _index = (gb)->index;  \
@@ -195,24 +191,11 @@ static inline unsigned int show_bits(GetBitContext *s, int n);
 
 /* Using these two macros ensures that 32 bits are available. */
 # define UPDATE_CACHE_LE_32(name, gb) UPDATE_CACHE_LE_EXT(name, (gb), 64, 32)
-
 # define UPDATE_CACHE_BE_32(name, gb) UPDATE_CACHE_BE_EXT(name, (gb), 64, 32)
 
-# ifdef LONG_BITSTREAM_READER
-
-# define UPDATE_CACHE_LE(name, gb) UPDATE_CACHE_LE_32(name, (gb))
-
-# define UPDATE_CACHE_BE(name, gb) UPDATE_CACHE_BE_32(name, (gb))
-
-#else
-
 # define UPDATE_CACHE_LE(name, gb) UPDATE_CACHE_LE_EXT(name, (gb), 32, 32)
-
 # define UPDATE_CACHE_BE(name, gb) UPDATE_CACHE_BE_EXT(name, (gb), 32, 32)
 
-#endif
-
-
 #ifdef BITSTREAM_READER_LE
 
 # define UPDATE_CACHE(name, gb) UPDATE_CACHE_LE(name, gb)
-- 
2.45.2


[-- Attachment #4: 0003-avcodec-proresdec-Deduplicate-debug-message.patch --]
[-- Type: text/x-patch, Size: 1280 bytes --]

From c0e3ef3881ffd9f31cdfd2756811ce42177ca276 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Mon, 24 Feb 2025 00:18:56 +0100
Subject: [PATCH 3/9] avcodec/proresdec: Deduplicate debug message

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
 libavcodec/proresdec.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/libavcodec/proresdec.c b/libavcodec/proresdec.c
index b2517bd5a4..f534b7b6b1 100644
--- a/libavcodec/proresdec.c
+++ b/libavcodec/proresdec.c
@@ -164,11 +164,9 @@ static av_cold int decode_init(AVCodecContext *avctx)
         av_log(avctx, AV_LOG_WARNING, "Unknown prores profile %d\n", avctx->codec_tag);
     }
 
-    if (avctx->bits_per_raw_sample == 10) {
-        av_log(avctx, AV_LOG_DEBUG, "Auto bitdepth precision. Use 10b decoding based on codec tag.\n");
-    } else { /* 12b */
-        av_log(avctx, AV_LOG_DEBUG, "Auto bitdepth precision. Use 12b decoding based on codec tag.\n");
-    }
+    av_log(avctx, AV_LOG_DEBUG,
+           "Auto bitdepth precision. Use %db decoding based on codec tag.\n",
+           avctx->bits_per_raw_sample);
 
     ff_blockdsp_init(&ctx->bdsp);
     ret = ff_proresdsp_init(&ctx->prodsp, avctx->bits_per_raw_sample);
-- 
2.45.2


[-- Attachment #5: 0004-avcodec-prores-dec-dsp-Remove-always-false-checks.patch --]
[-- Type: text/x-patch, Size: 4549 bytes --]

From 175c3fbf6e00a33270a583301d2d8178f9e0bc62 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Mon, 24 Feb 2025 11:35:33 +0100
Subject: [PATCH 4/9] avcodec/prores{dec,dsp}: Remove always-false checks

avctx->bits_per_raw_sample is always 10 or 12 here;
the checks have been added in preparation for making
bits_per_raw_sample user-settable via an AVOption,
but this never happened.

While at it, also set unpack_alpha earlier
(where bits_per_raw_sample is set).

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
 libavcodec/proresdec.c | 20 +++++---------------
 libavcodec/proresdsp.c |  9 ++++-----
 libavcodec/proresdsp.h |  2 +-
 3 files changed, 10 insertions(+), 21 deletions(-)

diff --git a/libavcodec/proresdec.c b/libavcodec/proresdec.c
index f534b7b6b1..75cc2dcb6a 100644
--- a/libavcodec/proresdec.c
+++ b/libavcodec/proresdec.c
@@ -132,7 +132,6 @@ static void unpack_alpha_12(GetBitContext *gb, uint16_t *dst, int num_coeffs,
 
 static av_cold int decode_init(AVCodecContext *avctx)
 {
-    int ret = 0;
     ProresContext *ctx = avctx->priv_data;
     uint8_t idct_permutation[64];
 
@@ -164,16 +163,15 @@ static av_cold int decode_init(AVCodecContext *avctx)
         av_log(avctx, AV_LOG_WARNING, "Unknown prores profile %d\n", avctx->codec_tag);
     }
 
+    ctx->unpack_alpha = avctx->bits_per_raw_sample == 10 ?
+                            unpack_alpha_10 : unpack_alpha_12;
+
     av_log(avctx, AV_LOG_DEBUG,
            "Auto bitdepth precision. Use %db decoding based on codec tag.\n",
            avctx->bits_per_raw_sample);
 
     ff_blockdsp_init(&ctx->bdsp);
-    ret = ff_proresdsp_init(&ctx->prodsp, avctx->bits_per_raw_sample);
-    if (ret < 0) {
-        av_log(avctx, AV_LOG_ERROR, "Fail to init proresdsp for bits per raw sample %d\n", avctx->bits_per_raw_sample);
-        return ret;
-    }
+    ff_proresdsp_init(&ctx->prodsp, avctx->bits_per_raw_sample);
 
     ff_init_scantable_permutation(idct_permutation,
                                   ctx->prodsp.idct_permutation_type);
@@ -183,15 +181,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
 
     ctx->pix_fmt = AV_PIX_FMT_NONE;
 
-    if (avctx->bits_per_raw_sample == 10){
-        ctx->unpack_alpha = unpack_alpha_10;
-    } else if (avctx->bits_per_raw_sample == 12){
-        ctx->unpack_alpha = unpack_alpha_12;
-    } else {
-        av_log(avctx, AV_LOG_ERROR, "Fail to set unpack_alpha for bits per raw sample %d\n", avctx->bits_per_raw_sample);
-        return AVERROR_BUG;
-    }
-    return ret;
+    return 0;
 }
 
 static int decode_frame_header(ProresContext *ctx, const uint8_t *buf,
diff --git a/libavcodec/proresdsp.c b/libavcodec/proresdsp.c
index bc253e55f7..d20b9d938a 100644
--- a/libavcodec/proresdsp.c
+++ b/libavcodec/proresdsp.c
@@ -22,6 +22,7 @@
 
 #include "config.h"
 #include "libavutil/attributes.h"
+#include "libavutil/avassert.h"
 #include "libavutil/common.h"
 #include "idctdsp.h"
 #include "proresdsp.h"
@@ -76,16 +77,15 @@ static void prores_idct_put_12_c(uint16_t *out, ptrdiff_t linesize, int16_t *blo
     put_pixels_12(out, linesize >> 1, block);
 }
 
-av_cold int ff_proresdsp_init(ProresDSPContext *dsp, int bits_per_raw_sample)
+av_cold void ff_proresdsp_init(ProresDSPContext *dsp, int bits_per_raw_sample)
 {
     if (bits_per_raw_sample == 10) {
         dsp->idct_put = prores_idct_put_10_c;
         dsp->idct_permutation_type = FF_IDCT_PERM_NONE;
-    } else if (bits_per_raw_sample == 12) {
+    } else {
+        av_assert1(bits_per_raw_sample == 12);
         dsp->idct_put = prores_idct_put_12_c;
         dsp->idct_permutation_type = FF_IDCT_PERM_NONE;
-    } else {
-        return AVERROR_BUG;
     }
 
 #if ARCH_X86
@@ -94,5 +94,4 @@ av_cold int ff_proresdsp_init(ProresDSPContext *dsp, int bits_per_raw_sample)
 
     ff_init_scantable_permutation(dsp->idct_permutation,
                                   dsp->idct_permutation_type);
-    return 0;
 }
diff --git a/libavcodec/proresdsp.h b/libavcodec/proresdsp.h
index 966ba3d797..ef09d86380 100644
--- a/libavcodec/proresdsp.h
+++ b/libavcodec/proresdsp.h
@@ -32,7 +32,7 @@ typedef struct ProresDSPContext {
     void (*idct_put)(uint16_t *out, ptrdiff_t linesize, int16_t *block, const int16_t *qmat);
 } ProresDSPContext;
 
-int ff_proresdsp_init(ProresDSPContext *dsp, int bits_per_raw_sample);
+void ff_proresdsp_init(ProresDSPContext *dsp, int bits_per_raw_sample);
 
 void ff_proresdsp_init_x86(ProresDSPContext *dsp, int bits_per_raw_sample);
 
-- 
2.45.2


[-- Attachment #6: 0005-avcodec-proresdec-Disable-HWAccel-at-compile-time-wh.patch --]
[-- Type: text/x-patch, Size: 1970 bytes --]

From d479504d103962d1c74496885dcba1a21729f6cc Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Mon, 24 Feb 2025 11:51:09 +0100
Subject: [PATCH 5/9] avcodec/proresdec: Disable HWAccel at compile-time when
 possible

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
 libavcodec/proresdec.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/libavcodec/proresdec.c b/libavcodec/proresdec.c
index 75cc2dcb6a..18cfc25279 100644
--- a/libavcodec/proresdec.c
+++ b/libavcodec/proresdec.c
@@ -254,6 +254,7 @@ static int decode_frame_header(ProresContext *ctx, const uint8_t *buf,
 
     if (pix_fmt != ctx->pix_fmt) {
 #define HWACCEL_MAX (CONFIG_PRORES_VIDEOTOOLBOX_HWACCEL)
+#if HWACCEL_MAX
         enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
         int ret;
 
@@ -269,6 +270,9 @@ static int decode_frame_header(ProresContext *ctx, const uint8_t *buf,
             return ret;
 
         avctx->pix_fmt = ret;
+#else
+        avctx->pix_fmt = ctx->pix_fmt = pix_fmt;
+#endif
     }
 
     ctx->frame->color_primaries = buf[14];
@@ -789,7 +793,7 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *frame,
         return ret;
     ff_thread_finish_setup(avctx);
 
-    if (avctx->hwaccel) {
+    if (HWACCEL_MAX && avctx->hwaccel) {
         const FFHWAccel *hwaccel = ffhwaccel(avctx->hwaccel);
         ret = hwaccel->start_frame(avctx, NULL, 0);
         if (ret < 0)
@@ -862,10 +866,12 @@ const FFCodec ff_prores_decoder = {
     UPDATE_THREAD_CONTEXT(update_thread_context),
     .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS,
     .p.profiles     = NULL_IF_CONFIG_SMALL(ff_prores_profiles),
+#if HWACCEL_MAX
     .hw_configs     = (const AVCodecHWConfigInternal *const []) {
 #if CONFIG_PRORES_VIDEOTOOLBOX_HWACCEL
         HWACCEL_VIDEOTOOLBOX(prores),
 #endif
         NULL
     },
+#endif
 };
-- 
2.45.2


[-- Attachment #7: 0006-avcodec-proresdsp-Make-put_pixels-truely-ptrdiff_t-c.patch --]
[-- Type: text/x-patch, Size: 1572 bytes --]

From 54061804fcb2991feb3fbb447757390e2f18b7d3 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Mon, 24 Feb 2025 12:51:16 +0100
Subject: [PATCH 6/9] avcodec/proresdsp: Make put_pixels truly ptrdiff_t
 compatible

It currently uses an intermediate int, which would not work
if linesize exceeded the range of int and which inhibits compiler
optimizations. Also switch to pointer arithmetic and declare the
loop variables in the smallest possible scope.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
 libavcodec/proresdsp.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/libavcodec/proresdsp.c b/libavcodec/proresdsp.c
index d20b9d938a..20de1cab4f 100644
--- a/libavcodec/proresdsp.c
+++ b/libavcodec/proresdsp.c
@@ -40,16 +40,14 @@
  */
 
 static inline void put_pixel(uint16_t *dst, ptrdiff_t linesize, const int16_t *in, int bits_per_raw_sample) {
-    int x, y, src_offset, dst_offset;
-
-    for (y = 0, dst_offset = 0; y < 8; y++, dst_offset += linesize) {
-        for (x = 0; x < 8; x++) {
-            src_offset = (y << 3) + x;
+    for (int y = 0; y < 8; y++, dst += linesize) {
+        for (int x = 0; x < 8; x++) {
+            int src_offset = (y << 3) + x;
 
             if (bits_per_raw_sample == 10) {
-                dst[dst_offset + x] = CLIP_10(in[src_offset]);
+                dst[x] = CLIP_10(in[src_offset]);
             } else {//12b
-                dst[dst_offset + x] = CLIP_12(in[src_offset]);
+                dst[x] = CLIP_12(in[src_offset]);
             }
         }
     }
-- 
2.45.2


[-- Attachment #8: 0007-avcodec-simple_idct_template-Move-function-to-its-de.patch --]
[-- Type: text/x-patch, Size: 2331 bytes --]

From 591f2cb42bfe4eb0e2ba181d8fc9a0a7477a3d29 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Mon, 24 Feb 2025 13:41:35 +0100
Subject: [PATCH 7/9] avcodec/simple_idct_template: Move function to its
 declaration

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
 libavcodec/simple_idct_template.c | 36 +++++++++++++++----------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/libavcodec/simple_idct_template.c b/libavcodec/simple_idct_template.c
index 5ddd0b45a2..887e3abb0b 100644
--- a/libavcodec/simple_idct_template.c
+++ b/libavcodec/simple_idct_template.c
@@ -261,6 +261,24 @@ static inline void FUNC6(idctRowCondDC)(idctin *row, int extra_shift)
 #ifdef EXTRA_SHIFT
 static inline void FUNC(idctSparseCol_extrashift)(int16_t *col)
 #else
+static inline void FUNC6(idctSparseCol)(idctin *col)
+#endif
+{
+    unsigned a0, a1, a2, a3, b0, b1, b2, b3;
+
+    IDCT_COLS;
+
+    col[0 ] = ((int)(a0 + b0) >> COL_SHIFT);
+    col[8 ] = ((int)(a1 + b1) >> COL_SHIFT);
+    col[16] = ((int)(a2 + b2) >> COL_SHIFT);
+    col[24] = ((int)(a3 + b3) >> COL_SHIFT);
+    col[32] = ((int)(a3 - b3) >> COL_SHIFT);
+    col[40] = ((int)(a2 - b2) >> COL_SHIFT);
+    col[48] = ((int)(a1 - b1) >> COL_SHIFT);
+    col[56] = ((int)(a0 - b0) >> COL_SHIFT);
+}
+
+#ifndef EXTRA_SHIFT
 static inline void FUNC6(idctSparseColPut)(pixel *dest, ptrdiff_t line_size,
                                           idctin *col)
 {
@@ -309,24 +327,6 @@ static inline void FUNC6(idctSparseColAdd)(pixel *dest, ptrdiff_t line_size,
     dest[0] = av_clip_pixel(dest[0] + ((int)(a0 - b0) >> COL_SHIFT));
 }
 
-static inline void FUNC6(idctSparseCol)(idctin *col)
-#endif
-{
-    unsigned a0, a1, a2, a3, b0, b1, b2, b3;
-
-    IDCT_COLS;
-
-    col[0 ] = ((int)(a0 + b0) >> COL_SHIFT);
-    col[8 ] = ((int)(a1 + b1) >> COL_SHIFT);
-    col[16] = ((int)(a2 + b2) >> COL_SHIFT);
-    col[24] = ((int)(a3 + b3) >> COL_SHIFT);
-    col[32] = ((int)(a3 - b3) >> COL_SHIFT);
-    col[40] = ((int)(a2 - b2) >> COL_SHIFT);
-    col[48] = ((int)(a1 - b1) >> COL_SHIFT);
-    col[56] = ((int)(a0 - b0) >> COL_SHIFT);
-}
-
-#ifndef EXTRA_SHIFT
 void FUNC6(ff_simple_idct_put)(uint8_t *dest_, ptrdiff_t line_size, int16_t *block_)
 {
     idctin *block = (idctin *)block_;
-- 
2.45.2


[-- Attachment #9: 0008-avcodec-simple_idct-Move-ProRes-only-code-to-proresd.patch --]
[-- Type: text/x-patch, Size: 6495 bytes --]

From aa476bdf8172f053963a8e7b885852893c2e5d5c Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Mon, 24 Feb 2025 14:22:52 +0100
Subject: [PATCH 8/9] avcodec/simple_idct: Move ProRes-only code to proresdsp.c

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
 libavcodec/proresdsp.c            | 52 +++++++++++++++++++++++++++++--
 libavcodec/simple_idct.c          | 37 ----------------------
 libavcodec/simple_idct.h          |  8 -----
 libavcodec/simple_idct_template.c |  4 +--
 libavcodec/tests/dct.c            |  3 +-
 5 files changed, 53 insertions(+), 51 deletions(-)

diff --git a/libavcodec/proresdsp.c b/libavcodec/proresdsp.c
index 20de1cab4f..17726a56e0 100644
--- a/libavcodec/proresdsp.c
+++ b/libavcodec/proresdsp.c
@@ -24,9 +24,55 @@
 #include "libavutil/attributes.h"
 #include "libavutil/avassert.h"
 #include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
 #include "idctdsp.h"
 #include "proresdsp.h"
-#include "simple_idct.h"
+
+#define IN_IDCT_DEPTH 16
+#define PRORES_ONLY
+
+#define BIT_DEPTH 10
+#define EXTRA_SHIFT
+#include "simple_idct_template.c"
+#undef BIT_DEPTH
+#undef EXTRA_SHIFT
+
+#define BIT_DEPTH 12
+#include "simple_idct_template.c"
+#undef BIT_DEPTH
+
+/**
+ * Special version of ff_simple_idct_int16_10bit() which does dequantization
+ * and scales by a factor of 2 more between the two IDCTs to account
+ * for larger scale of input coefficients.
+ */
+static void prores_idct_10(int16_t *restrict block, const int16_t *restrict qmat)
+{
+    for (int i = 0; i < 64; i++)
+        block[i] *= qmat[i];
+
+    for (int i = 0; i < 8; i++)
+        idctRowCondDC_extrashift_10(block + i*8, 2);
+
+    for (int i = 0; i < 8; i++) {
+        block[i] += 8192;
+        idctSparseCol_extrashift_10(block + i);
+    }
+}
+
+static void prores_idct_12(int16_t *restrict block, const int16_t *restrict qmat)
+{
+    for (int i = 0; i < 64; i++)
+        block[i] *= qmat[i];
+
+    for (int i = 0; i < 8; i++)
+        idctRowCondDC_int16_12bit(block + i*8, 0);
+
+    for (int i = 0; i < 8; i++) {
+        block[i] += 8192;
+        idctSparseCol_int16_12bit(block + i);
+    }
+}
 
 #define CLIP_MIN (1 << 2)                     ///< minimum value for clipping resulting pixels
 #define CLIP_MAX_10 (1 << 10) - CLIP_MIN - 1  ///< maximum value for clipping resulting pixels
@@ -65,13 +111,13 @@ static void put_pixels_12(uint16_t *dst, ptrdiff_t linesize, const int16_t *in)
 
 static void prores_idct_put_10_c(uint16_t *out, ptrdiff_t linesize, int16_t *block, const int16_t *qmat)
 {
-    ff_prores_idct_10(block, qmat);
+    prores_idct_10(block, qmat);
     put_pixels_10(out, linesize >> 1, block);
 }
 
 static void prores_idct_put_12_c(uint16_t *out, ptrdiff_t linesize, int16_t *block, const int16_t *qmat)
 {
-    ff_prores_idct_12(block, qmat);
+    prores_idct_12(block, qmat);
     put_pixels_12(out, linesize >> 1, block);
 }
 
diff --git a/libavcodec/simple_idct.c b/libavcodec/simple_idct.c
index eb13cff146..5253afc6df 100644
--- a/libavcodec/simple_idct.c
+++ b/libavcodec/simple_idct.c
@@ -37,11 +37,6 @@
 
 #define BIT_DEPTH 10
 #include "simple_idct_template.c"
-
-#define EXTRA_SHIFT  2
-#include "simple_idct_template.c"
-
-#undef EXTRA_SHIFT
 #undef BIT_DEPTH
 
 #define BIT_DEPTH 12
@@ -235,35 +230,3 @@ void ff_simple_idct44_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
         idct4col_add(dest + i, line_size, block + i);
     }
 }
-
-void ff_prores_idct_10(int16_t *block, const int16_t *qmat)
-{
-    int i;
-
-    for (i = 0; i < 64; i++)
-        block[i] *= qmat[i];
-
-    for (i = 0; i < 8; i++)
-        idctRowCondDC_extrashift_10(block + i*8, 2);
-
-    for (i = 0; i < 8; i++) {
-        block[i] += 8192;
-        idctSparseCol_extrashift_10(block + i);
-    }
-}
-
-void ff_prores_idct_12(int16_t *block, const int16_t *qmat)
-{
-    int i;
-
-    for (i = 0; i < 64; i++)
-        block[i] *= qmat[i];
-
-    for (i = 0; i < 8; i++)
-        idctRowCondDC_int16_12bit(block + i*8, 0);
-
-    for (i = 0; i < 8; i++) {
-        block[i] += 8192;
-        idctSparseCol_int16_12bit(block + i);
-    }
-}
diff --git a/libavcodec/simple_idct.h b/libavcodec/simple_idct.h
index 20578b3347..a3f6cf9111 100644
--- a/libavcodec/simple_idct.h
+++ b/libavcodec/simple_idct.h
@@ -47,14 +47,6 @@ void ff_simple_idct_put_int16_12bit(uint8_t *dest, ptrdiff_t line_size, int16_t
 void ff_simple_idct_add_int16_12bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
 void ff_simple_idct_int16_12bit(int16_t *block);
 
-/**
- * Special version of ff_simple_idct_int16_10bit() which does dequantization
- * and scales by a factor of 2 more between the two IDCTs to account
- * for larger scale of input coefficients.
- */
-void ff_prores_idct_10(int16_t *block, const int16_t *qmat);
-void ff_prores_idct_12(int16_t *block, const int16_t *qmat);
-
 void ff_simple_idct248_put(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
 
 void ff_simple_idct84_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
diff --git a/libavcodec/simple_idct_template.c b/libavcodec/simple_idct_template.c
index 887e3abb0b..e189ef1a8e 100644
--- a/libavcodec/simple_idct_template.c
+++ b/libavcodec/simple_idct_template.c
@@ -28,8 +28,6 @@
 /* Based upon some commented-out C code from mpeg2dec (idct_mmx.c
  * written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>). */
 
-#include "simple_idct.h"
-
 #include "bit_depth_template.c"
 
 #undef W1
@@ -278,6 +276,7 @@ static inline void FUNC6(idctSparseCol)(idctin *col)
     col[56] = ((int)(a0 - b0) >> COL_SHIFT);
 }
 
+#ifndef PRORES_ONLY
 #ifndef EXTRA_SHIFT
 static inline void FUNC6(idctSparseColPut)(pixel *dest, ptrdiff_t line_size,
                                           idctin *col)
@@ -369,3 +368,4 @@ void FUNC6(ff_simple_idct)(int16_t *block)
 }
 #endif
 #endif
+#endif /* PRORES_ONLY */
diff --git a/libavcodec/tests/dct.c b/libavcodec/tests/dct.c
index 17a0814459..76ad42bdfa 100644
--- a/libavcodec/tests/dct.c
+++ b/libavcodec/tests/dct.c
@@ -52,6 +52,7 @@
 #include "libavcodec/faandct.h"
 #include "libavcodec/faanidct.h"
 #include "libavcodec/dctref.h"
+#include "libavcodec/proresdsp.c"
 
 struct algo {
     const char *name;
@@ -77,7 +78,7 @@ static void ff_prores_idct_wrap(int16_t *dst){
     for(i=0; i<64; i++){
         qmat[i]=4;
     }
-    ff_prores_idct_10(dst, qmat);
+    prores_idct_10(dst, qmat);
     for(i=0; i<64; i++) {
          dst[i] -= 512;
     }
-- 
2.45.2


[-- Attachment #10: 0009-avcodec-proresdec-Reuse-permutation.patch --]
[-- Type: text/x-patch, Size: 1695 bytes --]

From 7e15bc05d247c2d5967ee9a223694a473def29c4 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Mon, 24 Feb 2025 14:40:00 +0100
Subject: [PATCH 9/9] avcodec/proresdec: Reuse permutation

The ProresDSPContext already contains the idct_permutation.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
 libavcodec/proresdec.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/libavcodec/proresdec.c b/libavcodec/proresdec.c
index 18cfc25279..01caa611a0 100644
--- a/libavcodec/proresdec.c
+++ b/libavcodec/proresdec.c
@@ -133,7 +133,6 @@ static void unpack_alpha_12(GetBitContext *gb, uint16_t *dst, int num_coeffs,
 static av_cold int decode_init(AVCodecContext *avctx)
 {
     ProresContext *ctx = avctx->priv_data;
-    uint8_t idct_permutation[64];
 
     avctx->bits_per_raw_sample = 10;
 
@@ -173,11 +172,10 @@ static av_cold int decode_init(AVCodecContext *avctx)
     ff_blockdsp_init(&ctx->bdsp);
     ff_proresdsp_init(&ctx->prodsp, avctx->bits_per_raw_sample);
 
-    ff_init_scantable_permutation(idct_permutation,
-                                  ctx->prodsp.idct_permutation_type);
-
-    ff_permute_scantable(ctx->progressive_scan, ff_prores_progressive_scan, idct_permutation);
-    ff_permute_scantable(ctx->interlaced_scan, ff_prores_interlaced_scan, idct_permutation);
+    ff_permute_scantable(ctx->progressive_scan, ff_prores_progressive_scan,
+                         ctx->prodsp.idct_permutation);
+    ff_permute_scantable(ctx->interlaced_scan,  ff_prores_interlaced_scan,
+                         ctx->prodsp.idct_permutation);
 
     ctx->pix_fmt = AV_PIX_FMT_NONE;
 
-- 
2.45.2


[-- Attachment #11: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".