From aa476bdf8172f053963a8e7b885852893c2e5d5c Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Mon, 24 Feb 2025 14:22:52 +0100
Subject: [PATCH 8/9] avcodec/simple_idct: Move ProRes-only code to proresdsp.c

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
 libavcodec/proresdsp.c            | 52 +++++++++++++++++++++++++++++--
 libavcodec/simple_idct.c          | 37 ----------------------
 libavcodec/simple_idct.h          |  8 -----
 libavcodec/simple_idct_template.c |  4 +--
 libavcodec/tests/dct.c            |  3 +-
 5 files changed, 53 insertions(+), 51 deletions(-)

diff --git a/libavcodec/proresdsp.c b/libavcodec/proresdsp.c
index 20de1cab4f..17726a56e0 100644
--- a/libavcodec/proresdsp.c
+++ b/libavcodec/proresdsp.c
@@ -24,9 +24,55 @@
 #include "libavutil/attributes.h"
 #include "libavutil/avassert.h"
 #include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
 #include "idctdsp.h"
 #include "proresdsp.h"
-#include "simple_idct.h"
+
+#define IN_IDCT_DEPTH 16
+#define PRORES_ONLY
+
+#define BIT_DEPTH 10
+#define EXTRA_SHIFT
+#include "simple_idct_template.c"
+#undef BIT_DEPTH
+#undef EXTRA_SHIFT
+
+#define BIT_DEPTH 12
+#include "simple_idct_template.c"
+#undef BIT_DEPTH
+
+/**
+ * Special version of ff_simple_idct_int16_10bit(): dequantizes block[] in place
+ * by qmat[] and scales by a factor of 2 more between the row and column IDCTs
+ * to account for the larger scale of the input coefficients.
+ */
+static void prores_idct_10(int16_t *restrict block, const int16_t *restrict qmat)
+{
+    for (int i = 0; i < 64; i++)    /* dequantize all 64 coefficients in place */
+        block[i] *= qmat[i];
+
+    for (int i = 0; i < 8; i++)     /* row pass: one call per row of 8 coefficients */
+        idctRowCondDC_extrashift_10(block + i*8, 2);
+
+    for (int i = 0; i < 8; i++) {   /* column pass: one call per column */
+        block[i] += 8192;           /* NOTE(review): bias on the column's first element; presumably the output level offset -- confirm */
+        idctSparseCol_extrashift_10(block + i);
+    }
+}
+/** 12-bit counterpart of prores_idct_10(): dequantize in place, then row and column IDCT. */
+static void prores_idct_12(int16_t *restrict block, const int16_t *restrict qmat)
+{
+    for (int i = 0; i < 64; i++)    /* dequantize all 64 coefficients in place */
+        block[i] *= qmat[i];
+
+    for (int i = 0; i < 8; i++)     /* row pass; second argument is 0 here, 2 in the 10-bit version */
+        idctRowCondDC_int16_12bit(block + i*8, 0);
+
+    for (int i = 0; i < 8; i++) {   /* column pass: one call per column */
+        block[i] += 8192;           /* NOTE(review): same bias as the 10-bit version; meaning not visible here -- confirm */
+        idctSparseCol_int16_12bit(block + i);
+    }
+}
 
 #define CLIP_MIN (1 << 2)                     ///< minimum value for clipping resulting pixels
 #define CLIP_MAX_10 (1 << 10) - CLIP_MIN - 1  ///< maximum value for clipping resulting pixels
@@ -65,13 +111,13 @@ static void put_pixels_12(uint16_t *dst, ptrdiff_t linesize, const int16_t *in)
 
 static void prores_idct_put_10_c(uint16_t *out, ptrdiff_t linesize, int16_t *block, const int16_t *qmat)
 {
-    ff_prores_idct_10(block, qmat);
+    prores_idct_10(block, qmat);
     put_pixels_10(out, linesize >> 1, block);
 }
 
 static void prores_idct_put_12_c(uint16_t *out, ptrdiff_t linesize, int16_t *block, const int16_t *qmat)
 {
-    ff_prores_idct_12(block, qmat);
+    prores_idct_12(block, qmat);
     put_pixels_12(out, linesize >> 1, block);
 }
 
diff --git a/libavcodec/simple_idct.c b/libavcodec/simple_idct.c
index eb13cff146..5253afc6df 100644
--- a/libavcodec/simple_idct.c
+++ b/libavcodec/simple_idct.c
@@ -37,11 +37,6 @@
 
 #define BIT_DEPTH 10
 #include "simple_idct_template.c"
-
-#define EXTRA_SHIFT  2
-#include "simple_idct_template.c"
-
-#undef EXTRA_SHIFT
 #undef BIT_DEPTH
 
 #define BIT_DEPTH 12
@@ -235,35 +230,3 @@ void ff_simple_idct44_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
         idct4col_add(dest + i, line_size, block + i);
     }
 }
-
-void ff_prores_idct_10(int16_t *block, const int16_t *qmat)
-{
-    int i;
-
-    for (i = 0; i < 64; i++)
-        block[i] *= qmat[i];
-
-    for (i = 0; i < 8; i++)
-        idctRowCondDC_extrashift_10(block + i*8, 2);
-
-    for (i = 0; i < 8; i++) {
-        block[i] += 8192;
-        idctSparseCol_extrashift_10(block + i);
-    }
-}
-
-void ff_prores_idct_12(int16_t *block, const int16_t *qmat)
-{
-    int i;
-
-    for (i = 0; i < 64; i++)
-        block[i] *= qmat[i];
-
-    for (i = 0; i < 8; i++)
-        idctRowCondDC_int16_12bit(block + i*8, 0);
-
-    for (i = 0; i < 8; i++) {
-        block[i] += 8192;
-        idctSparseCol_int16_12bit(block + i);
-    }
-}
diff --git a/libavcodec/simple_idct.h b/libavcodec/simple_idct.h
index 20578b3347..a3f6cf9111 100644
--- a/libavcodec/simple_idct.h
+++ b/libavcodec/simple_idct.h
@@ -47,14 +47,6 @@ void ff_simple_idct_put_int16_12bit(uint8_t *dest, ptrdiff_t line_size, int16_t
 void ff_simple_idct_add_int16_12bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
 void ff_simple_idct_int16_12bit(int16_t *block);
 
-/**
- * Special version of ff_simple_idct_int16_10bit() which does dequantization
- * and scales by a factor of 2 more between the two IDCTs to account
- * for larger scale of input coefficients.
- */
-void ff_prores_idct_10(int16_t *block, const int16_t *qmat);
-void ff_prores_idct_12(int16_t *block, const int16_t *qmat);
-
 void ff_simple_idct248_put(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
 
 void ff_simple_idct84_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
diff --git a/libavcodec/simple_idct_template.c b/libavcodec/simple_idct_template.c
index 887e3abb0b..e189ef1a8e 100644
--- a/libavcodec/simple_idct_template.c
+++ b/libavcodec/simple_idct_template.c
@@ -28,8 +28,6 @@
 /* Based upon some commented-out C code from mpeg2dec (idct_mmx.c
  * written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>). */
 
-#include "simple_idct.h"
-
 #include "bit_depth_template.c"
 
 #undef W1
@@ -278,6 +276,7 @@ static inline void FUNC6(idctSparseCol)(idctin *col)
     col[56] = ((int)(a0 - b0) >> COL_SHIFT);
 }
 
+#ifndef PRORES_ONLY
 #ifndef EXTRA_SHIFT
 static inline void FUNC6(idctSparseColPut)(pixel *dest, ptrdiff_t line_size,
                                           idctin *col)
@@ -369,3 +368,4 @@ void FUNC6(ff_simple_idct)(int16_t *block)
 }
 #endif
 #endif
+#endif /* PRORES_ONLY */
diff --git a/libavcodec/tests/dct.c b/libavcodec/tests/dct.c
index 17a0814459..76ad42bdfa 100644
--- a/libavcodec/tests/dct.c
+++ b/libavcodec/tests/dct.c
@@ -52,6 +52,7 @@
 #include "libavcodec/faandct.h"
 #include "libavcodec/faanidct.h"
 #include "libavcodec/dctref.h"
+#include "libavcodec/proresdsp.c"
 
 struct algo {
     const char *name;
@@ -77,7 +78,7 @@ static void ff_prores_idct_wrap(int16_t *dst){
     for(i=0; i<64; i++){
         qmat[i]=4;
     }
-    ff_prores_idct_10(dst, qmat);
+    prores_idct_10(dst, qmat);
     for(i=0; i<64; i++) {
          dst[i] -= 512;
     }
-- 
2.45.2