From aa476bdf8172f053963a8e7b885852893c2e5d5c Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt Date: Mon, 24 Feb 2025 14:22:52 +0100 Subject: [PATCH 8/9] avcodec/simple_idct: Move ProRes-only code to proresdsp.c Signed-off-by: Andreas Rheinhardt --- libavcodec/proresdsp.c | 52 +++++++++++++++++++++++++++++-- libavcodec/simple_idct.c | 37 ---------------------- libavcodec/simple_idct.h | 8 ----- libavcodec/simple_idct_template.c | 4 +-- libavcodec/tests/dct.c | 3 +- 5 files changed, 53 insertions(+), 51 deletions(-) diff --git a/libavcodec/proresdsp.c b/libavcodec/proresdsp.c index 20de1cab4f..17726a56e0 100644 --- a/libavcodec/proresdsp.c +++ b/libavcodec/proresdsp.c @@ -24,9 +24,55 @@ #include "libavutil/attributes.h" #include "libavutil/avassert.h" #include "libavutil/common.h" +#include "libavutil/intreadwrite.h" #include "idctdsp.h" #include "proresdsp.h" -#include "simple_idct.h" + +#define IN_IDCT_DEPTH 16 +#define PRORES_ONLY + +#define BIT_DEPTH 10 +#define EXTRA_SHIFT +#include "simple_idct_template.c" +#undef BIT_DEPTH +#undef EXTRA_SHIFT + +#define BIT_DEPTH 12 +#include "simple_idct_template.c" +#undef BIT_DEPTH + +/** + * Special version of ff_simple_idct_int16_10bit() which does dequantization + * and scales by a factor of 2 more between the two IDCTs to account + * for larger scale of input coefficients. + */ +static void prores_idct_10(int16_t *restrict block, const int16_t *restrict qmat) +{ + for (int i = 0; i < 64; i++) + block[i] *= qmat[i]; + + for (int i = 0; i < 8; i++) + idctRowCondDC_extrashift_10(block + i*8, 2); + + for (int i = 0; i < 8; i++) { + block[i] += 8192; + idctSparseCol_extrashift_10(block + i); + } +} + +static void prores_idct_12(int16_t *restrict block, const int16_t *restrict qmat) +{ + for (int i = 0; i < 64; i++) + block[i] *= qmat[i]; + + for (int i = 0; i < 8; i++) + idctRowCondDC_int16_12bit(block + i*8, 0); + + for (int i = 0; i < 8; i++) { + block[i] += 8192; + idctSparseCol_int16_12bit(block + i); + } +} #define CLIP_MIN (1 << 2) ///< minimum value for clipping resulting pixels #define CLIP_MAX_10 (1 << 10) - CLIP_MIN - 1 ///< maximum value for clipping resulting pixels @@ -65,13 +111,13 @@ static void put_pixels_12(uint16_t *dst, ptrdiff_t linesize, const int16_t *in) static void prores_idct_put_10_c(uint16_t *out, ptrdiff_t linesize, int16_t *block, const int16_t *qmat) { - ff_prores_idct_10(block, qmat); + prores_idct_10(block, qmat); put_pixels_10(out, linesize >> 1, block); } static void prores_idct_put_12_c(uint16_t *out, ptrdiff_t linesize, int16_t *block, const int16_t *qmat) { - ff_prores_idct_12(block, qmat); + prores_idct_12(block, qmat); put_pixels_12(out, linesize >> 1, block); } diff --git a/libavcodec/simple_idct.c b/libavcodec/simple_idct.c index eb13cff146..5253afc6df 100644 --- a/libavcodec/simple_idct.c +++ b/libavcodec/simple_idct.c @@ -37,11 +37,6 @@ #define BIT_DEPTH 10 #include "simple_idct_template.c" - -#define EXTRA_SHIFT 2 -#include "simple_idct_template.c" - -#undef EXTRA_SHIFT #undef BIT_DEPTH #define BIT_DEPTH 12 @@ -235,35 +230,3 @@ void ff_simple_idct44_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block) idct4col_add(dest + i, line_size, block + i); } } - -void ff_prores_idct_10(int16_t *block, const int16_t *qmat) -{ - int i; - - for (i = 0; i < 64; i++) - block[i] *= qmat[i]; - - for (i = 0; i < 8; i++) - idctRowCondDC_extrashift_10(block + i*8, 2); - - for (i = 0; i < 8; i++) { - block[i] += 8192; - idctSparseCol_extrashift_10(block + i); - } -} - -void ff_prores_idct_12(int16_t *block, const int16_t *qmat) -{ - int i; - - for (i = 0; i < 64; i++) - block[i] *= qmat[i]; - - for (i = 0; i < 8; i++) - idctRowCondDC_int16_12bit(block + i*8, 0); - - for (i = 0; i < 8; i++) { - block[i] += 8192; - idctSparseCol_int16_12bit(block + i); - } -} diff --git a/libavcodec/simple_idct.h b/libavcodec/simple_idct.h index 20578b3347..a3f6cf9111 100644 --- a/libavcodec/simple_idct.h +++ b/libavcodec/simple_idct.h @@ -47,14 +47,6 @@ void ff_simple_idct_put_int16_12bit(uint8_t *dest, ptrdiff_t line_size, int16_t void ff_simple_idct_add_int16_12bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block); void ff_simple_idct_int16_12bit(int16_t *block); -/** - * Special version of ff_simple_idct_int16_10bit() which does dequantization - * and scales by a factor of 2 more between the two IDCTs to account - * for larger scale of input coefficients. - */ -void ff_prores_idct_10(int16_t *block, const int16_t *qmat); -void ff_prores_idct_12(int16_t *block, const int16_t *qmat); - void ff_simple_idct248_put(uint8_t *dest, ptrdiff_t line_size, int16_t *block); void ff_simple_idct84_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block); diff --git a/libavcodec/simple_idct_template.c b/libavcodec/simple_idct_template.c index 887e3abb0b..e189ef1a8e 100644 --- a/libavcodec/simple_idct_template.c +++ b/libavcodec/simple_idct_template.c @@ -28,8 +28,6 @@ /* Based upon some commented-out C code from mpeg2dec (idct_mmx.c * written by Aaron Holtzman ). */ -#include "simple_idct.h" - #include "bit_depth_template.c" #undef W1 @@ -278,6 +276,7 @@ static inline void FUNC6(idctSparseCol)(idctin *col) col[56] = ((int)(a0 - b0) >> COL_SHIFT); } +#ifndef PRORES_ONLY #ifndef EXTRA_SHIFT static inline void FUNC6(idctSparseColPut)(pixel *dest, ptrdiff_t line_size, idctin *col) @@ -369,3 +368,4 @@ void FUNC6(ff_simple_idct)(int16_t *block) } #endif #endif +#endif /* PRORES_ONLY */ diff --git a/libavcodec/tests/dct.c b/libavcodec/tests/dct.c index 17a0814459..76ad42bdfa 100644 --- a/libavcodec/tests/dct.c +++ b/libavcodec/tests/dct.c @@ -52,6 +52,7 @@ #include "libavcodec/faandct.h" #include "libavcodec/faanidct.h" #include "libavcodec/dctref.h" +#include "libavcodec/proresdsp.c" struct algo { const char *name; @@ -77,7 +78,7 @@ static void ff_prores_idct_wrap(int16_t *dst){ for(i=0; i<64; i++){ qmat[i]=4; } - ff_prores_idct_10(dst, qmat); + prores_idct_10(dst, qmat); for(i=0; i<64; i++) { dst[i] -= 512; } -- 2.45.2