[FFmpeg-devel] [PATCH] vulkan/prores: Adopt the same IDCT routine as the prores-raw hwaccel (PR #20819)

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed

From: averne via ffmpeg-devel <ffmpeg-devel@ffmpeg.org>
To: ffmpeg-devel@ffmpeg.org
Cc: averne <code@ffmpeg.org>
Subject: [FFmpeg-devel] [PATCH] vulkan/prores: Adopt the same IDCT routine as the prores-raw hwaccel (PR #20819)
Date: Sun, 02 Nov 2025 19:27:33 -0000
Message-ID: <176211165375.25.10975636295279296971@2cb04c0e5124> (raw)

PR #20819 opened by averne
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20819
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/20819.patch

The added rounding at the final output conforms
to the SMPTE document and reduces the deviation
from the software decoder's output.


>From 7639b6fd0cec3e7ae31f1d0c2d1fc491dbd937e5 Mon Sep 17 00:00:00 2001
From: averne <averne381@gmail.com>
Date: Sun, 2 Nov 2025 20:23:28 +0100
Subject: [PATCH] vulkan/prores: Adopt the same IDCT routine as the prores-raw
 hwaccel

The added rounding at the final output conforms
to the SMPTE document and reduces the deviation
from the software decoder's output.
---
 libavcodec/vulkan/prores_idct.comp | 105 +++++++++++++++++++----------
 1 file changed, 68 insertions(+), 37 deletions(-)

diff --git a/libavcodec/vulkan/prores_idct.comp b/libavcodec/vulkan/prores_idct.comp
index 642fcb5bd5..8ad3b7f58b 100644
--- a/libavcodec/vulkan/prores_idct.comp
+++ b/libavcodec/vulkan/prores_idct.comp
@@ -37,47 +37,77 @@ void put_px(uint tex_idx, ivec2 pos, uint v)
 #endif
 }
 
+const float idct_8x8_scales[] = {
+    0.353553390593274f, // cos(4 * pi/16) / 2
+    0.490392640201615f, // cos(1 * pi/16) / 2
+    0.461939766255643f, // cos(2 * pi/16) / 2
+    0.415734806151273f, // cos(3 * pi/16) / 2
+    0.353553390593274f, // cos(4 * pi/16) / 2
+    0.277785116509801f, // cos(5 * pi/16) / 2
+    0.191341716182545f, // cos(6 * pi/16) / 2
+    0.097545161008064f, // cos(7 * pi/16) / 2
+};
+
 /* 7.4 Inverse Transform */
 void idct(uint block, uint offset, uint stride)
 {
-    float c0 = blocks[block][0*stride + offset];
-    float c1 = blocks[block][1*stride + offset];
-    float c2 = blocks[block][2*stride + offset];
-    float c3 = blocks[block][3*stride + offset];
-    float c4 = blocks[block][4*stride + offset];
-    float c5 = blocks[block][5*stride + offset];
-    float c6 = blocks[block][6*stride + offset];
-    float c7 = blocks[block][7*stride + offset];
+    float t0, t1, t2, t3, t4, t5, t6, t7, u8;
+    float u0, u1, u2, u3, u4, u5, u6, u7;
 
-    float tmp1 = c6 * 1.4142134189605712891 + (c2 - c6);
-    float tmp2 = c6 * 1.4142134189605712891 - (c2 - c6);
+    /* Input */
+    t0 = blocks[block][0*stride + offset];
+    u4 = blocks[block][1*stride + offset];
+    t2 = blocks[block][2*stride + offset];
+    u6 = blocks[block][3*stride + offset];
+    t1 = blocks[block][4*stride + offset];
+    u5 = blocks[block][5*stride + offset];
+    t3 = blocks[block][6*stride + offset];
+    u7 = blocks[block][7*stride + offset];
 
-    float a1 = (c0 + c4) * 0.35355341434478759766 + tmp1 * 0.46193981170654296875;
-    float a4 = (c0 + c4) * 0.35355341434478759766 - tmp1 * 0.46193981170654296875;
+    /* Embedded scaled inverse 4-point Type-II DCT */
+    u0 = t0 + t1;
+    u1 = t0 - t1;
+    u3 = t2 + t3;
+    u2 = (t2 - t3)*(1.4142135623730950488016887242097f) - u3;
+    t0 = u0 + u3;
+    t3 = u0 - u3;
+    t1 = u1 + u2;
+    t2 = u1 - u2;
 
-    float a3 = (c0 - c4) * 0.35355341434478759766 + tmp2 * 0.19134169816970825195;
-    float a2 = (c0 - c4) * 0.35355341434478759766 - tmp2 * 0.19134169816970825195;
+    /* Embedded scaled inverse 4-point Type-IV DST */
+    t5 = u5 + u6;
+    t6 = u5 - u6;
+    t7 = u4 + u7;
+    t4 = u4 - u7;
+    u7 = t7 + t5;
+    u5 = (t7 - t5)*(1.4142135623730950488016887242097f);
+    u8 = (t4 + t6)*(1.8477590650225735122563663787936f);
+    u4 = u8 - t4*(1.0823922002923939687994464107328f);
+    u6 = u8 - t6*(2.6131259297527530557132863468544f);
+    t7 = u7;
+    t6 = t7 - u6;
+    t5 = t6 + u5;
+    t4 = t5 - u4;
 
-    float tmp3 = (c3 - c5) * 0.70710682868957519531 + c7;
-    float tmp4 = (c3 - c5) * 0.70710682868957519531 - c7;
+    /* Butterflies */
+    u0 = t0 + t7;
+    u7 = t0 - t7;
+    u6 = t1 + t6;
+    u1 = t1 - t6;
+    u2 = t2 + t5;
+    u5 = t2 - t5;
+    u4 = t3 + t4;
+    u3 = t3 - t4;
 
-    float tmp5 = (c5 - c7) *  1.4142134189605712891 + (c5 - c7) + (c1 - c3);
-    float tmp6 = (c5 - c7) * -1.4142134189605712891 + (c5 - c7) + (c1 - c3);
-
-    float m1 = tmp3 *  2.6131260395050048828 + tmp5;
-    float m4 = tmp3 * -2.6131260395050048828 + tmp5;
-
-    float m2 = tmp4 *  1.0823919773101806641 + tmp6;
-    float m3 = tmp4 * -1.0823919773101806641 + tmp6;
-
-    blocks[block][0*stride + offset] = m1 *  0.49039259552955627441  + a1;
-    blocks[block][7*stride + offset] = m1 * -0.49039259552955627441  + a1;
-    blocks[block][1*stride + offset] = m2 *  0.41573479771614074707  + a2;
-    blocks[block][6*stride + offset] = m2 * -0.41573479771614074707  + a2;
-    blocks[block][2*stride + offset] = m3 *  0.27778509259223937988  + a3;
-    blocks[block][5*stride + offset] = m3 * -0.27778509259223937988  + a3;
-    blocks[block][3*stride + offset] = m4 *  0.097545139491558074951 + a4;
-    blocks[block][4*stride + offset] = m4 * -0.097545139491558074951 + a4;
+    /* Output */
+    blocks[block][0*stride + offset] = u0;
+    blocks[block][1*stride + offset] = u1;
+    blocks[block][2*stride + offset] = u2;
+    blocks[block][3*stride + offset] = u3;
+    blocks[block][4*stride + offset] = u4;
+    blocks[block][5*stride + offset] = u5;
+    blocks[block][6*stride + offset] = u6;
+    blocks[block][7*stride + offset] = u7;
 }
 
 void main(void)
@@ -90,14 +120,15 @@ void main(void)
     /* Coalesced load of DCT coeffs in shared memory, second part of inverse quantization */
     if (act) {
         /**
-         * According to spec indexing an array in push constant memory with
+         * According to the VK spec indexing an array in push constant memory with
          * a non-dynamically uniform value is illegal ($15.9.1 in v1.4.326),
          * so copy the whole matrix locally.
          */
         uint8_t[64] qmat = comp == 0 ? qmat_luma : qmat_chroma;
         [[unroll]] for (uint i = 0; i < 8; ++i) {
-            int v = sign_extend(int(get_px(comp, ivec2(gid.x, (gid.y << 3) | i))), 16);
-            blocks[block][i * 9 + idx] = float(v * int(qmat[(i << 3) + idx]));
+            int   c = sign_extend(int(get_px(comp, ivec2(gid.x, (gid.y << 3) | i))), 16);
+            float v = float(c * int(qmat[(i << 3) + idx]));
+            blocks[block][i * 9 + idx] = v * idct_8x8_scales[idx] * idct_8x8_scales[i];
         }
     }
 
@@ -116,7 +147,7 @@ void main(void)
     barrier();
     if (act) {
         [[unroll]] for (uint i = 0; i < 8; ++i) {
-            float v = blocks[block][i * 9 + idx] * fact + off;
+            float v = round(blocks[block][i * 9 + idx] * fact + off);
             put_px(comp, ivec2(gid.x, (gid.y << 3) | i), clamp(int(v), 0, maxv) << shift);
         }
     }
-- 
2.49.1

_______________________________________________
ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org
To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org

                 reply	other threads:[~2025-11-02 19:28 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=176211165375.25.10975636295279296971@2cb04c0e5124 \
    --to=ffmpeg-devel@ffmpeg.org \
    --cc=code@ffmpeg.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git