Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
From: Lynne <dev@lynne.ee>
To: ffmpeg-devel@ffmpeg.org
Cc: Lynne <dev@lynne.ee>
Subject: [FFmpeg-devel] [PATCH 17/18] vulkan: add support for expect/assume
Date: Sat, 12 Apr 2025 09:22:48 +0200
Message-ID: <20250412072256.77815-17-dev@lynne.ee> (raw)
In-Reply-To: <20250412072256.77815-1-dev@lynne.ee>

This commit adds support for compiler hints.
While on AMD these are not used/needed, Nvidia benefits from them, and gives
a sizeable 10% speedup on 4k.
---
 libavcodec/vulkan/ffv1_dec.comp   | 16 ++++++++--------
 libavcodec/vulkan/rangecoder.comp | 12 ++++++------
 libavutil/hwcontext_vulkan.c      |  7 +++++++
 libavutil/vulkan.c                |  6 ++++++
 libavutil/vulkan_functions.h      |  1 +
 libavutil/vulkan_loader.h         |  1 +
 6 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/libavcodec/vulkan/ffv1_dec.comp b/libavcodec/vulkan/ffv1_dec.comp
index fd9b98023c..9eba322b27 100644
--- a/libavcodec/vulkan/ffv1_dec.comp
+++ b/libavcodec/vulkan/ffv1_dec.comp
@@ -31,7 +31,7 @@
 #ifdef RGB
 ivec2 get_pred(ivec2 sp, ivec2 off, int p, int sw, uint8_t quant_table_idx)
 {
-    const ivec2 yoff_border1 = off.x == 0 ? ivec2(1, -1) : ivec2(0, 0);
+    const ivec2 yoff_border1 = expectEXT(off.x == 0, false) ? ivec2(1, -1) : ivec2(0, 0);
 
     /* Thanks to the same coincidence as below, we can skip checking if off == 0, 1 */
     VTYPE3 top  = VTYPE3(TYPE(imageLoad(dec[p], sp + LADDR(off + ivec2(-1, -1) + yoff_border1))[0]),
@@ -47,10 +47,10 @@ ivec2 get_pred(ivec2 sp, ivec2 off, int p, int sw, uint8_t quant_table_idx)
                quant_table[quant_table_idx][1][(top[0] - top[1]) & MAX_QUANT_TABLE_MASK] +
                quant_table[quant_table_idx][2][(top[1] - top[2]) & MAX_QUANT_TABLE_MASK];
 
-    if (extend_lookup[quant_table_idx] > 0) {
+    if (expectEXT(extend_lookup[quant_table_idx] > 0, false)) {
         TYPE cur2 = TYPE(0);
-        if (off.x > 0) {
-            const ivec2 yoff_border2 = off.x == 1 ? ivec2(-1, -1) : ivec2(-2, 0);
+        if (expectEXT(off.x > 0, true)) {
+            const ivec2 yoff_border2 = expectEXT(off.x == 1, false) ? ivec2(-1, -1) : ivec2(-2, 0);
             cur2 = TYPE(imageLoad(dec[p], sp + LADDR(off + yoff_border2))[0]);
         }
         base += quant_table[quant_table_idx][3][(cur2 - cur) & MAX_QUANT_TABLE_MASK];
@@ -110,7 +110,7 @@ ivec2 get_pred(ivec2 sp, ivec2 off, int p, int sw, uint8_t quant_table_idx)
 #ifndef GOLOMB
 int get_isymbol(inout RangeCoder c, uint64_t state)
 {
-    if (get_rac(c, state))
+    if (expectEXT(get_rac(c, state), false))
         return 0;
 
     state += 1;
@@ -120,9 +120,9 @@ int get_isymbol(inout RangeCoder c, uint64_t state)
         if (!get_rac(c, state + min(e, 9)))
             break;
 
-    if (e == 0) {
+    if (expectEXT(e == 0, false)) {
         return get_rac(c, state + 10) ? -1 : 1;
-    } else if (e > 31) {
+    } else if (expectEXT(e > 31, false)) {
         corrupt = true;
         return 0;
     }
@@ -274,7 +274,7 @@ void writeout_rgb(in SliceContext sc, ivec2 sp, int w, int y, bool apply_rct)
         if (transparency != 0)
             pix.a = int(imageLoad(dec[3], lpos)[0]);
 
-        if (apply_rct)
+        if (expectEXT(apply_rct, true))
             pix = transform_sample(pix, sc.slice_rct_coef);
 
         imageStore(dst[0], pos, pix);
diff --git a/libavcodec/vulkan/rangecoder.comp b/libavcodec/vulkan/rangecoder.comp
index ff0432511d..b95c722a5c 100644
--- a/libavcodec/vulkan/rangecoder.comp
+++ b/libavcodec/vulkan/rangecoder.comp
@@ -141,7 +141,7 @@ void put_rac_equi(inout RangeCoder c, bool bit)
         c.range -= range1;
     }
 
-    if (c.range < 0x100)
+    if (expectEXT(c.range < 0x100, false))
         renorm_encoder(c);
 }
 
@@ -157,7 +157,7 @@ void put_rac_terminate(inout RangeCoder c)
 #endif
 
     c.range -= range1;
-    if (c.range < 0x100)
+    if (expectEXT(c.range < 0x100, false))
         renorm_encoder(c);
 }
 
@@ -218,7 +218,7 @@ void refill(inout RangeCoder c)
 {
     c.range <<= 8;
     c.low   <<= 8;
-    if (c.bytestream < c.bytestream_end) {
+    if (expectEXT(c.bytestream < c.bytestream_end, false)) {
         c.low |= u8buf(c.bytestream).v;
         c.bytestream++;
     } else {
@@ -239,7 +239,7 @@ bool get_rac(inout RangeCoder c, uint64_t state)
     c.low = c.low - (bit ? ranged : 0);
     c.range = (bit ? 0 : ranged) - (bit ? range1 : 0);
 
-    if (c.range < 0x100)
+    if (expectEXT(c.range < 0x100, false))
         refill(c);
 
     return bit;
@@ -256,7 +256,7 @@ bool get_rac_direct(inout RangeCoder c, inout uint8_t state)
     c.low = c.low - (bit ? ranged : 0);
     c.range = (bit ? 0 : ranged) - (bit ? range1 : 0);
 
-    if (c.range < 0x100)
+    if (expectEXT(c.range < 0x100, false))
         refill(c);
 
     return bit;
@@ -274,7 +274,7 @@ bool get_rac_equi(inout RangeCoder c)
         c.range = range1;
     }
 
-    if (c.range < 0x100)
+    if (expectEXT(c.range < 0x100, false))
         refill(c);
 
     return bit;
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index d11c0274d2..f7d43248e8 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -79,6 +79,7 @@ typedef struct VulkanDeviceFeatures {
     VkPhysicalDeviceVulkan12Features vulkan_1_2;
     VkPhysicalDeviceVulkan13Features vulkan_1_3;
     VkPhysicalDeviceTimelineSemaphoreFeatures timeline_semaphore;
+    VkPhysicalDeviceShaderExpectAssumeFeatures expect_assume;
 
     VkPhysicalDeviceVideoMaintenance1FeaturesKHR video_maintenance_1;
 #ifdef VK_KHR_video_maintenance2
@@ -209,6 +210,9 @@ static void device_features_init(AVHWDeviceContext *ctx, VulkanDeviceFeatures *f
     OPT_CHAIN(&feats->timeline_semaphore, FF_VK_EXT_PORTABILITY_SUBSET,
               VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES);
 
+    OPT_CHAIN(&feats->expect_assume, FF_VK_EXT_EXPECT_ASSUME,
+              VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_EXPECT_ASSUME_FEATURES_KHR);
+
     OPT_CHAIN(&feats->video_maintenance_1, FF_VK_EXT_VIDEO_MAINTENANCE_1,
               VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_MAINTENANCE_1_FEATURES_KHR);
 #ifdef VK_KHR_video_maintenance2
@@ -301,6 +305,8 @@ static void device_features_copy_needed(VulkanDeviceFeatures *dst, VulkanDeviceF
     COPY_VAL(relaxed_extended_instruction.shaderRelaxedExtendedInstruction);
 #endif
 
+    COPY_VAL(expect_assume.shaderExpectAssume);
+
     COPY_VAL(optical_flow.opticalFlow);
 #undef COPY_VAL
 }
@@ -615,6 +621,7 @@ static const VulkanOptExtension optional_device_exts[] = {
     { VK_KHR_COOPERATIVE_MATRIX_EXTENSION_NAME,               FF_VK_EXT_COOP_MATRIX            },
     { VK_NV_OPTICAL_FLOW_EXTENSION_NAME,                      FF_VK_EXT_OPTICAL_FLOW           },
     { VK_EXT_SHADER_OBJECT_EXTENSION_NAME,                    FF_VK_EXT_SHADER_OBJECT          },
+    { VK_KHR_SHADER_EXPECT_ASSUME_EXTENSION_NAME,             FF_VK_EXT_EXPECT_ASSUME          },
     { VK_KHR_VIDEO_MAINTENANCE_1_EXTENSION_NAME,              FF_VK_EXT_VIDEO_MAINTENANCE_1    },
 #ifdef VK_KHR_video_maintenance2
     { VK_KHR_VIDEO_MAINTENANCE_2_EXTENSION_NAME,              FF_VK_EXT_VIDEO_MAINTENANCE_2    },
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 7650e83d1d..bee9d3da23 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -2046,6 +2046,12 @@ int ff_vk_shader_init(FFVulkanContext *s, FFVulkanShader *shd, const char *name,
     GLSLC(0, #extension GL_EXT_scalar_block_layout : require                  );
     GLSLC(0, #extension GL_EXT_shader_explicit_arithmetic_types : require     );
     GLSLC(0, #extension GL_EXT_control_flow_attributes : require              );
+    if (s->extensions & FF_VK_EXT_EXPECT_ASSUME) {
+        GLSLC(0, #extension GL_EXT_expect_assume : require                    );
+    } else {
+        GLSLC(0, #define assumeEXT(x) (x)                                     );
+        GLSLC(0, #define expectEXT(x) (x)                                     );
+    }
     if ((s->extensions & FF_VK_EXT_DEBUG_UTILS) &&
         (s->extensions & FF_VK_EXT_RELAXED_EXTENDED_INSTR)) {
         GLSLC(0, #extension GL_EXT_debug_printf : require                     );
diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h
index 8f2bbb38c9..cd61d71577 100644
--- a/libavutil/vulkan_functions.h
+++ b/libavutil/vulkan_functions.h
@@ -47,6 +47,7 @@ typedef uint64_t FFVulkanExtensions;
 #define FF_VK_EXT_SHADER_OBJECT          (1ULL << 13) /* VK_EXT_shader_object */
 #define FF_VK_EXT_PUSH_DESCRIPTOR        (1ULL << 14) /* VK_KHR_push_descriptor */
 #define FF_VK_EXT_RELAXED_EXTENDED_INSTR (1ULL << 15) /* VK_KHR_shader_relaxed_extended_instruction */
+#define FF_VK_EXT_EXPECT_ASSUME          (1ULL << 16) /* VK_KHR_shader_expect_assume */
 
 /* Video extensions */
 #define FF_VK_EXT_VIDEO_QUEUE            (1ULL << 36) /* VK_KHR_video_queue */
diff --git a/libavutil/vulkan_loader.h b/libavutil/vulkan_loader.h
index 6d5bbf057a..3641fcb22e 100644
--- a/libavutil/vulkan_loader.h
+++ b/libavutil/vulkan_loader.h
@@ -76,6 +76,7 @@ static inline uint64_t ff_vk_extensions_to_mask(const char * const *extensions,
         { VK_KHR_VIDEO_DECODE_H265_EXTENSION_NAME,         FF_VK_EXT_VIDEO_DECODE_H265      },
         { VK_KHR_VIDEO_DECODE_AV1_EXTENSION_NAME,          FF_VK_EXT_VIDEO_DECODE_AV1       },
         { VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME,           FF_VK_EXT_PUSH_DESCRIPTOR        },
+        { VK_KHR_SHADER_EXPECT_ASSUME_EXTENSION_NAME,      FF_VK_EXT_EXPECT_ASSUME          },
     };
 
     FFVulkanExtensions mask = 0x0;
-- 
2.47.2
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

  parent reply	other threads:[~2025-04-12  7:26 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-04-12  7:22 [FFmpeg-devel] [PATCH 01/18] hwcontext_vulkan: disable descriptor buffer extension on Intel Lynne
2025-04-12  7:22 ` [FFmpeg-devel] [PATCH 02/18] vulkan_ffv1: enable acceleration " Lynne
2025-04-12  7:22 ` [FFmpeg-devel] [PATCH 03/18] vulkan_ffv1: remove unused define Lynne
2025-04-12  7:22 ` [FFmpeg-devel] [PATCH 04/18] vulkan_ffv1: slightly optimize the range decoder Lynne
2025-04-12  7:22 ` [FFmpeg-devel] [PATCH 05/18] vulkan_ffv1: optimize symbol reader Lynne
2025-04-12  7:22 ` [FFmpeg-devel] [PATCH 06/18] vulkan_ffv1: allocate just as much memory for slice state as needed Lynne
2025-04-12  7:22 ` [FFmpeg-devel] [PATCH 07/18] vulkan_ffv1: init overread/corrupt fields Lynne
2025-04-12  7:22 ` [FFmpeg-devel] [PATCH 08/18] vulkan_ffv1: fallback to upload if mapping packet fails, fix fallback Lynne
2025-04-12  7:22 ` [FFmpeg-devel] [PATCH 09/18] vulkan_ffv1: fix reset shader dependencies Lynne
2025-04-12  7:22 ` [FFmpeg-devel] [PATCH 10/18] vulkan_ffv1: improve buffer barrier correctness for slice state Lynne
2025-04-12  7:22 ` [FFmpeg-devel] [PATCH 11/18] vulkan_ffv1: fix left-2 sample addressing Lynne
2025-04-12  7:22 ` [FFmpeg-devel] [PATCH 12/18] vulkan_ffv1: cache only 2 lines when decoding RGB Lynne
2025-04-12  7:22 ` [FFmpeg-devel] [PATCH 13/18] ffv1/vulkan: redo context count tracking and quant_table_idx management Lynne
2025-04-13 20:39   ` Jerome Martinez
2025-04-12  7:22 ` [FFmpeg-devel] [PATCH 14/18] vulkan_ffv1: externalize extended lookup check Lynne
2025-04-12  7:22 ` [FFmpeg-devel] [PATCH 15/18] vulkan_ffv1: remove need for scratch data during setup Lynne
2025-04-12  7:22 ` [FFmpeg-devel] [PATCH 16/18] vulkan_ffv1: shortcut +-1 coeffs in symbol reading Lynne
2025-04-12  7:22 ` Lynne [this message]
2025-04-12  7:22 ` [FFmpeg-devel] [PATCH 18/18] vulkan_ffv1: add cached symbol reader for AMD Lynne
2025-04-13 13:38 ` [FFmpeg-devel] [PATCH 01/18] hwcontext_vulkan: disable descriptor buffer extension on Intel Jerome Martinez

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250412072256.77815-17-dev@lynne.ee \
    --to=dev@lynne.ee \
    --cc=ffmpeg-devel@ffmpeg.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git