From: Lynne <dev@lynne.ee> To: ffmpeg-devel@ffmpeg.org Cc: Lynne <dev@lynne.ee> Subject: [FFmpeg-devel] [PATCH 17/18] vulkan: add support for expect/assume Date: Sat, 12 Apr 2025 09:22:48 +0200 Message-ID: <20250412072256.77815-17-dev@lynne.ee> (raw) In-Reply-To: <20250412072256.77815-1-dev@lynne.ee> This commit adds support for compiler hints. While these are not used/needed on AMD, Nvidia benefits from them, gaining a sizeable 10% speedup on 4k. --- libavcodec/vulkan/ffv1_dec.comp | 16 ++++++++-------- libavcodec/vulkan/rangecoder.comp | 12 ++++++------ libavutil/hwcontext_vulkan.c | 7 +++++++ libavutil/vulkan.c | 6 ++++++ libavutil/vulkan_functions.h | 1 + libavutil/vulkan_loader.h | 1 + 6 files changed, 29 insertions(+), 14 deletions(-) diff --git a/libavcodec/vulkan/ffv1_dec.comp b/libavcodec/vulkan/ffv1_dec.comp index fd9b98023c..9eba322b27 100644 --- a/libavcodec/vulkan/ffv1_dec.comp +++ b/libavcodec/vulkan/ffv1_dec.comp @@ -31,7 +31,7 @@ #ifdef RGB ivec2 get_pred(ivec2 sp, ivec2 off, int p, int sw, uint8_t quant_table_idx) { - const ivec2 yoff_border1 = off.x == 0 ? ivec2(1, -1) : ivec2(0, 0); + const ivec2 yoff_border1 = expectEXT(off.x == 0, false) ? ivec2(1, -1) : ivec2(0, 0); /* Thanks to the same coincidence as below, we can skip checking if off == 0, 1 */ VTYPE3 top = VTYPE3(TYPE(imageLoad(dec[p], sp + LADDR(off + ivec2(-1, -1) + yoff_border1))[0]), @@ -47,10 +47,10 @@ ivec2 get_pred(ivec2 sp, ivec2 off, int p, int sw, uint8_t quant_table_idx) quant_table[quant_table_idx][1][(top[0] - top[1]) & MAX_QUANT_TABLE_MASK] + quant_table[quant_table_idx][2][(top[1] - top[2]) & MAX_QUANT_TABLE_MASK]; - if (extend_lookup[quant_table_idx] > 0) { + if (expectEXT(extend_lookup[quant_table_idx] > 0, false)) { TYPE cur2 = TYPE(0); - if (off.x > 0) { - const ivec2 yoff_border2 = off.x == 1 ? ivec2(-1, -1) : ivec2(-2, 0); + if (expectEXT(off.x > 0, true)) { + const ivec2 yoff_border2 = expectEXT(off.x == 1, false) ? 
ivec2(-1, -1) : ivec2(-2, 0); cur2 = TYPE(imageLoad(dec[p], sp + LADDR(off + yoff_border2))[0]); } base += quant_table[quant_table_idx][3][(cur2 - cur) & MAX_QUANT_TABLE_MASK]; @@ -110,7 +110,7 @@ ivec2 get_pred(ivec2 sp, ivec2 off, int p, int sw, uint8_t quant_table_idx) #ifndef GOLOMB int get_isymbol(inout RangeCoder c, uint64_t state) { - if (get_rac(c, state)) + if (expectEXT(get_rac(c, state), false)) return 0; state += 1; @@ -120,9 +120,9 @@ int get_isymbol(inout RangeCoder c, uint64_t state) if (!get_rac(c, state + min(e, 9))) break; - if (e == 0) { + if (expectEXT(e == 0, false)) { return get_rac(c, state + 10) ? -1 : 1; - } else if (e > 31) { + } else if (expectEXT(e > 31, false)) { corrupt = true; return 0; } @@ -274,7 +274,7 @@ void writeout_rgb(in SliceContext sc, ivec2 sp, int w, int y, bool apply_rct) if (transparency != 0) pix.a = int(imageLoad(dec[3], lpos)[0]); - if (apply_rct) + if (expectEXT(apply_rct, true)) pix = transform_sample(pix, sc.slice_rct_coef); imageStore(dst[0], pos, pix); diff --git a/libavcodec/vulkan/rangecoder.comp b/libavcodec/vulkan/rangecoder.comp index ff0432511d..b95c722a5c 100644 --- a/libavcodec/vulkan/rangecoder.comp +++ b/libavcodec/vulkan/rangecoder.comp @@ -141,7 +141,7 @@ void put_rac_equi(inout RangeCoder c, bool bit) c.range -= range1; } - if (c.range < 0x100) + if (expectEXT(c.range < 0x100, false)) renorm_encoder(c); } @@ -157,7 +157,7 @@ void put_rac_terminate(inout RangeCoder c) #endif c.range -= range1; - if (c.range < 0x100) + if (expectEXT(c.range < 0x100, false)) renorm_encoder(c); } @@ -218,7 +218,7 @@ void refill(inout RangeCoder c) { c.range <<= 8; c.low <<= 8; - if (c.bytestream < c.bytestream_end) { + if (expectEXT(c.bytestream < c.bytestream_end, false)) { c.low |= u8buf(c.bytestream).v; c.bytestream++; } else { @@ -239,7 +239,7 @@ bool get_rac(inout RangeCoder c, uint64_t state) c.low = c.low - (bit ? ranged : 0); c.range = (bit ? 0 : ranged) - (bit ? 
range1 : 0); - if (c.range < 0x100) + if (expectEXT(c.range < 0x100, false)) refill(c); return bit; @@ -256,7 +256,7 @@ bool get_rac_direct(inout RangeCoder c, inout uint8_t state) c.low = c.low - (bit ? ranged : 0); c.range = (bit ? 0 : ranged) - (bit ? range1 : 0); - if (c.range < 0x100) + if (expectEXT(c.range < 0x100, false)) refill(c); return bit; @@ -274,7 +274,7 @@ bool get_rac_equi(inout RangeCoder c) c.range = range1; } - if (c.range < 0x100) + if (expectEXT(c.range < 0x100, false)) refill(c); return bit; diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index d11c0274d2..f7d43248e8 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -79,6 +79,7 @@ typedef struct VulkanDeviceFeatures { VkPhysicalDeviceVulkan12Features vulkan_1_2; VkPhysicalDeviceVulkan13Features vulkan_1_3; VkPhysicalDeviceTimelineSemaphoreFeatures timeline_semaphore; + VkPhysicalDeviceShaderExpectAssumeFeatures expect_assume; VkPhysicalDeviceVideoMaintenance1FeaturesKHR video_maintenance_1; #ifdef VK_KHR_video_maintenance2 @@ -209,6 +210,9 @@ static void device_features_init(AVHWDeviceContext *ctx, VulkanDeviceFeatures *f OPT_CHAIN(&feats->timeline_semaphore, FF_VK_EXT_PORTABILITY_SUBSET, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES); + OPT_CHAIN(&feats->expect_assume, FF_VK_EXT_EXPECT_ASSUME, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_EXPECT_ASSUME_FEATURES_KHR); + OPT_CHAIN(&feats->video_maintenance_1, FF_VK_EXT_VIDEO_MAINTENANCE_1, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_MAINTENANCE_1_FEATURES_KHR); #ifdef VK_KHR_video_maintenance2 @@ -301,6 +305,8 @@ static void device_features_copy_needed(VulkanDeviceFeatures *dst, VulkanDeviceF COPY_VAL(relaxed_extended_instruction.shaderRelaxedExtendedInstruction); #endif + COPY_VAL(expect_assume.shaderExpectAssume); + COPY_VAL(optical_flow.opticalFlow); #undef COPY_VAL } @@ -615,6 +621,7 @@ static const VulkanOptExtension optional_device_exts[] = { { 
VK_KHR_COOPERATIVE_MATRIX_EXTENSION_NAME, FF_VK_EXT_COOP_MATRIX }, { VK_NV_OPTICAL_FLOW_EXTENSION_NAME, FF_VK_EXT_OPTICAL_FLOW }, { VK_EXT_SHADER_OBJECT_EXTENSION_NAME, FF_VK_EXT_SHADER_OBJECT }, + { VK_KHR_SHADER_EXPECT_ASSUME_EXTENSION_NAME, FF_VK_EXT_EXPECT_ASSUME }, { VK_KHR_VIDEO_MAINTENANCE_1_EXTENSION_NAME, FF_VK_EXT_VIDEO_MAINTENANCE_1 }, #ifdef VK_KHR_video_maintenance2 { VK_KHR_VIDEO_MAINTENANCE_2_EXTENSION_NAME, FF_VK_EXT_VIDEO_MAINTENANCE_2 }, diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index 7650e83d1d..bee9d3da23 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -2046,6 +2046,12 @@ int ff_vk_shader_init(FFVulkanContext *s, FFVulkanShader *shd, const char *name, GLSLC(0, #extension GL_EXT_scalar_block_layout : require ); GLSLC(0, #extension GL_EXT_shader_explicit_arithmetic_types : require ); GLSLC(0, #extension GL_EXT_control_flow_attributes : require ); + if (s->extensions & FF_VK_EXT_EXPECT_ASSUME) { + GLSLC(0, #extension GL_EXT_expect_assume : require ); + } else { + GLSLC(0, #define assumeEXT(x) (x) ); + GLSLC(0, #define expectEXT(x, c) (x) ); + } if ((s->extensions & FF_VK_EXT_DEBUG_UTILS) && (s->extensions & FF_VK_EXT_RELAXED_EXTENDED_INSTR)) { GLSLC(0, #extension GL_EXT_debug_printf : require ); diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h index 8f2bbb38c9..cd61d71577 100644 --- a/libavutil/vulkan_functions.h +++ b/libavutil/vulkan_functions.h @@ -47,6 +47,7 @@ typedef uint64_t FFVulkanExtensions; #define FF_VK_EXT_SHADER_OBJECT (1ULL << 13) /* VK_EXT_shader_object */ #define FF_VK_EXT_PUSH_DESCRIPTOR (1ULL << 14) /* VK_KHR_push_descriptor */ #define FF_VK_EXT_RELAXED_EXTENDED_INSTR (1ULL << 15) /* VK_KHR_shader_relaxed_extended_instruction */ +#define FF_VK_EXT_EXPECT_ASSUME (1ULL << 16) /* VK_KHR_shader_expect_assume */ /* Video extensions */ #define FF_VK_EXT_VIDEO_QUEUE (1ULL << 36) /* VK_KHR_video_queue */ diff --git a/libavutil/vulkan_loader.h b/libavutil/vulkan_loader.h index 
6d5bbf057a..3641fcb22e 100644 --- a/libavutil/vulkan_loader.h +++ b/libavutil/vulkan_loader.h @@ -76,6 +76,7 @@ static inline uint64_t ff_vk_extensions_to_mask(const char * const *extensions, { VK_KHR_VIDEO_DECODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H265 }, { VK_KHR_VIDEO_DECODE_AV1_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_AV1 }, { VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, FF_VK_EXT_PUSH_DESCRIPTOR }, + { VK_KHR_SHADER_EXPECT_ASSUME_EXTENSION_NAME, FF_VK_EXT_EXPECT_ASSUME }, }; FFVulkanExtensions mask = 0x0; -- 2.47.2 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next prev parent reply other threads:[~2025-04-12 7:26 UTC|newest] Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top 2025-04-12 7:22 [FFmpeg-devel] [PATCH 01/18] hwcontext_vulkan: disable descriptor buffer extension on Intel Lynne 2025-04-12 7:22 ` [FFmpeg-devel] [PATCH 02/18] vulkan_ffv1: enable acceleration " Lynne 2025-04-12 7:22 ` [FFmpeg-devel] [PATCH 03/18] vulkan_ffv1: remove unused define Lynne 2025-04-12 7:22 ` [FFmpeg-devel] [PATCH 04/18] vulkan_ffv1: slightly optimize the range decoder Lynne 2025-04-12 7:22 ` [FFmpeg-devel] [PATCH 05/18] vulkan_ffv1: optimize symbol reader Lynne 2025-04-12 7:22 ` [FFmpeg-devel] [PATCH 06/18] vulkan_ffv1: allocate just as much memory for slice state as needed Lynne 2025-04-12 7:22 ` [FFmpeg-devel] [PATCH 07/18] vulkan_ffv1: init overread/corrupt fields Lynne 2025-04-12 7:22 ` [FFmpeg-devel] [PATCH 08/18] vulkan_ffv1: fallback to upload if mapping packet fails, fix fallback Lynne 2025-04-12 7:22 ` [FFmpeg-devel] [PATCH 09/18] vulkan_ffv1: fix reset shader dependencies Lynne 2025-04-12 7:22 ` [FFmpeg-devel] [PATCH 10/18] vulkan_ffv1: improve buffer barrier correctness for slice state Lynne 2025-04-12 7:22 ` [FFmpeg-devel] [PATCH 11/18] vulkan_ffv1: fix left-2 sample addressing Lynne 2025-04-12 7:22 ` [FFmpeg-devel] [PATCH 12/18] vulkan_ffv1: cache only 2 lines when decoding RGB Lynne 2025-04-12 7:22 ` [FFmpeg-devel] [PATCH 13/18] ffv1/vulkan: redo context count tracking and quant_table_idx management Lynne 2025-04-13 20:39 ` Jerome Martinez 2025-04-12 7:22 ` [FFmpeg-devel] [PATCH 14/18] vulkan_ffv1: externalize extended lookup check Lynne 2025-04-12 7:22 ` [FFmpeg-devel] [PATCH 15/18] vulkan_ffv1: remove need for scratch data during setup Lynne 2025-04-12 7:22 ` [FFmpeg-devel] [PATCH 16/18] vulkan_ffv1: shortcut +-1 coeffs in symbol reading Lynne 2025-04-12 7:22 ` Lynne [this message] 2025-04-12 7:22 ` [FFmpeg-devel] [PATCH 18/18] vulkan_ffv1: add cached symbol reader for AMD Lynne 2025-04-13 13:38 ` 
[FFmpeg-devel] [PATCH 01/18] hwcontext_vulkan: disable descriptor buffer extension on Intel Jerome Martinez
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20250412072256.77815-17-dev@lynne.ee \ --to=dev@lynne.ee \ --cc=ffmpeg-devel@ffmpeg.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel This inbox may be cloned and mirrored by anyone: git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git # If you have public-inbox 1.1+ installed, you may # initialize and index your mirror using the following commands: public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \ ffmpegdev@gitmailbox.com public-inbox-index ffmpegdev Example config snippet for mirrors. AGPL code for this site: git clone https://public-inbox.org/public-inbox.git