From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100]) by master.gitmailbox.com (Postfix) with ESMTPS id 2549B4D154 for ; Mon, 17 Feb 2025 18:31:54 +0000 (UTC) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 911BF68C069; Mon, 17 Feb 2025 20:31:40 +0200 (EET) Received: from vidala.pars.ee (vidala.pars.ee [116.203.72.101]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id CA13668BFFF for ; Mon, 17 Feb 2025 20:31:33 +0200 (EET) DKIM-Signature: v=1; a=rsa-sha256; s=202405r; d=lynne.ee; c=relaxed/relaxed; h=Message-ID:Date:Subject:To:From; t=1739817093; bh=prSQtKyuld5HMe7Voso8iZW L8JI2UyCz/SMAcwYWKYY=; b=o1CixdCOivTT94P1+eo77gNSXZH1T5smNOoxLwdDvaOsXvELyf pIx9rtO8uGRRqrrtQpZHyPsD5ZUGEMW4KrbDNrrFAiIRUl4vadD6/GFCF3Qk5oWr1db9KZlfkHa nKNRNJmz5UK+GbDlv0fgwurE3UARKn8uni1zZAM1IJrKFXtGfGsrgFts/yEU4x4GqXHIl3KgERK LXdMpGHOjBe0ITUG+yYJmSLlfv4OZeLk3wA4hP4pA/o78caqnr1mpqNCBjAg9G2tVGnQCM3NzEw s+ftnN3DbY8OC7EvKfouCrIZmMvXTiQr5G233B14PdjXHdxW4fihGOeHDkP6nQlgl9g==; DKIM-Signature: v=1; a=ed25519-sha256; s=202405e; d=lynne.ee; c=relaxed/relaxed; h=Message-ID:Date:Subject:To:From; t=1739817093; bh=prSQtKyuld5HMe7Voso8iZW L8JI2UyCz/SMAcwYWKYY=; b=IEDYEI+jjqog2gHVcVJyQZtkQhT5L2Js6CZRkrbioLp1BSACip hAgpDA7hbFqhcSLhDYEg37L50klxdTr1D0AA==; From: Lynne To: ffmpeg-devel@ffmpeg.org Date: Mon, 17 Feb 2025 19:31:12 +0100 Message-ID: <20250217183125.57656-2-dev@lynne.ee> X-Mailer: git-send-email 2.47.2 In-Reply-To: <20250217183125.57656-1-dev@lynne.ee> References: <20250217183125.57656-1-dev@lynne.ee> MIME-Version: 1.0 Subject: [FFmpeg-devel] [PATCH 02/11] bwdif_vulkan: convert to storage images X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Cc: Lynne Content-Type: 
text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" Archived-At: List-Archive: List-Post: texture() uses bilinear scaling; imageLoad() accesses the image directly. The reason why texture() was used throughout Vulkan filters is that back when they were written, they were targeting old Intel hardware, which had a texel cache only for sampled images. These days, GPUs have a generic cache that doesn't care what source it gets populated with. Additionally, bypassing the sampling circuitry saves us some performance. Finally, all the old texture() code had an issue where unnormalized coordinates were used, but an offset of 0.5 was not added, hence each pixel ended up being interpolated. This patch fixes that. --- libavfilter/vf_bwdif_vulkan.c | 26 ++++++-------- libavfilter/vulkan/bwdif.comp | 68 +++++++++++++++++------------------ 2 files changed, 45 insertions(+), 49 deletions(-) diff --git a/libavfilter/vf_bwdif_vulkan.c b/libavfilter/vf_bwdif_vulkan.c index 0afe8ac0ed..549e814886 100644 --- a/libavfilter/vf_bwdif_vulkan.c +++ b/libavfilter/vf_bwdif_vulkan.c @@ -34,7 +34,6 @@ typedef struct BWDIFVulkanContext { int initialized; FFVkExecPool e; AVVulkanDeviceQueueFamily *qf; - VkSampler sampler; FFVulkanShader shd; } BWDIFVulkanContext; @@ -73,7 +72,6 @@ static av_cold int init_filter(AVFilterContext *ctx) } RET(ff_vk_exec_pool_init(vkctx, s->qf, &s->e, s->qf->num*4, 0, 0, 0, NULL)); - RET(ff_vk_init_sampler(vkctx, &s->sampler, 1, VK_FILTER_NEAREST)); RET(ff_vk_shader_init(vkctx, &s->shd, "bwdif", VK_SHADER_STAGE_COMPUTE_BIT, @@ -85,27 +83,30 @@ static av_cold int init_filter(AVFilterContext *ctx) desc = (FFVulkanDescriptorSetBinding []) { { .name = "prev", - .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_FLOAT), + .mem_quali = "readonly", .dimensions = 2, .elems = planes, .stages = 
VK_SHADER_STAGE_COMPUTE_BIT, - .samplers = DUP_SAMPLER(s->sampler), }, { .name = "cur", - .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_FLOAT), + .mem_quali = "readonly", .dimensions = 2, .elems = planes, .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .samplers = DUP_SAMPLER(s->sampler), }, { .name = "next", - .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.input_format, FF_VK_REP_FLOAT), + .mem_quali = "readonly", .dimensions = 2, .elems = planes, .stages = VK_SHADER_STAGE_COMPUTE_BIT, - .samplers = DUP_SAMPLER(s->sampler), }, { .name = "dst", @@ -166,7 +167,7 @@ static av_cold int init_filter(AVFilterContext *ctx) GLSLC(2, if (!IS_WITHIN(pos, size)) ); GLSLC(3, return; ); } - GLSLF(2, imageStore(dst[%i], pos, texture(cur[%i], pos)); ,i, i); + GLSLF(2, imageStore(dst[%i], pos, imageLoad(cur[%i], pos)); ,i, i); } GLSLC(1, } ); GLSLC(0, } ); @@ -201,7 +202,7 @@ static void bwdif_vulkan_filter_frame(AVFilterContext *ctx, AVFrame *dst, ff_vk_filter_process_Nin(&s->vkctx, &s->e, &s->shd, dst, (AVFrame *[]){ y->prev, y->cur, y->next }, 3, - s->sampler, &params, sizeof(params)); + VK_NULL_HANDLE, &params, sizeof(params)); if (y->current_field == YADIF_FIELD_END) y->current_field = YADIF_FIELD_NORMAL; @@ -211,15 +212,10 @@ static void bwdif_vulkan_uninit(AVFilterContext *avctx) { BWDIFVulkanContext *s = avctx->priv; FFVulkanContext *vkctx = &s->vkctx; - FFVulkanFunctions *vk = &vkctx->vkfn; ff_vk_exec_pool_free(vkctx, &s->e); ff_vk_shader_free(vkctx, &s->shd); - if (s->sampler) - vk->DestroySampler(vkctx->hwctx->act_dev, s->sampler, - vkctx->hwctx->alloc); - ff_vk_uninit(&s->vkctx); ff_yadif_uninit(avctx); diff --git a/libavfilter/vulkan/bwdif.comp b/libavfilter/vulkan/bwdif.comp index aec58c656b..5c988f472e 100644 --- a/libavfilter/vulkan/bwdif.comp +++ 
b/libavfilter/vulkan/bwdif.comp @@ -30,10 +30,10 @@ vec4 process_intra(vec4 cur[4]) void process_plane_intra(int idx, ivec2 pos) { vec4 dcur[4]; - dcur[0] = texture(cur[idx], pos - ivec2(0, 3)); - dcur[1] = texture(cur[idx], pos - ivec2(0, 1)); - dcur[2] = texture(cur[idx], pos + ivec2(0, 1)); - dcur[3] = texture(cur[idx], pos + ivec2(0, 3)); + dcur[0] = imageLoad(cur[idx], pos - ivec2(0, 3)); + dcur[1] = imageLoad(cur[idx], pos - ivec2(0, 1)); + dcur[2] = imageLoad(cur[idx], pos + ivec2(0, 1)); + dcur[3] = imageLoad(cur[idx], pos + ivec2(0, 3)); imageStore(dst[idx], pos, process_intra(dcur)); } @@ -81,41 +81,41 @@ void process_plane(int idx, const ivec2 pos, bool filter_field, vec4 prev2[5]; vec4 next2[5]; - dcur[0] = texture(cur[idx], pos - ivec2(0, 3)); - dcur[1] = texture(cur[idx], pos - ivec2(0, 1)); - dcur[2] = texture(cur[idx], pos + ivec2(0, 1)); - dcur[3] = texture(cur[idx], pos + ivec2(0, 3)); + dcur[0] = imageLoad(cur[idx], pos - ivec2(0, 3)); + dcur[1] = imageLoad(cur[idx], pos - ivec2(0, 1)); + dcur[2] = imageLoad(cur[idx], pos + ivec2(0, 1)); + dcur[3] = imageLoad(cur[idx], pos + ivec2(0, 3)); - prev1[0] = texture(prev[idx], pos - ivec2(0, 1)); - prev1[1] = texture(prev[idx], pos + ivec2(0, 1)); + prev1[0] = imageLoad(prev[idx], pos - ivec2(0, 1)); + prev1[1] = imageLoad(prev[idx], pos + ivec2(0, 1)); - next1[0] = texture(next[idx], pos - ivec2(0, 1)); - next1[1] = texture(next[idx], pos + ivec2(0, 1)); + next1[0] = imageLoad(next[idx], pos - ivec2(0, 1)); + next1[1] = imageLoad(next[idx], pos + ivec2(0, 1)); if (field_parity) { - prev2[0] = texture(prev[idx], pos - ivec2(0, 4)); - prev2[1] = texture(prev[idx], pos - ivec2(0, 2)); - prev2[2] = texture(prev[idx], pos); - prev2[3] = texture(prev[idx], pos + ivec2(0, 2)); - prev2[4] = texture(prev[idx], pos + ivec2(0, 4)); - - next2[0] = texture(cur[idx], pos - ivec2(0, 4)); - next2[1] = texture(cur[idx], pos - ivec2(0, 2)); - next2[2] = texture(cur[idx], pos); - next2[3] = texture(cur[idx], pos + 
ivec2(0, 2)); - next2[4] = texture(cur[idx], pos + ivec2(0, 4)); + prev2[0] = imageLoad(prev[idx], pos - ivec2(0, 4)); + prev2[1] = imageLoad(prev[idx], pos - ivec2(0, 2)); + prev2[2] = imageLoad(prev[idx], pos); + prev2[3] = imageLoad(prev[idx], pos + ivec2(0, 2)); + prev2[4] = imageLoad(prev[idx], pos + ivec2(0, 4)); + + next2[0] = imageLoad(cur[idx], pos - ivec2(0, 4)); + next2[1] = imageLoad(cur[idx], pos - ivec2(0, 2)); + next2[2] = imageLoad(cur[idx], pos); + next2[3] = imageLoad(cur[idx], pos + ivec2(0, 2)); + next2[4] = imageLoad(cur[idx], pos + ivec2(0, 4)); } else { - prev2[0] = texture(cur[idx], pos - ivec2(0, 4)); - prev2[1] = texture(cur[idx], pos - ivec2(0, 2)); - prev2[2] = texture(cur[idx], pos); - prev2[3] = texture(cur[idx], pos + ivec2(0, 2)); - prev2[4] = texture(cur[idx], pos + ivec2(0, 4)); - - next2[0] = texture(next[idx], pos - ivec2(0, 4)); - next2[1] = texture(next[idx], pos - ivec2(0, 2)); - next2[2] = texture(next[idx], pos); - next2[3] = texture(next[idx], pos + ivec2(0, 2)); - next2[4] = texture(next[idx], pos + ivec2(0, 4)); + prev2[0] = imageLoad(cur[idx], pos - ivec2(0, 4)); + prev2[1] = imageLoad(cur[idx], pos - ivec2(0, 2)); + prev2[2] = imageLoad(cur[idx], pos); + prev2[3] = imageLoad(cur[idx], pos + ivec2(0, 2)); + prev2[4] = imageLoad(cur[idx], pos + ivec2(0, 4)); + + next2[0] = imageLoad(next[idx], pos - ivec2(0, 4)); + next2[1] = imageLoad(next[idx], pos - ivec2(0, 2)); + next2[2] = imageLoad(next[idx], pos); + next2[3] = imageLoad(next[idx], pos + ivec2(0, 2)); + next2[4] = imageLoad(next[idx], pos + ivec2(0, 4)); } imageStore(dst[idx], pos, process_line(prev2, prev1, dcur, next1, next2)); -- 2.47.2 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".