Re: [FFmpeg-devel] [PATCH v4 4/4] lavc: implement a Vulkan-based VC-2 encoder Implements a Vulkan based dirac encoder. Supports Haar and Legall wavelets and should work with all wavelet depths.

From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
To: ffmpeg-devel@ffmpeg.org
Subject: Re: [FFmpeg-devel] [PATCH v4 4/4] lavc: implement a Vulkan-based VC-2 encoder Implements a Vulkan based dirac encoder. Supports Haar and Legall wavelets and should work with all wavelet depths.
Date: Mon, 19 May 2025 19:09:12 +0200
Message-ID: <AS8P250MB0744AA8740B5E715C20281768F9CA@AS8P250MB0744.EURP250.PROD.OUTLOOK.COM> (raw)
In-Reply-To: <20250517204907.482987-4-47210458+raphaelthegreat@users.noreply.github.com>

IndecisiveTurtle:
> From: IndecisiveTurtle <geoster3d@gmail.com>
> 
> Performance wise, encoding a 3440x1440 1-minute video is performed in about 2.4 minutes with the cpu encoder running on my Ryzen 5 4600H, while it takes about 1.3 minutes on my NVIDIA GTX 1650

The last iteration of this patchset claimed 2.5 minutes for the software
encoder vs. 30 seconds for the hardware encoder. The software performance
improvement seems small compared to what I expected, and I am surprised
by the hardware slowdown (presuming it was the same file). Was the
switch to the LUT-based writing of codes not beneficial?

> 
> Haar shader has a subgroup optimized variant that applies when configured wavelet depth allows it
> ---
>  configure                                    |   1 +
>  libavcodec/Makefile                          |   3 +
>  libavcodec/allcodecs.c                       |   1 +
>  libavcodec/vc2enc_vulkan.c                   | 775 +++++++++++++++++++
>  libavcodec/vulkan/vc2_dwt_haar.comp          |  82 ++
>  libavcodec/vulkan/vc2_dwt_haar_subgroup.comp |  75 ++
>  libavcodec/vulkan/vc2_dwt_hor_legall.comp    |  82 ++
>  libavcodec/vulkan/vc2_dwt_upload.comp        |  96 +++
>  libavcodec/vulkan/vc2_dwt_ver_legall.comp    |  78 ++
>  libavcodec/vulkan/vc2_encode.comp            | 159 ++++
>  libavcodec/vulkan/vc2_slice_sizes.comp       | 170 ++++
>  11 files changed, 1522 insertions(+)
>  create mode 100644 libavcodec/vc2enc_vulkan.c
>  create mode 100644 libavcodec/vulkan/vc2_dwt_haar.comp
>  create mode 100644 libavcodec/vulkan/vc2_dwt_haar_subgroup.comp
>  create mode 100644 libavcodec/vulkan/vc2_dwt_hor_legall.comp
>  create mode 100644 libavcodec/vulkan/vc2_dwt_upload.comp
>  create mode 100644 libavcodec/vulkan/vc2_dwt_ver_legall.comp
>  create mode 100644 libavcodec/vulkan/vc2_encode.comp
>  create mode 100644 libavcodec/vulkan/vc2_slice_sizes.comp
> 

> +#define VC2ENC_FLAGS (AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
> +static const AVOption vc2enc_options[] = {
> +    {"tolerance",     "Max undershoot in percent", offsetof(VC2EncContext, tolerance), AV_OPT_TYPE_DOUBLE, {.dbl = 5.0f}, 0.0f, 45.0f, VC2ENC_FLAGS, .unit = "tolerance"},
> +    {"slice_width",   "Slice width",  offsetof(VC2EncContext, slice_width), AV_OPT_TYPE_INT, {.i64 = 32}, 32, 1024, VC2ENC_FLAGS, .unit = "slice_width"},
> +    {"slice_height",  "Slice height", offsetof(VC2EncContext, slice_height), AV_OPT_TYPE_INT, {.i64 = 16}, 8, 1024, VC2ENC_FLAGS, .unit = "slice_height"},
> +    {"wavelet_depth", "Transform depth", offsetof(VC2EncContext, wavelet_depth), AV_OPT_TYPE_INT, {.i64 = 4}, 1, 5, VC2ENC_FLAGS, .unit = "wavelet_depth"},
> +    {"wavelet_type",  "Transform type",  offsetof(VC2EncContext, wavelet_idx), AV_OPT_TYPE_INT, {.i64 = VC2_TRANSFORM_5_3}, 0, VC2_TRANSFORMS_NB, VC2ENC_FLAGS, .unit = "wavelet_idx"},

You don't allow the 9_7 wavelet here (intentionally?), but then you
should restrict the range to disallow the value 0 (== VC2_TRANSFORM_9_7).

> +        {"5_3",          "LeGall (5,3)",            0, AV_OPT_TYPE_CONST, {.i64 = VC2_TRANSFORM_5_3},    INT_MIN, INT_MAX, VC2ENC_FLAGS, .unit = "wavelet_idx"},
> +        {"haar",         "Haar (with shift)",       0, AV_OPT_TYPE_CONST, {.i64 = VC2_TRANSFORM_HAAR_S}, INT_MIN, INT_MAX, VC2ENC_FLAGS, .unit = "wavelet_idx"},
> +        {"haar_noshift", "Haar (without shift)",    0, AV_OPT_TYPE_CONST, {.i64 = VC2_TRANSFORM_HAAR},   INT_MIN, INT_MAX, VC2ENC_FLAGS, .unit = "wavelet_idx"},
> +    {"qm", "Custom quantization matrix", offsetof(VC2EncContext, quant_matrix), AV_OPT_TYPE_INT, {.i64 = VC2_QM_DEF}, 0, VC2_QM_NB, VC2ENC_FLAGS, .unit = "quant_matrix"},
> +        {"default",   "Default from the specifications", 0, AV_OPT_TYPE_CONST, {.i64 = VC2_QM_DEF}, INT_MIN, INT_MAX, VC2ENC_FLAGS, .unit = "quant_matrix"},
> +        {"color",     "Prevents low bitrate discoloration", 0, AV_OPT_TYPE_CONST, {.i64 = VC2_QM_COL}, INT_MIN, INT_MAX, VC2ENC_FLAGS, .unit = "quant_matrix"},
> +        {"flat",      "Optimize for PSNR", 0, AV_OPT_TYPE_CONST, {.i64 = VC2_QM_FLAT}, INT_MIN, INT_MAX, VC2ENC_FLAGS, .unit = "quant_matrix"},
> +    {NULL}
> +};
> +
> +static const AVClass vc2enc_class = {
> +    .class_name = "vc2_vulkan_encoder",
> +    .category = AV_CLASS_CATEGORY_ENCODER,
> +    .option = vc2enc_options,
> +    .item_name = av_default_item_name,
> +    .version = LIBAVUTIL_VERSION_INT
> +};
> +
> +static const FFCodecDefault vc2enc_defaults[] = {
> +    { "b",              "600000000"   },
> +    { NULL },
> +};
> +
> +static const AVCodecHWConfigInternal *const ff_vc2_hw_configs[] = {

This array is static (file-local), so it should not use the ff_ prefix.

> +    HW_CONFIG_ENCODER_FRAMES(VULKAN, VULKAN),
> +    HW_CONFIG_ENCODER_DEVICE(NONE,  VULKAN),
> +    NULL,
> +};
> +
> +const FFCodec ff_vc2_vulkan_encoder = {
> +    .p.name         = "vc2_vulkan",
> +    CODEC_LONG_NAME("SMPTE VC-2"),
> +    .p.type         = AVMEDIA_TYPE_VIDEO,
> +    .p.id           = AV_CODEC_ID_DIRAC,
> +    .p.capabilities = AV_CODEC_CAP_HARDWARE,
> +    .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
> +    .priv_data_size = sizeof(VC2EncVulkanContext),
> +    .init           = vc2_encode_init,
> +    .close          = vc2_encode_end,
> +    FF_CODEC_ENCODE_CB(vc2_encode_frame),
> +    .p.priv_class   = &vc2enc_class,
> +    .defaults       = vc2enc_defaults,
> +    CODEC_PIXFMTS(AV_PIX_FMT_VULKAN),
> +    .hw_configs     = ff_vc2_hw_configs,
> +};
> diff --git a/libavcodec/vulkan/vc2_encode.comp b/libavcodec/vulkan/vc2_encode.comp
> new file mode 100644
> index 0000000000..4d8adcca61
> --- /dev/null
> +++ b/libavcodec/vulkan/vc2_encode.comp
> @@ -0,0 +1,159 @@
> +/*
> + * VC2 codec
> + *
> + * Copyright (c) 2025 raphaelthegreat <geoster3d@gmail.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#extension GL_EXT_shader_explicit_arithmetic_types : require
> +#extension GL_EXT_scalar_block_layout : require
> +#extension GL_EXT_buffer_reference : require
> +#extension GL_EXT_debug_printf : require
> +
> +#define MAX_DWT_LEVELS (5)
> +
> +layout(push_constant, scalar) uniform ComputeInfo {
> +    u8buf bytestream;
> +    ivec2 num_slices;
> +    int wavelet_depth;
> +    int size_scaler;
> +    int prefix_bytes;
> +};
> +
> +void put_vc2_ue_uint(inout PutBitContext pb, uint val)
> +{
> +    uint32_t pbits = 1;
> +    int bits = 1;
> +
> +    ++val;
> +
> +    while ((val >> 8) != 0)
> +    {
> +        pbits |= uint32_t(interleaved_ue_golomb_tab[val & 0xff]) << bits;
> +        val >>= 8;
> +        bits += 16;
> +    }
> +
> +    pbits |= uint32_t(top_interleaved_ue_golomb_tab[val]) << bits;
> +    bits  += golomb_len_tab[val];
> +    put_bits(pb, bits, pbits);

I see you switched to a LUT-based approach; yet you use 32 bits,
similarly to what the software decoder did before
af9935835335cae1ae5a4ec7fc14c1b5e25c1f2d. Can you guarantee that the
encoded coefficients fit into 32 bits? Is this a requirement/consequence
of the spec?

> +}
> +
> +int quants[MAX_DWT_LEVELS][4];
> +
> +int subband_coord(int index, int h, int lvl)
> +{
> +    int coord = index;
> +    coord <<= 1;
> +    coord |= h;
> +    coord <<= (wavelet_depth-lvl-1);
> +    return coord;
> +}
> +
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".