* [FFmpeg-devel] [PATCH v5 1/4] libavcodec/vc2enc: Split out common functions between software and hardware encoders @ 2025-05-23 20:23 IndecisiveTurtle 2025-05-23 20:23 ` [FFmpeg-devel] [PATCH v5 2/4] libavcodec/vc2enc: Switch quant to int IndecisiveTurtle ` (2 more replies) 0 siblings, 3 replies; 7+ messages in thread From: IndecisiveTurtle @ 2025-05-23 20:23 UTC (permalink / raw) To: ffmpeg-devel; +Cc: IndecisiveTurtle From: IndecisiveTurtle <geoster3d@gmail.com> --- libavcodec/Makefile | 2 +- libavcodec/vc2enc.c | 669 ++----------------------------------- libavcodec/vc2enc_common.c | 571 +++++++++++++++++++++++++++++++ libavcodec/vc2enc_common.h | 168 ++++++++++ 4 files changed, 765 insertions(+), 645 deletions(-) create mode 100644 libavcodec/vc2enc_common.c create mode 100644 libavcodec/vc2enc_common.h diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 77734dff24..bdf0d6742e 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -771,7 +771,7 @@ OBJS-$(CONFIG_VC1_CUVID_DECODER) += cuviddec.o OBJS-$(CONFIG_VC1_MMAL_DECODER) += mmaldec.o OBJS-$(CONFIG_VC1_QSV_DECODER) += qsvdec.o OBJS-$(CONFIG_VC1_V4L2M2M_DECODER) += v4l2_m2m_dec.o -OBJS-$(CONFIG_VC2_ENCODER) += vc2enc.o vc2enc_dwt.o diractab.o +OBJS-$(CONFIG_VC2_ENCODER) += vc2enc.o vc2enc_dwt.o vc2enc_common.o diractab.o OBJS-$(CONFIG_VCR1_DECODER) += vcr1.o OBJS-$(CONFIG_VMDAUDIO_DECODER) += vmdaudio.o OBJS-$(CONFIG_VMDVIDEO_DECODER) += vmdvideo.o diff --git a/libavcodec/vc2enc.c b/libavcodec/vc2enc.c index 99ca95c40a..22a5a1b97c 100644 --- a/libavcodec/vc2enc.c +++ b/libavcodec/vc2enc.c @@ -30,80 +30,12 @@ #include "put_bits.h" #include "version.h" -#include "vc2enc_dwt.h" -#include "diractab.h" - -/* The limited size resolution of each slice forces us to do this */ -#define SSIZE_ROUND(b) (FFALIGN((b), s->size_scaler) + 4 + s->prefix_bytes) +#include "vc2enc_common.h" /* Decides the cutoff point in # of slices to distribute the leftover bytes */ #define SLICE_REDIST_TOTAL 150 -typedef 
struct VC2BaseVideoFormat { - enum AVPixelFormat pix_fmt; - AVRational time_base; - int width, height; - uint8_t interlaced, level; - char name[13]; -} VC2BaseVideoFormat; - -static const VC2BaseVideoFormat base_video_fmts[] = { - { 0 }, /* Custom format, here just to make indexing equal to base_vf */ - { AV_PIX_FMT_YUV420P, { 1001, 15000 }, 176, 120, 0, 1, "QSIF525" }, - { AV_PIX_FMT_YUV420P, { 2, 25 }, 176, 144, 0, 1, "QCIF" }, - { AV_PIX_FMT_YUV420P, { 1001, 15000 }, 352, 240, 0, 1, "SIF525" }, - { AV_PIX_FMT_YUV420P, { 2, 25 }, 352, 288, 0, 1, "CIF" }, - { AV_PIX_FMT_YUV420P, { 1001, 15000 }, 704, 480, 0, 1, "4SIF525" }, - { AV_PIX_FMT_YUV420P, { 2, 25 }, 704, 576, 0, 1, "4CIF" }, - - { AV_PIX_FMT_YUV422P10, { 1001, 30000 }, 720, 480, 1, 2, "SD480I-60" }, - { AV_PIX_FMT_YUV422P10, { 1, 25 }, 720, 576, 1, 2, "SD576I-50" }, - - { AV_PIX_FMT_YUV422P10, { 1001, 60000 }, 1280, 720, 0, 3, "HD720P-60" }, - { AV_PIX_FMT_YUV422P10, { 1, 50 }, 1280, 720, 0, 3, "HD720P-50" }, - { AV_PIX_FMT_YUV422P10, { 1001, 30000 }, 1920, 1080, 1, 3, "HD1080I-60" }, - { AV_PIX_FMT_YUV422P10, { 1, 25 }, 1920, 1080, 1, 3, "HD1080I-50" }, - { AV_PIX_FMT_YUV422P10, { 1001, 60000 }, 1920, 1080, 0, 3, "HD1080P-60" }, - { AV_PIX_FMT_YUV422P10, { 1, 50 }, 1920, 1080, 0, 3, "HD1080P-50" }, - - { AV_PIX_FMT_YUV444P12, { 1, 24 }, 2048, 1080, 0, 4, "DC2K" }, - { AV_PIX_FMT_YUV444P12, { 1, 24 }, 4096, 2160, 0, 5, "DC4K" }, - - { AV_PIX_FMT_YUV422P10, { 1001, 60000 }, 3840, 2160, 0, 6, "UHDTV 4K-60" }, - { AV_PIX_FMT_YUV422P10, { 1, 50 }, 3840, 2160, 0, 6, "UHDTV 4K-50" }, - - { AV_PIX_FMT_YUV422P10, { 1001, 60000 }, 7680, 4320, 0, 7, "UHDTV 8K-60" }, - { AV_PIX_FMT_YUV422P10, { 1, 50 }, 7680, 4320, 0, 7, "UHDTV 8K-50" }, - - { AV_PIX_FMT_YUV422P10, { 1001, 24000 }, 1920, 1080, 0, 3, "HD1080P-24" }, - { AV_PIX_FMT_YUV422P10, { 1001, 30000 }, 720, 486, 1, 2, "SD Pro486" }, -}; -static const int base_video_fmts_len = FF_ARRAY_ELEMS(base_video_fmts); - -enum VC2_QM { - VC2_QM_DEF = 0, - VC2_QM_COL, - 
VC2_QM_FLAT, - - VC2_QM_NB -}; - -typedef struct SubBand { - dwtcoef *buf; - ptrdiff_t stride; - int width; - int height; -} SubBand; - -typedef struct Plane { - SubBand band[MAX_DWT_LEVELS][4]; - dwtcoef *coef_buf; - int width; - int height; - int dwt_width; - int dwt_height; - ptrdiff_t coef_stride; -} Plane; +#define QUANT(c, mul, add, shift) (((mul) * (c) + (add)) >> (shift)) typedef struct SliceArgs { const struct VC2EncContext *ctx; @@ -119,418 +51,6 @@ typedef struct SliceArgs { int bytes; } SliceArgs; -typedef struct TransformArgs { - const struct VC2EncContext *ctx; - Plane *plane; - const void *idata; - ptrdiff_t istride; - int field; - VC2TransformContext t; -} TransformArgs; - -typedef struct VC2EncContext { - AVClass *av_class; - PutBitContext pb; - Plane plane[3]; - AVCodecContext *avctx; - DiracVersionInfo ver; - - SliceArgs *slice_args; - TransformArgs transform_args[3]; - - /* For conversion from unsigned pixel values to signed */ - int diff_offset; - int bpp; - int bpp_idx; - - /* Picture number */ - uint32_t picture_number; - - /* Base video format */ - int base_vf; - int level; - int profile; - - /* Quantization matrix */ - uint8_t quant[MAX_DWT_LEVELS][4]; - int custom_quant_matrix; - - /* Division LUT */ - uint32_t qmagic_lut[116][2]; - - int num_x; /* #slices horizontally */ - int num_y; /* #slices vertically */ - int prefix_bytes; - int size_scaler; - int chroma_x_shift; - int chroma_y_shift; - - /* Rate control stuff */ - int frame_max_bytes; - int slice_max_bytes; - int slice_min_bytes; - int q_ceil; - int q_avg; - - /* Options */ - double tolerance; - int wavelet_idx; - int wavelet_depth; - int strict_compliance; - int slice_height; - int slice_width; - int interlaced; - enum VC2_QM quant_matrix; - - /* Parse code state */ - uint32_t next_parse_offset; - enum DiracParseCodes last_parse_code; -} VC2EncContext; - -/// x_k x_{k-1} ... x_0 -> 0 x_k 0 x_{k - 1} ... 0 x_0 -static uint16_t interleaved_ue_golomb_tab[256]; -/// 1 x_{k-1} ... 
x_0 -> 0 0 0 x_{k - 1} ... 0 x_0 -static uint16_t top_interleaved_ue_golomb_tab[256]; -/// 1 x_{k-1} ... x_0 -> 2 * k -static uint8_t golomb_len_tab[256]; - -static av_cold void vc2_init_static_data(void) -{ - interleaved_ue_golomb_tab[1] = 1; - for (unsigned i = 2; i < 256; ++i) { - golomb_len_tab[i] = golomb_len_tab[i >> 1] + 2; - interleaved_ue_golomb_tab[i] = (interleaved_ue_golomb_tab[i >> 1] << 2) | (i & 1); - top_interleaved_ue_golomb_tab[i] = interleaved_ue_golomb_tab[i] ^ (1 << golomb_len_tab[i]); - } -} - -static av_always_inline void put_vc2_ue_uint_inline(PutBitContext *pb, uint32_t val) -{ - uint64_t pbits = 1; - int bits = 1; - - ++val; - - while (val >> 8) { - pbits |= (uint64_t)interleaved_ue_golomb_tab[val & 0xff] << bits; - val >>= 8; - bits += 16; - } - pbits |= (uint64_t)top_interleaved_ue_golomb_tab[val] << bits; - bits += golomb_len_tab[val]; - - put_bits63(pb, bits, pbits); -} - -static av_noinline void put_vc2_ue_uint(PutBitContext *pb, uint32_t val) -{ - put_vc2_ue_uint_inline(pb, val); -} - -static av_always_inline int count_vc2_ue_uint(uint32_t val) -{ - return 2 * av_log2(val + 1) + 1; -} - -/* VC-2 10.4 - parse_info() */ -static void encode_parse_info(VC2EncContext *s, enum DiracParseCodes pcode) -{ - uint32_t cur_pos, dist; - - align_put_bits(&s->pb); - - cur_pos = put_bytes_count(&s->pb, 0); - - /* Magic string */ - ff_put_string(&s->pb, "BBCD", 0); - - /* Parse code */ - put_bits(&s->pb, 8, pcode); - - /* Next parse offset */ - dist = cur_pos - s->next_parse_offset; - AV_WB32(s->pb.buf + s->next_parse_offset + 5, dist); - s->next_parse_offset = cur_pos; - put_bits32(&s->pb, pcode == DIRAC_PCODE_END_SEQ ? 13 : 0); - - /* Last parse offset */ - put_bits32(&s->pb, s->last_parse_code == DIRAC_PCODE_END_SEQ ? 13 : dist); - - s->last_parse_code = pcode; -} - -/* VC-2 11.1 - parse_parameters() - * The level dictates what the decoder should expect in terms of resolution - * and allows it to quickly reject whatever it can't support. 
Remember, - * this codec kinda targets cheapo FPGAs without much memory. Unfortunately - * it also limits us greatly in our choice of formats, hence the flag to disable - * strict_compliance */ -static void encode_parse_params(VC2EncContext *s) -{ - put_vc2_ue_uint(&s->pb, s->ver.major); /* VC-2 demands this to be 2 */ - put_vc2_ue_uint(&s->pb, s->ver.minor); /* ^^ and this to be 0 */ - put_vc2_ue_uint(&s->pb, s->profile); /* 3 to signal HQ profile */ - put_vc2_ue_uint(&s->pb, s->level); /* 3 - 1080/720, 6 - 4K */ -} - -/* VC-2 11.3 - frame_size() */ -static void encode_frame_size(VC2EncContext *s) -{ - put_bits(&s->pb, 1, !s->strict_compliance); - if (!s->strict_compliance) { - AVCodecContext *avctx = s->avctx; - put_vc2_ue_uint(&s->pb, avctx->width); - put_vc2_ue_uint(&s->pb, avctx->height); - } -} - -/* VC-2 11.3.3 - color_diff_sampling_format() */ -static void encode_sample_fmt(VC2EncContext *s) -{ - put_bits(&s->pb, 1, !s->strict_compliance); - if (!s->strict_compliance) { - int idx; - if (s->chroma_x_shift == 1 && s->chroma_y_shift == 0) - idx = 1; /* 422 */ - else if (s->chroma_x_shift == 1 && s->chroma_y_shift == 1) - idx = 2; /* 420 */ - else - idx = 0; /* 444 */ - put_vc2_ue_uint(&s->pb, idx); - } -} - -/* VC-2 11.3.4 - scan_format() */ -static void encode_scan_format(VC2EncContext *s) -{ - put_bits(&s->pb, 1, !s->strict_compliance); - if (!s->strict_compliance) - put_vc2_ue_uint(&s->pb, s->interlaced); -} - -/* VC-2 11.3.5 - frame_rate() */ -static void encode_frame_rate(VC2EncContext *s) -{ - put_bits(&s->pb, 1, !s->strict_compliance); - if (!s->strict_compliance) { - AVCodecContext *avctx = s->avctx; - put_vc2_ue_uint(&s->pb, 0); - put_vc2_ue_uint(&s->pb, avctx->time_base.den); - put_vc2_ue_uint(&s->pb, avctx->time_base.num); - } -} - -/* VC-2 11.3.6 - aspect_ratio() */ -static void encode_aspect_ratio(VC2EncContext *s) -{ - put_bits(&s->pb, 1, !s->strict_compliance); - if (!s->strict_compliance) { - AVCodecContext *avctx = s->avctx; - 
put_vc2_ue_uint(&s->pb, 0); - put_vc2_ue_uint(&s->pb, avctx->sample_aspect_ratio.num); - put_vc2_ue_uint(&s->pb, avctx->sample_aspect_ratio.den); - } -} - -/* VC-2 11.3.7 - clean_area() */ -static void encode_clean_area(VC2EncContext *s) -{ - put_bits(&s->pb, 1, 0); -} - -/* VC-2 11.3.8 - signal_range() */ -static void encode_signal_range(VC2EncContext *s) -{ - put_bits(&s->pb, 1, !s->strict_compliance); - if (!s->strict_compliance) - put_vc2_ue_uint(&s->pb, s->bpp_idx); -} - -/* VC-2 11.3.9 - color_spec() */ -static void encode_color_spec(VC2EncContext *s) -{ - AVCodecContext *avctx = s->avctx; - put_bits(&s->pb, 1, !s->strict_compliance); - if (!s->strict_compliance) { - int val; - put_vc2_ue_uint(&s->pb, 0); - - /* primaries */ - put_bits(&s->pb, 1, 1); - if (avctx->color_primaries == AVCOL_PRI_BT470BG) - val = 2; - else if (avctx->color_primaries == AVCOL_PRI_SMPTE170M) - val = 1; - else if (avctx->color_primaries == AVCOL_PRI_SMPTE240M) - val = 1; - else - val = 0; - put_vc2_ue_uint(&s->pb, val); - - /* color matrix */ - put_bits(&s->pb, 1, 1); - if (avctx->colorspace == AVCOL_SPC_RGB) - val = 3; - else if (avctx->colorspace == AVCOL_SPC_YCOCG) - val = 2; - else if (avctx->colorspace == AVCOL_SPC_BT470BG) - val = 1; - else - val = 0; - put_vc2_ue_uint(&s->pb, val); - - /* transfer function */ - put_bits(&s->pb, 1, 1); - if (avctx->color_trc == AVCOL_TRC_LINEAR) - val = 2; - else if (avctx->color_trc == AVCOL_TRC_BT1361_ECG) - val = 1; - else - val = 0; - put_vc2_ue_uint(&s->pb, val); - } -} - -/* VC-2 11.3 - source_parameters() */ -static void encode_source_params(VC2EncContext *s) -{ - encode_frame_size(s); - encode_sample_fmt(s); - encode_scan_format(s); - encode_frame_rate(s); - encode_aspect_ratio(s); - encode_clean_area(s); - encode_signal_range(s); - encode_color_spec(s); -} - -/* VC-2 11 - sequence_header() */ -static void encode_seq_header(VC2EncContext *s) -{ - align_put_bits(&s->pb); - encode_parse_params(s); - put_vc2_ue_uint(&s->pb, s->base_vf); - 
encode_source_params(s); - put_vc2_ue_uint(&s->pb, s->interlaced); /* Frames or fields coding */ -} - -/* VC-2 12.1 - picture_header() */ -static void encode_picture_header(VC2EncContext *s) -{ - align_put_bits(&s->pb); - put_bits32(&s->pb, s->picture_number++); -} - -/* VC-2 12.3.4.1 - slice_parameters() */ -static void encode_slice_params(VC2EncContext *s) -{ - put_vc2_ue_uint(&s->pb, s->num_x); - put_vc2_ue_uint(&s->pb, s->num_y); - put_vc2_ue_uint(&s->pb, s->prefix_bytes); - put_vc2_ue_uint(&s->pb, s->size_scaler); -} - -/* 1st idx = LL, second - vertical, third - horizontal, fourth - total */ -static const uint8_t vc2_qm_col_tab[][4] = { - {20, 9, 15, 4}, - { 0, 6, 6, 4}, - { 0, 3, 3, 5}, - { 0, 3, 5, 1}, - { 0, 11, 10, 11} -}; - -static const uint8_t vc2_qm_flat_tab[][4] = { - { 0, 0, 0, 0}, - { 0, 0, 0, 0}, - { 0, 0, 0, 0}, - { 0, 0, 0, 0}, - { 0, 0, 0, 0} -}; - -static void init_quant_matrix(VC2EncContext *s) -{ - int level, orientation; - - if (s->wavelet_depth <= 4 && s->quant_matrix == VC2_QM_DEF) { - s->custom_quant_matrix = 0; - for (level = 0; level < s->wavelet_depth; level++) { - s->quant[level][0] = ff_dirac_default_qmat[s->wavelet_idx][level][0]; - s->quant[level][1] = ff_dirac_default_qmat[s->wavelet_idx][level][1]; - s->quant[level][2] = ff_dirac_default_qmat[s->wavelet_idx][level][2]; - s->quant[level][3] = ff_dirac_default_qmat[s->wavelet_idx][level][3]; - } - return; - } - - s->custom_quant_matrix = 1; - - if (s->quant_matrix == VC2_QM_DEF) { - for (level = 0; level < s->wavelet_depth; level++) { - for (orientation = 0; orientation < 4; orientation++) { - if (level <= 3) - s->quant[level][orientation] = ff_dirac_default_qmat[s->wavelet_idx][level][orientation]; - else - s->quant[level][orientation] = vc2_qm_col_tab[level][orientation]; - } - } - } else if (s->quant_matrix == VC2_QM_COL) { - for (level = 0; level < s->wavelet_depth; level++) { - for (orientation = 0; orientation < 4; orientation++) { - s->quant[level][orientation] = 
vc2_qm_col_tab[level][orientation]; - } - } - } else { - for (level = 0; level < s->wavelet_depth; level++) { - for (orientation = 0; orientation < 4; orientation++) { - s->quant[level][orientation] = vc2_qm_flat_tab[level][orientation]; - } - } - } -} - -/* VC-2 12.3.4.2 - quant_matrix() */ -static void encode_quant_matrix(VC2EncContext *s) -{ - int level; - put_bits(&s->pb, 1, s->custom_quant_matrix); - if (s->custom_quant_matrix) { - put_vc2_ue_uint(&s->pb, s->quant[0][0]); - for (level = 0; level < s->wavelet_depth; level++) { - put_vc2_ue_uint(&s->pb, s->quant[level][1]); - put_vc2_ue_uint(&s->pb, s->quant[level][2]); - put_vc2_ue_uint(&s->pb, s->quant[level][3]); - } - } -} - -/* VC-2 12.3 - transform_parameters() */ -static void encode_transform_params(VC2EncContext *s) -{ - put_vc2_ue_uint(&s->pb, s->wavelet_idx); - put_vc2_ue_uint(&s->pb, s->wavelet_depth); - - encode_slice_params(s); - encode_quant_matrix(s); -} - -/* VC-2 12.2 - wavelet_transform() */ -static void encode_wavelet_transform(VC2EncContext *s) -{ - encode_transform_params(s); - align_put_bits(&s->pb); -} - -/* VC-2 12 - picture_parse() */ -static void encode_picture_start(VC2EncContext *s) -{ - align_put_bits(&s->pb); - encode_picture_header(s); - align_put_bits(&s->pb); - encode_wavelet_transform(s); -} - -#define QUANT(c, mul, add, shift) (((mul) * (c) + (add)) >> (shift)) - /* VC-2 13.5.5.2 - slice_band() */ static void encode_subband(const VC2EncContext *s, PutBitContext *pb, int sx, int sy, const SubBand *b, int quant) @@ -550,7 +70,7 @@ static void encode_subband(const VC2EncContext *s, PutBitContext *pb, for (y = top; y < bottom; y++) { for (x = left; x < right; x++) { uint32_t c_abs = QUANT(FFABS(coeff[x]), q_m, q_a, q_s); - put_vc2_ue_uint_inline(pb, c_abs); + ff_put_vc2_ue_uint_inline(pb, c_abs); if (c_abs) put_bits(pb, 1, coeff[x] < 0); } @@ -558,6 +78,11 @@ static void encode_subband(const VC2EncContext *s, PutBitContext *pb, } } +static inline int count_vc2_ue_uint(uint32_t val) 
+{ + return 2 * av_log2(val + 1) + 1; +} + static int count_hq_slice(SliceArgs *slice, int quant_idx) { int x, y; @@ -657,7 +182,7 @@ static int calc_slice_sizes(VC2EncContext *s) SliceArgs *enc_args = s->slice_args; SliceArgs *top_loc[SLICE_REDIST_TOTAL] = {NULL}; - init_quant_matrix(s); + ff_vc2_init_quant_matrix(s, s->quant); for (slice_y = 0; slice_y < s->num_y; slice_y++) { for (slice_x = 0; slice_x < s->num_x; slice_x++) { @@ -782,7 +307,7 @@ static int encode_hq_slice(AVCodecContext *avctx, void *arg) } /* VC-2 13.5.1 - low_delay_transform_data() */ -static int encode_slices(VC2EncContext *s) +static void encode_slices(VC2EncContext *s) { uint8_t *buf; int slice_x, slice_y, skip = 0; @@ -803,8 +328,6 @@ static int encode_slices(VC2EncContext *s) sizeof(SliceArgs)); skip_put_bytes(&s->pb, skip); - - return 0; } /* @@ -902,7 +425,7 @@ static int dwt_plane(AVCodecContext *avctx, void *arg) } static int encode_frame(VC2EncContext *s, AVPacket *avpkt, const AVFrame *frame, - const char *aux_data, const int header_size, int field) + const int header_size, int field) { int i, ret; int64_t max_frame_bytes; @@ -929,25 +452,10 @@ static int encode_frame(VC2EncContext *s, AVPacket *avpkt, const AVFrame *frame, init_put_bits(&s->pb, avpkt->data, avpkt->size); } - /* Sequence header */ - encode_parse_info(s, DIRAC_PCODE_SEQ_HEADER); - encode_seq_header(s); - - /* Encoder version */ - if (aux_data) { - encode_parse_info(s, DIRAC_PCODE_AUX); - ff_put_string(&s->pb, aux_data, 1); - } - - /* Picture header */ - encode_parse_info(s, DIRAC_PCODE_PICTURE_HQ); - encode_picture_start(s); - - /* Encode slices */ + /* Encode frame */ + ff_vc2_write_frame_header(s); encode_slices(s); - - /* End sequence */ - encode_parse_info(s, DIRAC_PCODE_END_SEQ); + ff_vc2_write_sequence_end(s); return 0; } @@ -956,45 +464,20 @@ static av_cold int vc2_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, const AVFrame *frame, int *got_packet) { int ret = 0; - int slice_ceil, sig_size = 256; 
VC2EncContext *s = avctx->priv_data; const int bitexact = avctx->flags & AV_CODEC_FLAG_BITEXACT; - const char *aux_data = bitexact ? "Lavc" : LIBAVCODEC_IDENT; const int aux_data_size = bitexact ? sizeof("Lavc") : sizeof(LIBAVCODEC_IDENT); const int header_size = 100 + aux_data_size; - int64_t r_bitrate = avctx->bit_rate >> (s->interlaced); - - s->avctx = avctx; - s->size_scaler = 2; - s->prefix_bytes = 0; - s->last_parse_code = 0; - s->next_parse_offset = 0; - - /* Rate control */ - s->frame_max_bytes = (av_rescale(r_bitrate, s->avctx->time_base.num, - s->avctx->time_base.den) >> 3) - header_size; - s->slice_max_bytes = slice_ceil = av_rescale(s->frame_max_bytes, 1, s->num_x*s->num_y); - - /* Find an appropriate size scaler */ - while (sig_size > 255) { - int r_size = SSIZE_ROUND(s->slice_max_bytes); - if (r_size > slice_ceil) { - s->slice_max_bytes -= r_size - slice_ceil; - r_size = SSIZE_ROUND(s->slice_max_bytes); - } - sig_size = r_size/s->size_scaler; /* Signalled slize size */ - s->size_scaler <<= 1; - } - s->slice_min_bytes = s->slice_max_bytes - s->slice_max_bytes*(s->tolerance/100.0f); - if (s->slice_min_bytes < 0 || s->slice_max_bytes > INT_MAX >> 3) - return AVERROR(EINVAL); + ret = ff_vc2_frame_init(avctx, s); + if (ret) + return ret; - ret = encode_frame(s, avpkt, frame, aux_data, header_size, s->interlaced); + ret = encode_frame(s, avpkt, frame, header_size, s->interlaced); if (ret) return ret; if (s->interlaced) { - ret = encode_frame(s, avpkt, frame, aux_data, header_size, 2); + ret = encode_frame(s, avpkt, frame, header_size, 2); if (ret) return ret; } @@ -1026,83 +509,13 @@ static av_cold int vc2_encode_end(AVCodecContext *avctx) static av_cold int vc2_encode_init(AVCodecContext *avctx) { - static AVOnce init_static_once = AV_ONCE_INIT; Plane *p; SubBand *b; - int i, level, o, shift; + int ret, i, level, o, shift; const AVPixFmtDescriptor *pixdesc; int depth; VC2EncContext *s = avctx->priv_data; - s->picture_number = 0; - - /* Total allowed 
quantization range */ - s->q_ceil = DIRAC_MAX_QUANT_INDEX; - - s->ver.major = 2; - s->ver.minor = 0; - s->profile = 3; - s->level = 3; - - s->base_vf = -1; - s->strict_compliance = 1; - - s->q_avg = 0; - s->slice_max_bytes = 0; - s->slice_min_bytes = 0; - - /* Mark unknown as progressive */ - s->interlaced = !((avctx->field_order == AV_FIELD_UNKNOWN) || - (avctx->field_order == AV_FIELD_PROGRESSIVE)); - - for (i = 0; i < base_video_fmts_len; i++) { - const VC2BaseVideoFormat *fmt = &base_video_fmts[i]; - if (avctx->pix_fmt != fmt->pix_fmt) - continue; - if (avctx->time_base.num != fmt->time_base.num) - continue; - if (avctx->time_base.den != fmt->time_base.den) - continue; - if (avctx->width != fmt->width) - continue; - if (avctx->height != fmt->height) - continue; - if (s->interlaced != fmt->interlaced) - continue; - s->base_vf = i; - s->level = base_video_fmts[i].level; - break; - } - - if (s->interlaced) - av_log(avctx, AV_LOG_WARNING, "Interlacing enabled!\n"); - - if ((s->slice_width & (s->slice_width - 1)) || - (s->slice_height & (s->slice_height - 1))) { - av_log(avctx, AV_LOG_ERROR, "Slice size is not a power of two!\n"); - return AVERROR(EINVAL); - } - - if ((s->slice_width > avctx->width) || - (s->slice_height > avctx->height)) { - av_log(avctx, AV_LOG_ERROR, "Slice size is bigger than the image!\n"); - return AVERROR(EINVAL); - } - - if (s->base_vf <= 0) { - if (avctx->strict_std_compliance < FF_COMPLIANCE_STRICT) { - s->strict_compliance = s->base_vf = 0; - av_log(avctx, AV_LOG_WARNING, "Format does not strictly comply with VC2 specs\n"); - } else { - av_log(avctx, AV_LOG_WARNING, "Given format does not strictly comply with " - "the specifications, decrease strictness to use it.\n"); - return AVERROR(EINVAL); - } - } else { - av_log(avctx, AV_LOG_INFO, "Selected base video format = %i (%s)\n", - s->base_vf, base_video_fmts[s->base_vf].name); - } - pixdesc = av_pix_fmt_desc_get(avctx->pix_fmt); /* Chroma subsampling */ s->chroma_x_shift = 
pixdesc->log2_chroma_w; @@ -1110,47 +523,21 @@ static av_cold int vc2_encode_init(AVCodecContext *avctx) /* Bit depth and color range index */ depth = pixdesc->comp[0].depth; - if (depth == 8 && avctx->color_range == AVCOL_RANGE_JPEG) { - s->bpp = 1; - s->bpp_idx = 1; - s->diff_offset = 128; - } else if (depth == 8 && (avctx->color_range == AVCOL_RANGE_MPEG || - avctx->color_range == AVCOL_RANGE_UNSPECIFIED)) { - s->bpp = 1; - s->bpp_idx = 2; - s->diff_offset = 128; - } else if (depth == 10) { - s->bpp = 2; - s->bpp_idx = 3; - s->diff_offset = 512; - } else { - s->bpp = 2; - s->bpp_idx = 4; - s->diff_offset = 2048; - } + + /* Context initialization */ + ret = ff_vc2_encode_init(avctx, depth); + if (ret < 0) + return ret; /* Planes initialization */ for (i = 0; i < 3; i++) { - int w, h; p = &s->plane[i]; - p->width = avctx->width >> (i ? s->chroma_x_shift : 0); - p->height = avctx->height >> (i ? s->chroma_y_shift : 0); - if (s->interlaced) - p->height >>= 1; - p->dwt_width = w = FFALIGN(p->width, (1 << s->wavelet_depth)); - p->dwt_height = h = FFALIGN(p->height, (1 << s->wavelet_depth)); - p->coef_stride = FFALIGN(p->dwt_width, 32); p->coef_buf = av_mallocz(p->coef_stride*p->dwt_height*sizeof(dwtcoef)); if (!p->coef_buf) return AVERROR(ENOMEM); for (level = s->wavelet_depth-1; level >= 0; level--) { - w = w >> 1; - h = h >> 1; for (o = 0; o < 4; o++) { b = &p->band[level][o]; - b->width = w; - b->height = h; - b->stride = p->coef_stride; shift = (o > 1)*b->height*b->stride + (o & 1)*b->width; b->buf = p->coef_buf + shift; } @@ -1164,10 +551,6 @@ static av_cold int vc2_encode_init(AVCodecContext *avctx) return AVERROR(ENOMEM); } - /* Slices */ - s->num_x = s->plane[0].dwt_width/s->slice_width; - s->num_y = s->plane[0].dwt_height/s->slice_height; - s->slice_args = av_calloc(s->num_x*s->num_y, sizeof(SliceArgs)); if (!s->slice_args) return AVERROR(ENOMEM); @@ -1189,8 +572,6 @@ static av_cold int vc2_encode_init(AVCodecContext *avctx) } } - 
ff_thread_once(&init_static_once, vc2_init_static_data); - return 0; } diff --git a/libavcodec/vc2enc_common.c b/libavcodec/vc2enc_common.c new file mode 100644 index 0000000000..bb24711eb2 --- /dev/null +++ b/libavcodec/vc2enc_common.c @@ -0,0 +1,571 @@ +/* +* Copyright (C) 2016 Open Broadcast Systems Ltd. +* Author 2016 Rostislav Pehlivanov <atomnuker@gmail.com> +* +* This file is part of FFmpeg. +* +* FFmpeg is free software; you can redistribute it and/or +* modify it under the terms of the GNU Lesser General Public +* License as published by the Free Software Foundation; either +* version 2.1 of the License, or (at your option) any later version. +* +* FFmpeg is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* Lesser General Public License for more details. +* +* You should have received a copy of the GNU Lesser General Public +* License along with FFmpeg; if not, write to the Free Software +* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#include "libavutil/pixdesc.h" +#include "libavutil/thread.h" +#include "vc2enc_common.h" +#include "version.h" + +typedef struct VC2BaseVideoFormat { + enum AVPixelFormat pix_fmt; + AVRational time_base; + int width, height; + uint8_t interlaced, level; + char name[13]; +} VC2BaseVideoFormat; + +static const VC2BaseVideoFormat base_video_fmts[] = { + { 0 }, /* Custom format, here just to make indexing equal to base_vf */ + { AV_PIX_FMT_YUV420P, { 1001, 15000 }, 176, 120, 0, 1, "QSIF525" }, + { AV_PIX_FMT_YUV420P, { 2, 25 }, 176, 144, 0, 1, "QCIF" }, + { AV_PIX_FMT_YUV420P, { 1001, 15000 }, 352, 240, 0, 1, "SIF525" }, + { AV_PIX_FMT_YUV420P, { 2, 25 }, 352, 288, 0, 1, "CIF" }, + { AV_PIX_FMT_YUV420P, { 1001, 15000 }, 704, 480, 0, 1, "4SIF525" }, + { AV_PIX_FMT_YUV420P, { 2, 25 }, 704, 576, 0, 1, "4CIF" }, + + { AV_PIX_FMT_YUV422P10, { 1001, 30000 }, 720, 
480, 1, 2, "SD480I-60" }, + { AV_PIX_FMT_YUV422P10, { 1, 25 }, 720, 576, 1, 2, "SD576I-50" }, + + { AV_PIX_FMT_YUV422P10, { 1001, 60000 }, 1280, 720, 0, 3, "HD720P-60" }, + { AV_PIX_FMT_YUV422P10, { 1, 50 }, 1280, 720, 0, 3, "HD720P-50" }, + { AV_PIX_FMT_YUV422P10, { 1001, 30000 }, 1920, 1080, 1, 3, "HD1080I-60" }, + { AV_PIX_FMT_YUV422P10, { 1, 25 }, 1920, 1080, 1, 3, "HD1080I-50" }, + { AV_PIX_FMT_YUV422P10, { 1001, 60000 }, 1920, 1080, 0, 3, "HD1080P-60" }, + { AV_PIX_FMT_YUV422P10, { 1, 50 }, 1920, 1080, 0, 3, "HD1080P-50" }, + + { AV_PIX_FMT_YUV444P12, { 1, 24 }, 2048, 1080, 0, 4, "DC2K" }, + { AV_PIX_FMT_YUV444P12, { 1, 24 }, 4096, 2160, 0, 5, "DC4K" }, + + { AV_PIX_FMT_YUV422P10, { 1001, 60000 }, 3840, 2160, 0, 6, "UHDTV 4K-60" }, + { AV_PIX_FMT_YUV422P10, { 1, 50 }, 3840, 2160, 0, 6, "UHDTV 4K-50" }, + + { AV_PIX_FMT_YUV422P10, { 1001, 60000 }, 7680, 4320, 0, 7, "UHDTV 8K-60" }, + { AV_PIX_FMT_YUV422P10, { 1, 50 }, 7680, 4320, 0, 7, "UHDTV 8K-50" }, + + { AV_PIX_FMT_YUV422P10, { 1001, 24000 }, 1920, 1080, 0, 3, "HD1080P-24" }, + { AV_PIX_FMT_YUV422P10, { 1001, 30000 }, 720, 486, 1, 2, "SD Pro486" }, +}; +static const int base_video_fmts_len = FF_ARRAY_ELEMS(base_video_fmts); + +/// x_k x_{k-1} ... x_0 -> 0 x_k 0 x_{k - 1} ... 0 x_0 +uint16_t interleaved_ue_golomb_tab[256]; +/// 1 x_{k-1} ... x_0 -> 0 0 0 x_{k - 1} ... 0 x_0 +uint16_t top_interleaved_ue_golomb_tab[256]; +/// 1 x_{k-1} ... 
x_0 -> 2 * k +uint8_t golomb_len_tab[256]; + +static av_cold void vc2_init_static_data(void) +{ + interleaved_ue_golomb_tab[1] = 1; + for (unsigned i = 2; i < 256; ++i) { + golomb_len_tab[i] = golomb_len_tab[i >> 1] + 2; + interleaved_ue_golomb_tab[i] = (interleaved_ue_golomb_tab[i >> 1] << 2) | (i & 1); + top_interleaved_ue_golomb_tab[i] = interleaved_ue_golomb_tab[i] ^ (1 << golomb_len_tab[i]); + } +} + +static void put_vc2_ue_uint(PutBitContext *pb, uint32_t val) +{ + ff_put_vc2_ue_uint_inline(pb, val); +} + +/* VC-2 10.4 - parse_info() */ +static void encode_parse_info(VC2EncContext *s, enum DiracParseCodes pcode) +{ + uint32_t cur_pos, dist; + + align_put_bits(&s->pb); + + cur_pos = put_bytes_count(&s->pb, 0); + + /* Magic string */ + ff_put_string(&s->pb, "BBCD", 0); + + /* Parse code */ + put_bits(&s->pb, 8, pcode); + + /* Next parse offset */ + dist = cur_pos - s->next_parse_offset; + AV_WB32(s->pb.buf + s->next_parse_offset + 5, dist); + s->next_parse_offset = cur_pos; + put_bits32(&s->pb, pcode == DIRAC_PCODE_END_SEQ ? 13 : 0); + + cur_pos = put_bytes_count(&s->pb, 0); + + /* Last parse offset */ + put_bits32(&s->pb, s->last_parse_code == DIRAC_PCODE_END_SEQ ? 13 : dist); + + s->last_parse_code = pcode; +} + +/* VC-2 11.1 - parse_parameters() +* The level dictates what the decoder should expect in terms of resolution +* and allows it to quickly reject whatever it can't support. Remember, +* this codec kinda targets cheapo FPGAs without much memory. 
Unfortunately +* it also limits us greatly in our choice of formats, hence the flag to disable +* strict_compliance */ +static void encode_parse_params(VC2EncContext *s) +{ + put_vc2_ue_uint(&s->pb, s->ver.major); /* VC-2 demands this to be 2 */ + put_vc2_ue_uint(&s->pb, s->ver.minor); /* ^^ and this to be 0 */ + put_vc2_ue_uint(&s->pb, s->profile); /* 3 to signal HQ profile */ + put_vc2_ue_uint(&s->pb, s->level); /* 3 - 1080/720, 6 - 4K */ +} + +/* VC-2 11.3 - frame_size() */ +static void encode_frame_size(VC2EncContext *s) +{ + put_bits(&s->pb, 1, !s->strict_compliance); + if (!s->strict_compliance) { + AVCodecContext *avctx = s->avctx; + put_vc2_ue_uint(&s->pb, avctx->width); + put_vc2_ue_uint(&s->pb, avctx->height); + } +} + +/* VC-2 11.3.3 - color_diff_sampling_format() */ +static void encode_sample_fmt(VC2EncContext *s) +{ + put_bits(&s->pb, 1, !s->strict_compliance); + if (!s->strict_compliance) { + int idx; + if (s->chroma_x_shift == 1 && s->chroma_y_shift == 0) + idx = 1; /* 422 */ + else if (s->chroma_x_shift == 1 && s->chroma_y_shift == 1) + idx = 2; /* 420 */ + else + idx = 0; /* 444 */ + put_vc2_ue_uint(&s->pb, idx); + } +} + +/* VC-2 11.3.4 - scan_format() */ +static void encode_scan_format(VC2EncContext *s) +{ + put_bits(&s->pb, 1, !s->strict_compliance); + if (!s->strict_compliance) + put_vc2_ue_uint(&s->pb, s->interlaced); +} + +/* VC-2 11.3.5 - frame_rate() */ +static void encode_frame_rate(VC2EncContext *s) +{ + put_bits(&s->pb, 1, !s->strict_compliance); + if (!s->strict_compliance) { + AVCodecContext *avctx = s->avctx; + put_vc2_ue_uint(&s->pb, 0); + put_vc2_ue_uint(&s->pb, avctx->time_base.den); + put_vc2_ue_uint(&s->pb, avctx->time_base.num); + } +} + +/* VC-2 11.3.6 - aspect_ratio() */ +static void encode_aspect_ratio(VC2EncContext *s) +{ + put_bits(&s->pb, 1, !s->strict_compliance); + if (!s->strict_compliance) { + AVCodecContext *avctx = s->avctx; + put_vc2_ue_uint(&s->pb, 0); + put_vc2_ue_uint(&s->pb, avctx->sample_aspect_ratio.num); + 
put_vc2_ue_uint(&s->pb, avctx->sample_aspect_ratio.den); + } +} + +/* VC-2 11.3.7 - clean_area() */ +static void encode_clean_area(VC2EncContext *s) +{ + put_bits(&s->pb, 1, 0); +} + +/* VC-2 11.3.8 - signal_range() */ +static void encode_signal_range(VC2EncContext *s) +{ + put_bits(&s->pb, 1, !s->strict_compliance); + if (!s->strict_compliance) + put_vc2_ue_uint(&s->pb, s->bpp_idx); +} + +/* VC-2 11.3.9 - color_spec() */ +static void encode_color_spec(VC2EncContext *s) +{ + AVCodecContext *avctx = s->avctx; + put_bits(&s->pb, 1, !s->strict_compliance); + if (!s->strict_compliance) { + int val; + put_vc2_ue_uint(&s->pb, 0); + + /* primaries */ + put_bits(&s->pb, 1, 1); + if (avctx->color_primaries == AVCOL_PRI_BT470BG) + val = 2; + else if (avctx->color_primaries == AVCOL_PRI_SMPTE170M) + val = 1; + else if (avctx->color_primaries == AVCOL_PRI_SMPTE240M) + val = 1; + else + val = 0; + put_vc2_ue_uint(&s->pb, val); + + /* color matrix */ + put_bits(&s->pb, 1, 1); + if (avctx->colorspace == AVCOL_SPC_RGB) + val = 3; + else if (avctx->colorspace == AVCOL_SPC_YCOCG) + val = 2; + else if (avctx->colorspace == AVCOL_SPC_BT470BG) + val = 1; + else + val = 0; + put_vc2_ue_uint(&s->pb, val); + + /* transfer function */ + put_bits(&s->pb, 1, 1); + if (avctx->color_trc == AVCOL_TRC_LINEAR) + val = 2; + else if (avctx->color_trc == AVCOL_TRC_BT1361_ECG) + val = 1; + else + val = 0; + put_vc2_ue_uint(&s->pb, val); + } +} + +/* VC-2 11.3 - source_parameters() */ +static void encode_source_params(VC2EncContext *s) +{ + encode_frame_size(s); + encode_sample_fmt(s); + encode_scan_format(s); + encode_frame_rate(s); + encode_aspect_ratio(s); + encode_clean_area(s); + encode_signal_range(s); + encode_color_spec(s); +} + +/* VC-2 11 - sequence_header() */ +static void encode_seq_header(VC2EncContext *s) +{ + align_put_bits(&s->pb); + encode_parse_params(s); + put_vc2_ue_uint(&s->pb, s->base_vf); + encode_source_params(s); + put_vc2_ue_uint(&s->pb, s->interlaced); /* Frames or fields 
coding */ +} + +/* VC-2 12.1 - picture_header() */ +static void encode_picture_header(VC2EncContext *s) +{ + align_put_bits(&s->pb); + put_bits32(&s->pb, s->picture_number++); +} + +/* VC-2 12.3.4.1 - slice_parameters() */ +static void encode_slice_params(VC2EncContext *s) +{ + put_vc2_ue_uint(&s->pb, s->num_x); + put_vc2_ue_uint(&s->pb, s->num_y); + put_vc2_ue_uint(&s->pb, s->prefix_bytes); + put_vc2_ue_uint(&s->pb, s->size_scaler); +} + +/* 1st idx = LL, second - vertical, third - horizontal, fourth - total */ +static const uint8_t vc2_qm_col_tab[][4] = { + {20, 9, 15, 4}, + { 0, 6, 6, 4}, + { 0, 3, 3, 5}, + { 0, 3, 5, 1}, + { 0, 11, 10, 11} +}; + +static const uint8_t vc2_qm_flat_tab[][4] = { + { 0, 0, 0, 0}, + { 0, 0, 0, 0}, + { 0, 0, 0, 0}, + { 0, 0, 0, 0}, + { 0, 0, 0, 0} +}; + +void ff_vc2_init_quant_matrix(VC2EncContext *s, uint8_t quant[MAX_DWT_LEVELS][4]) +{ + int level, orientation; + + if (s->wavelet_depth <= 4 && s->quant_matrix == VC2_QM_DEF) { + s->custom_quant_matrix = 0; + for (level = 0; level < s->wavelet_depth; level++) { + quant[level][0] = ff_dirac_default_qmat[s->wavelet_idx][level][0]; + quant[level][1] = ff_dirac_default_qmat[s->wavelet_idx][level][1]; + quant[level][2] = ff_dirac_default_qmat[s->wavelet_idx][level][2]; + quant[level][3] = ff_dirac_default_qmat[s->wavelet_idx][level][3]; + } + return; + } + + s->custom_quant_matrix = 1; + + if (s->quant_matrix == VC2_QM_DEF) { + for (level = 0; level < s->wavelet_depth; level++) { + for (orientation = 0; orientation < 4; orientation++) { + if (level <= 3) + quant[level][orientation] = ff_dirac_default_qmat[s->wavelet_idx][level][orientation]; + else + quant[level][orientation] = vc2_qm_col_tab[level][orientation]; + } + } + } else if (s->quant_matrix == VC2_QM_COL) { + for (level = 0; level < s->wavelet_depth; level++) { + for (orientation = 0; orientation < 4; orientation++) { + quant[level][orientation] = vc2_qm_col_tab[level][orientation]; + } + } + } else { + for (level = 0; level < 
s->wavelet_depth; level++) { + for (orientation = 0; orientation < 4; orientation++) { + quant[level][orientation] = vc2_qm_flat_tab[level][orientation]; + } + } + } +} + +/* VC-2 12.3.4.2 - quant_matrix() */ +static void encode_quant_matrix(VC2EncContext *s) +{ + int level; + put_bits(&s->pb, 1, s->custom_quant_matrix); + if (s->custom_quant_matrix) { + put_vc2_ue_uint(&s->pb, s->quant[0][0]); + for (level = 0; level < s->wavelet_depth; level++) { + put_vc2_ue_uint(&s->pb, s->quant[level][1]); + put_vc2_ue_uint(&s->pb, s->quant[level][2]); + put_vc2_ue_uint(&s->pb, s->quant[level][3]); + } + } +} + +/* VC-2 12.3 - transform_parameters() */ +static void encode_transform_params(VC2EncContext *s) +{ + put_vc2_ue_uint(&s->pb, s->wavelet_idx); + put_vc2_ue_uint(&s->pb, s->wavelet_depth); + + encode_slice_params(s); + encode_quant_matrix(s); +} + +/* VC-2 12.2 - wavelet_transform() */ +static void encode_wavelet_transform(VC2EncContext *s) +{ + encode_transform_params(s); + align_put_bits(&s->pb); +} + +/* VC-2 12 - picture_parse() */ +static void encode_picture_start(VC2EncContext *s) +{ + align_put_bits(&s->pb); + encode_picture_header(s); + align_put_bits(&s->pb); + encode_wavelet_transform(s); +} + +int ff_vc2_encode_init(AVCodecContext *avctx, int depth) +{ + static AVOnce init_static_once = AV_ONCE_INIT; + int i, level, o; + SubBand *b; + Plane *p; + VC2EncContext *s = avctx->priv_data; + + s->picture_number = 0; + + /* Total allowed quantization range */ + s->q_ceil = DIRAC_MAX_QUANT_INDEX; + + s->ver.major = 2; + s->ver.minor = 0; + s->profile = 3; + s->level = 3; + + s->base_vf = -1; + s->strict_compliance = 1; + + s->q_avg = 0; + s->slice_max_bytes = 0; + s->slice_min_bytes = 0; + + /* Mark unknown as progressive */ + s->interlaced = !((avctx->field_order == AV_FIELD_UNKNOWN) || + (avctx->field_order == AV_FIELD_PROGRESSIVE)); + + for (i = 0; i < base_video_fmts_len; i++) { + const VC2BaseVideoFormat *fmt = &base_video_fmts[i]; + if (avctx->pix_fmt != 
fmt->pix_fmt || avctx->time_base.num != fmt->time_base.num || + avctx->time_base.den != fmt->time_base.den || avctx->width != fmt->width || + avctx->height != fmt->height || s->interlaced != fmt->interlaced) + continue; + s->base_vf = i; + s->level = base_video_fmts[i].level; + break; + } + + if (s->interlaced) + av_log(avctx, AV_LOG_WARNING, "Interlacing enabled!\n"); + + if ((s->slice_width & (s->slice_width - 1)) || + (s->slice_height & (s->slice_height - 1))) { + av_log(avctx, AV_LOG_ERROR, "Slice size is not a power of two!\n"); + return AVERROR(EINVAL); + } + + if ((s->slice_width > avctx->width) || + (s->slice_height > avctx->height)) { + av_log(avctx, AV_LOG_ERROR, "Slice size is bigger than the image!\n"); + return AVERROR(EINVAL); + } + + if (s->base_vf <= 0) { + if (avctx->strict_std_compliance < FF_COMPLIANCE_STRICT) { + s->strict_compliance = s->base_vf = 0; + av_log(avctx, AV_LOG_WARNING, "Format does not strictly comply with VC2 specs\n"); + } else { + av_log(avctx, AV_LOG_WARNING, "Given format does not strictly comply with " + "the specifications, decrease strictness to use it.\n"); + return AVERROR(EINVAL); + } + } else { + av_log(avctx, AV_LOG_INFO, "Selected base video format = %i (%s)\n", + s->base_vf, base_video_fmts[s->base_vf].name); + } + + /* Bit depth and color range index */ + if (depth == 8 && avctx->color_range == AVCOL_RANGE_JPEG) { + s->bpp = 1; + s->bpp_idx = 1; + s->diff_offset = 128; + } else if (depth == 8 && (avctx->color_range == AVCOL_RANGE_MPEG || + avctx->color_range == AVCOL_RANGE_UNSPECIFIED)) { + s->bpp = 1; + s->bpp_idx = 2; + s->diff_offset = 128; + } else if (depth == 10) { + s->bpp = 2; + s->bpp_idx = 3; + s->diff_offset = 512; + } else { + s->bpp = 2; + s->bpp_idx = 4; + s->diff_offset = 2048; + } + + /* Planes initialization */ + for (i = 0; i < 3; i++) { + int w, h; + p = &s->plane[i]; + p->width = avctx->width >> (i ? s->chroma_x_shift : 0); + p->height = avctx->height >> (i ? 
s->chroma_y_shift : 0); + if (s->interlaced) + p->height >>= 1; + p->dwt_width = w = FFALIGN(p->width, (1 << s->wavelet_depth)); + p->dwt_height = h = FFALIGN(p->height, (1 << s->wavelet_depth)); + p->coef_stride = FFALIGN(p->dwt_width, 32); + for (level = s->wavelet_depth-1; level >= 0; level--) { + w = w >> 1; + h = h >> 1; + for (o = 0; o < 4; o++) { + b = &p->band[level][o]; + b->width = w; + b->height = h; + b->stride = p->coef_stride; + } + } + } + + /* Slices */ + s->num_x = s->plane[0].dwt_width/s->slice_width; + s->num_y = s->plane[0].dwt_height/s->slice_height; + + ff_thread_once(&init_static_once, vc2_init_static_data); + + return 0; +} + +int ff_vc2_frame_init(AVCodecContext *avctx, VC2EncContext *s) +{ + int slice_ceil, sig_size = 256; + const int bitexact = avctx->flags & AV_CODEC_FLAG_BITEXACT; + const int aux_data_size = bitexact ? sizeof("Lavc") : sizeof(LIBAVCODEC_IDENT); + const int header_size = 100 + aux_data_size; + int64_t r_bitrate = avctx->bit_rate >> (s->interlaced); + + s->avctx = avctx; + s->size_scaler = 2; + s->prefix_bytes = 0; + s->last_parse_code = 0; + s->next_parse_offset = 0; + + /* Rate control */ + s->frame_max_bytes = (av_rescale(r_bitrate, s->avctx->time_base.num, + s->avctx->time_base.den) >> 3) - header_size; + s->slice_max_bytes = slice_ceil = av_rescale(s->frame_max_bytes, 1, s->num_x * s->num_y); + + /* Find an appropriate size scaler */ + while (sig_size > 255) { + int r_size = SSIZE_ROUND(s->slice_max_bytes); + if (r_size > slice_ceil) { + s->slice_max_bytes -= r_size - slice_ceil; + r_size = SSIZE_ROUND(s->slice_max_bytes); + } + sig_size = r_size/s->size_scaler; /* Signalled slice size */ + s->size_scaler <<= 1; + } + + s->slice_min_bytes = s->slice_max_bytes - s->slice_max_bytes*(s->tolerance/100.0f); + if (s->slice_min_bytes < 0 || s->slice_max_bytes > INT_MAX >> 3) + return AVERROR(EINVAL); + + return 0; +} + +void ff_vc2_write_frame_header(VC2EncContext *s) +{ + const int bitexact = s->avctx->flags &
AV_CODEC_FLAG_BITEXACT; + const char *aux_data = bitexact ? "Lavc" : LIBAVCODEC_IDENT; + + /* Sequence header */ + encode_parse_info(s, DIRAC_PCODE_SEQ_HEADER); + encode_seq_header(s); + + /* Encoder version */ + if (aux_data) { + encode_parse_info(s, DIRAC_PCODE_AUX); + ff_put_string(&s->pb, aux_data, 1); + } + + /* Picture header */ + encode_parse_info(s, DIRAC_PCODE_PICTURE_HQ); + encode_picture_start(s); +} + +void ff_vc2_write_sequence_end(VC2EncContext *s) +{ + /* End sequence */ + encode_parse_info(s, DIRAC_PCODE_END_SEQ); +} diff --git a/libavcodec/vc2enc_common.h b/libavcodec/vc2enc_common.h new file mode 100644 index 0000000000..41f555c496 --- /dev/null +++ b/libavcodec/vc2enc_common.h @@ -0,0 +1,168 @@ +/* +* Copyright (C) 2016 Open Broadcast Systems Ltd. +* Author 2016 Rostislav Pehlivanov <atomnuker@gmail.com> +* +* This file is part of FFmpeg. +* +* FFmpeg is free software; you can redistribute it and/or +* modify it under the terms of the GNU Lesser General Public +* License as published by the Free Software Foundation; either +* version 2.1 of the License, or (at your option) any later version. +* +* FFmpeg is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* Lesser General Public License for more details. 
+* +* You should have received a copy of the GNU Lesser General Public +* License along with FFmpeg; if not, write to the Free Software +* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#ifndef AVCODEC_VC2ENC_COMMON_H +#define AVCODEC_VC2ENC_COMMON_H + +#include "avcodec.h" +#include "dirac.h" +#include "put_bits.h" +#include "libavutil/attributes_internal.h" + +#include "vc2enc_dwt.h" +#include "diractab.h" + +/* The limited size resolution of each slice forces us to do this */ +#define SSIZE_ROUND(b) (FFALIGN((b), s->size_scaler) + 4 + s->prefix_bytes) + +FF_VISIBILITY_PUSH_HIDDEN + +enum VC2_QM { + VC2_QM_DEF = 0, + VC2_QM_COL, + VC2_QM_FLAT, + + VC2_QM_NB +}; + +typedef struct SubBand { + dwtcoef *buf; + ptrdiff_t stride; + int width; + int height; + int shift; +} SubBand; + +typedef struct Plane { + SubBand band[MAX_DWT_LEVELS][4]; + dwtcoef *coef_buf; + int width; + int height; + int dwt_width; + int dwt_height; + ptrdiff_t coef_stride; +} Plane; + +typedef struct TransformArgs { + const struct VC2EncContext *ctx; + Plane *plane; + const void *idata; + ptrdiff_t istride; + int field; + VC2TransformContext t; +} TransformArgs; + +typedef struct VC2EncContext { + AVClass *av_class; + PutBitContext pb; + Plane plane[3]; + AVCodecContext *avctx; + DiracVersionInfo ver; + + struct SliceArgs *slice_args; + TransformArgs transform_args[3]; + + /* For conversion from unsigned pixel values to signed */ + int diff_offset; + int bpp; + int bpp_idx; + + /* Picture number */ + uint32_t picture_number; + + /* Base video format */ + int base_vf; + int level; + int profile; + + /* Quantization matrix */ + uint8_t quant[MAX_DWT_LEVELS][4]; + int custom_quant_matrix; + + /* Division LUT */ + uint32_t qmagic_lut[116][2]; + + int num_x; /* #slices horizontally */ + int num_y; /* #slices vertically */ + int prefix_bytes; + int size_scaler; + int chroma_x_shift; + int chroma_y_shift; + + /* Rate control stuff */ + int frame_max_bytes; + int 
slice_max_bytes; + int slice_min_bytes; + int q_ceil; + int q_avg; + + /* Options */ + double tolerance; + int wavelet_idx; + int wavelet_depth; + int strict_compliance; + int slice_height; + int slice_width; + int interlaced; + enum VC2_QM quant_matrix; + + /* Parse code state */ + uint32_t next_parse_offset; + enum DiracParseCodes last_parse_code; +} VC2EncContext; + +extern uint16_t interleaved_ue_golomb_tab[256]; +extern uint16_t top_interleaved_ue_golomb_tab[256]; +extern uint8_t golomb_len_tab[256]; + +static inline void ff_put_vc2_ue_uint_inline(PutBitContext *pb, uint32_t val) +{ + uint64_t pbits = 1; + int bits = 1; + + ++val; + + while (val >> 8) { + pbits |= (uint64_t)interleaved_ue_golomb_tab[val & 0xff] << bits; + val >>= 8; + bits += 16; + } + pbits |= (uint64_t)top_interleaved_ue_golomb_tab[val] << bits; + bits += golomb_len_tab[val]; + + put_bits63(pb, bits, pbits); +} + +int ff_vc2_encode_init(AVCodecContext *avctx, int depth); + +int ff_vc2_frame_init(AVCodecContext *avctx, VC2EncContext *s); + +void ff_vc2_write_frame_header(VC2EncContext *s); + +void ff_vc2_write_sequence_end(VC2EncContext *s); + +void ff_vc2_init_quant_matrix(VC2EncContext *s, uint8_t quant[MAX_DWT_LEVELS][4]); + +void ff_vc2_encode_frame(VC2EncContext *s, void(*encode_slices)(VC2EncContext*)); + +FF_VISIBILITY_POP_HIDDEN + +#endif -- 2.49.0 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 7+ messages in thread
* [FFmpeg-devel] [PATCH v5 2/4] libavcodec/vc2enc: Switch quant to int 2025-05-23 20:23 [FFmpeg-devel] [PATCH v5 1/4] libavcodec/vc2enc: Split out common functions between software and hardware encoders IndecisiveTurtle @ 2025-05-23 20:23 ` IndecisiveTurtle 2025-05-23 20:23 ` [FFmpeg-devel] [PATCH v5 3/4] libavcodec/vulkan: Add modifications to common shader for VC2 vulkan encoder IndecisiveTurtle 2025-05-23 20:23 ` [FFmpeg-devel] [PATCH v5 4/4] lavc: implement a Vulkan-based VC-2 encoder Implements a Vulkan based dirac encoder. Supports Haar and Legall wavelets and should work with all wavelet depths IndecisiveTurtle 2 siblings, 0 replies; 7+ messages in thread From: IndecisiveTurtle @ 2025-05-23 20:23 UTC (permalink / raw) To: ffmpeg-devel; +Cc: IndecisiveTurtle From: IndecisiveTurtle <geoster3d@gmail.com> Prevents compiler from mistaking it as a string Also makes passing it to the GPU in a buffer easier --- libavcodec/vc2enc_common.c | 2 +- libavcodec/vc2enc_common.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/libavcodec/vc2enc_common.c b/libavcodec/vc2enc_common.c index bb24711eb2..5549eae1c1 100644 --- a/libavcodec/vc2enc_common.c +++ b/libavcodec/vc2enc_common.c @@ -304,7 +304,7 @@ static const uint8_t vc2_qm_flat_tab[][4] = { { 0, 0, 0, 0} }; -void ff_vc2_init_quant_matrix(VC2EncContext *s, uint8_t quant[MAX_DWT_LEVELS][4]) +void ff_vc2_init_quant_matrix(VC2EncContext *s, int quant[MAX_DWT_LEVELS][4]) { int level, orientation; diff --git a/libavcodec/vc2enc_common.h b/libavcodec/vc2enc_common.h index 41f555c496..929e0e49ea 100644 --- a/libavcodec/vc2enc_common.h +++ b/libavcodec/vc2enc_common.h @@ -94,7 +94,7 @@ typedef struct VC2EncContext { int profile; /* Quantization matrix */ - uint8_t quant[MAX_DWT_LEVELS][4]; + int quant[MAX_DWT_LEVELS][4]; int custom_quant_matrix; /* Division LUT */ @@ -159,7 +159,7 @@ void ff_vc2_write_frame_header(VC2EncContext *s); void ff_vc2_write_sequence_end(VC2EncContext *s); -void 
ff_vc2_init_quant_matrix(VC2EncContext *s, uint8_t quant[MAX_DWT_LEVELS][4]); +void ff_vc2_init_quant_matrix(VC2EncContext *s, int quant[MAX_DWT_LEVELS][4]); void ff_vc2_encode_frame(VC2EncContext *s, void(*encode_slices)(VC2EncContext*)); -- 2.49.0 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 7+ messages in thread
* [FFmpeg-devel] [PATCH v5 3/4] libavcodec/vulkan: Add modifications to common shader for VC2 vulkan encoder 2025-05-23 20:23 [FFmpeg-devel] [PATCH v5 1/4] libavcodec/vc2enc: Split out common functions between software and hardware encoders IndecisiveTurtle 2025-05-23 20:23 ` [FFmpeg-devel] [PATCH v5 2/4] libavcodec/vc2enc: Switch quant to int IndecisiveTurtle @ 2025-05-23 20:23 ` IndecisiveTurtle 2025-05-23 20:23 ` [FFmpeg-devel] [PATCH v5 4/4] lavc: implement a Vulkan-based VC-2 encoder Implements a Vulkan based dirac encoder. Supports Haar and Legall wavelets and should work with all wavelet depths IndecisiveTurtle 2 siblings, 0 replies; 7+ messages in thread From: IndecisiveTurtle @ 2025-05-23 20:23 UTC (permalink / raw) To: ffmpeg-devel; +Cc: IndecisiveTurtle From: IndecisiveTurtle <geoster3d@gmail.com> --- libavcodec/vulkan/common.comp | 51 ++++++++++++++++++++++++++++------- 1 file changed, 41 insertions(+), 10 deletions(-) diff --git a/libavcodec/vulkan/common.comp b/libavcodec/vulkan/common.comp index 10af9c0623..59a4a4b1a8 100644 --- a/libavcodec/vulkan/common.comp +++ b/libavcodec/vulkan/common.comp @@ -18,6 +18,9 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#extension GL_EXT_buffer_reference : require +#extension GL_EXT_buffer_reference2 : require + layout(buffer_reference, buffer_reference_align = 1) buffer u8buf { uint8_t v; }; @@ -61,22 +64,20 @@ layout(buffer_reference, buffer_reference_align = 8) buffer u64buf { #define mid_pred(a, b, c) \ max(min((a), (b)), min(max((a), (b)), (c))) -/* TODO: optimize */ + uint align(uint src, uint a) { - uint res = src % a; - if (res == 0) - return src; - return src + a - res; + return (src + a - 1) & ~(a - 1); +} + +int align(int src, int a) +{ + return (src + a - 1) & ~(a - 1); } -/* TODO: optimize */ uint64_t align64(uint64_t src, uint64_t a) { - uint64_t res = src % a; - if (res == 0) - return src; - return src + a - res; + return (src + a - 1) & ~(a - 1); } #define 
reverse4(src) \ @@ -146,6 +147,36 @@ void put_bits(inout PutBitContext pb, const uint32_t n, uint32_t value) } } +void put_bits63(inout PutBitContext pb, const uint32_t n, uint64_t value) +{ + if (n < pb.bit_left) { + pb.bit_buf = (pb.bit_buf << n) | value; + pb.bit_left -= uint8_t(n); + } else { + pb.bit_buf <<= pb.bit_left; + pb.bit_buf |= (value >> (n - pb.bit_left)); + +#ifdef PB_UNALIGNED + u8buf bs = u8buf(pb.buf); + [[unroll]] + for (uint8_t i = uint8_t(0); i < BUF_BYTES; i++) + bs[i].v = BYTE_EXTRACT(pb.bit_buf, BUF_BYTES - uint8_t(1) - i); +#else +#ifdef DEBUG + if ((pb.buf % BUF_BYTES) != 0) + debugPrintfEXT("put_bits buffer is not aligned!"); +#endif + + BUF_TYPE bs = BUF_TYPE(pb.buf); + bs.v = BUF_REVERSE(pb.bit_buf); +#endif + pb.buf = uint64_t(bs) + BUF_BYTES; + + pb.bit_left += BUF_BITS - uint8_t(n); + pb.bit_buf = value; + } +} + uint32_t flush_put_bits(inout PutBitContext pb) { /* Align bits to MSBs */ -- 2.49.0 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 7+ messages in thread
* [FFmpeg-devel] [PATCH v5 4/4] lavc: implement a Vulkan-based VC-2 encoder Implements a Vulkan based dirac encoder. Supports Haar and Legall wavelets and should work with all wavelet depths. 2025-05-23 20:23 [FFmpeg-devel] [PATCH v5 1/4] libavcodec/vc2enc: Split out common functions between software and hardware encoders IndecisiveTurtle 2025-05-23 20:23 ` [FFmpeg-devel] [PATCH v5 2/4] libavcodec/vc2enc: Switch quant to int IndecisiveTurtle 2025-05-23 20:23 ` [FFmpeg-devel] [PATCH v5 3/4] libavcodec/vulkan: Add modifications to common shader for VC2 vulkan encoder IndecisiveTurtle @ 2025-05-23 20:23 ` IndecisiveTurtle 2025-05-24 11:09 ` Andreas Rheinhardt 2025-05-26 1:30 ` Michael Niedermayer 2 siblings, 2 replies; 7+ messages in thread From: IndecisiveTurtle @ 2025-05-23 20:23 UTC (permalink / raw) To: ffmpeg-devel; +Cc: IndecisiveTurtle From: IndecisiveTurtle <geoster3d@gmail.com> Performance wise, encoding a 3440x1440 1-minute video is performed in about 2.4 minutes with the cpu encoder running on my Ryzen 5 4600H, while it takes about 1.3 minutes on my NVIDIA GTX 1650 Haar shader has a subgroup optimized variant that applies when configured wavelet depth allows it --- configure | 1 + libavcodec/Makefile | 3 + libavcodec/allcodecs.c | 1 + libavcodec/vc2enc.c | 2 +- libavcodec/vc2enc_vulkan.c | 777 +++++++++++++++++++ libavcodec/vulkan/vc2_dwt_haar.comp | 82 ++ libavcodec/vulkan/vc2_dwt_haar_subgroup.comp | 75 ++ libavcodec/vulkan/vc2_dwt_hor_legall.comp | 82 ++ libavcodec/vulkan/vc2_dwt_upload.comp | 96 +++ libavcodec/vulkan/vc2_dwt_ver_legall.comp | 78 ++ libavcodec/vulkan/vc2_encode.comp | 173 +++++ libavcodec/vulkan/vc2_slice_sizes.comp | 170 ++++ 12 files changed, 1539 insertions(+), 1 deletion(-) create mode 100644 libavcodec/vc2enc_vulkan.c create mode 100644 libavcodec/vulkan/vc2_dwt_haar.comp create mode 100644 libavcodec/vulkan/vc2_dwt_haar_subgroup.comp create mode 100644 libavcodec/vulkan/vc2_dwt_hor_legall.comp create mode 100644 
libavcodec/vulkan/vc2_dwt_upload.comp create mode 100644 libavcodec/vulkan/vc2_dwt_ver_legall.comp create mode 100644 libavcodec/vulkan/vc2_encode.comp create mode 100644 libavcodec/vulkan/vc2_slice_sizes.comp diff --git a/configure b/configure index 2e69b3c56c..09f9dff258 100755 --- a/configure +++ b/configure @@ -3132,6 +3132,7 @@ utvideo_encoder_select="bswapdsp huffman llvidencdsp" vble_decoder_select="llviddsp" vbn_decoder_select="texturedsp" vbn_encoder_select="texturedspenc" +vc2_vulkan_encoder_select="vulkan spirv_compiler" vmix_decoder_select="idctdsp" vc1_decoder_select="blockdsp h264qpel intrax8 mpegvideodec qpeldsp vc1dsp" vc1image_decoder_select="vc1_decoder" diff --git a/libavcodec/Makefile b/libavcodec/Makefile index bdf0d6742e..20968520d7 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -772,6 +772,9 @@ OBJS-$(CONFIG_VC1_MMAL_DECODER) += mmaldec.o OBJS-$(CONFIG_VC1_QSV_DECODER) += qsvdec.o OBJS-$(CONFIG_VC1_V4L2M2M_DECODER) += v4l2_m2m_dec.o OBJS-$(CONFIG_VC2_ENCODER) += vc2enc.o vc2enc_dwt.o vc2enc_common.o diractab.o +OBJS-$(CONFIG_VC2_VULKAN_ENCODER) += vc2enc_vulkan.o vulkan/vc2_encode.o vulkan/vc2_slice_sizes.o \ + vulkan/vc2_dwt_hor_legall.o vulkan/vc2_dwt_ver_legall.o \ + vulkan/vc2_dwt_upload.o vulkan/vc2_dwt_haar.o vulkan/vc2_dwt_haar_subgroup.o OBJS-$(CONFIG_VCR1_DECODER) += vcr1.o OBJS-$(CONFIG_VMDAUDIO_DECODER) += vmdaudio.o OBJS-$(CONFIG_VMDVIDEO_DECODER) += vmdvideo.o diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c index cd4f6ecd59..cd23a9490c 100644 --- a/libavcodec/allcodecs.c +++ b/libavcodec/allcodecs.c @@ -367,6 +367,7 @@ extern const FFCodec ff_vc1_mmal_decoder; extern const FFCodec ff_vc1_qsv_decoder; extern const FFCodec ff_vc1_v4l2m2m_decoder; extern const FFCodec ff_vc2_encoder; +extern const FFCodec ff_vc2_vulkan_encoder; extern const FFCodec ff_vcr1_decoder; extern const FFCodec ff_vmdvideo_decoder; extern const FFCodec ff_vmix_decoder; diff --git a/libavcodec/vc2enc.c b/libavcodec/vc2enc.c index 
22a5a1b97c..d0147ee21d 100644 --- a/libavcodec/vc2enc.c +++ b/libavcodec/vc2enc.c @@ -581,7 +581,7 @@ static const AVOption vc2enc_options[] = { {"slice_width", "Slice width", offsetof(VC2EncContext, slice_width), AV_OPT_TYPE_INT, {.i64 = 32}, 32, 1024, VC2ENC_FLAGS, .unit = "slice_width"}, {"slice_height", "Slice height", offsetof(VC2EncContext, slice_height), AV_OPT_TYPE_INT, {.i64 = 16}, 8, 1024, VC2ENC_FLAGS, .unit = "slice_height"}, {"wavelet_depth", "Transform depth", offsetof(VC2EncContext, wavelet_depth), AV_OPT_TYPE_INT, {.i64 = 4}, 1, 5, VC2ENC_FLAGS, .unit = "wavelet_depth"}, - {"wavelet_type", "Transform type", offsetof(VC2EncContext, wavelet_idx), AV_OPT_TYPE_INT, {.i64 = VC2_TRANSFORM_9_7}, 0, VC2_TRANSFORMS_NB, VC2ENC_FLAGS, .unit = "wavelet_idx"}, + {"wavelet_type", "Transform type", offsetof(VC2EncContext, wavelet_idx), AV_OPT_TYPE_INT, {.i64 = VC2_TRANSFORM_5_3}, 0, VC2_TRANSFORMS_NB, VC2ENC_FLAGS, .unit = "wavelet_idx"}, {"9_7", "Deslauriers-Dubuc (9,7)", 0, AV_OPT_TYPE_CONST, {.i64 = VC2_TRANSFORM_9_7}, INT_MIN, INT_MAX, VC2ENC_FLAGS, .unit = "wavelet_idx"}, {"5_3", "LeGall (5,3)", 0, AV_OPT_TYPE_CONST, {.i64 = VC2_TRANSFORM_5_3}, INT_MIN, INT_MAX, VC2ENC_FLAGS, .unit = "wavelet_idx"}, {"haar", "Haar (with shift)", 0, AV_OPT_TYPE_CONST, {.i64 = VC2_TRANSFORM_HAAR_S}, INT_MIN, INT_MAX, VC2ENC_FLAGS, .unit = "wavelet_idx"}, diff --git a/libavcodec/vc2enc_vulkan.c b/libavcodec/vc2enc_vulkan.c new file mode 100644 index 0000000000..289ffccec4 --- /dev/null +++ b/libavcodec/vc2enc_vulkan.c @@ -0,0 +1,777 @@ +/* + * Copyright (C) 2025 raphaelthegreat <geoster3d@gmail.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/avassert.h" +#include "libavutil/mem.h" +#include "libavutil/pixdesc.h" +#include "libavutil/opt.h" +#include "libavutil/thread.h" +#include "libavutil/version.h" +#include "libavutil/vulkan_spirv.h" +#include "libavutil/hwcontext_vulkan.h" +#include "libavutil/vulkan_loader.h" +#include "libavutil/vulkan.h" +#include "codec_internal.h" +#include "internal.h" +#include "encode.h" +#include "version.h" +#include "vc2enc_common.h" +#include "hwconfig.h" + +#define LEGALL_TILE_DIM 16 +#define LEGALL_WORKGROUP_X 64 +#define SLICE_WORKGROUP_X 128 +#define MAX_NUM_PLANES 3 + +extern const char *ff_source_common_comp; +extern const char *ff_source_vc2_encode_comp; +extern const char *ff_source_vc2_dwt_hor_legall_comp; +extern const char *ff_source_vc2_dwt_ver_legall_comp; +extern const char *ff_source_vc2_slice_sizes_comp; +extern const char *ff_source_vc2_dwt_upload_comp; +extern const char *ff_source_vc2_dwt_haar_comp; +extern const char *ff_source_vc2_dwt_haar_subgroup_comp; + +typedef struct VC2DwtPushData { + int s; + union { + int diff_offset; + int plane_idx; + }; + int level; +} VC2DwtPushData; + +typedef struct VC2EncAuxData { + int quant[MAX_DWT_LEVELS][4]; + int ff_dirac_qscale_tab[116]; + uint16_t interleaved_ue_golomb_tab[256]; + uint16_t top_interleaved_ue_golomb_tab[256]; + uint8_t golomb_len_tab[256]; +} VC2EncAuxData; + +typedef struct VC2EncPushData { + VkDeviceAddress pb; + int num_x; + int num_y; + int wavelet_depth; + int size_scaler; + int prefix_bytes; +} 
VC2EncPushData; + +typedef struct VC2EncSliceArgs { + int quant_idx; + int bytes; + int pb_start; + int pad; +} VC2EncSliceArgs; + +typedef struct VC2EncSliceCalcPushData { + int num_x; + int num_y; + int wavelet_depth; + int size_scaler; + int prefix_bytes; + int bits_ceil; + int bits_floor; +} VC2EncSliceCalcPushData; + +typedef struct VC2EncVulkanContext { + VC2EncContext base; + FFVkBuffer lut_buf; + FFVkBuffer slice_buf; + VC2EncSliceArgs *slice_args; + + /* Vulkan state */ + FFVulkanContext vkctx; + AVVulkanDeviceQueueFamily *qf; + FFVkExecPool e; + FFVkExecContext *exec; + + FFVulkanShader dwt_haar_shd; + FFVulkanShader dwt_upload_shd; + FFVulkanShader dwt_hor_shd, dwt_ver_shd; + FFVulkanShader slice_shd; + FFVulkanShader enc_shd; + AVBufferPool* dwt_buf_pool; + int haar_subgroup; + + VkBuffer plane_buf; + VC2EncPushData enc_consts; + VC2DwtPushData dwt_consts; + VC2EncSliceCalcPushData calc_consts; + + /* Intermediate frame pool */ + AVBufferRef *intermediate_frames_ref[3]; + AVFrame *intermediate_frame[AV_NUM_DATA_POINTERS]; + VkImageView intermediate_views[AV_NUM_DATA_POINTERS]; +} VC2EncVulkanContext; + +static int init_vulkan_pipeline(VC2EncVulkanContext* s, FFVkSPIRVCompiler *spv, + FFVulkanShader* shd, int push_size, + int lg_x, int lg_y, int lg_z, + const char* pl_name, const char* pl_source, + int start_desc, int num_desc) +{ + int err = 0; + uint8_t *spv_data; + size_t spv_len; + void *spv_opaque = NULL; + FFVulkanContext *vkctx = &s->vkctx; + FFVulkanDescriptorSetBinding *desc; + + ff_vk_shader_init(vkctx, shd, pl_name, VK_SHADER_STAGE_COMPUTE_BIT, + NULL, 0, lg_x, lg_y, lg_z, 0); + + av_bprintf(&shd->src, "struct SliceArgs {int quant_idx;int bytes;int pb_start;int pad;};\n"); + + desc = (FFVulkanDescriptorSetBinding []) { + { + .name = "src_planes", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = ff_vk_shader_rep_fmt(vkctx->frames->sw_format, FF_VK_REP_UINT), + .dimensions = 2, + .elems = 
av_pix_fmt_count_planes(vkctx->frames->sw_format), + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + { + .name = "coef_buf", + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + .mem_layout = "r32i", + .dimensions = 2, + .elems = 3, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + }, + { + .name = "AuxData", + .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .mem_layout = "scalar", + .buf_content = "int lut_quant[5][4]; int ff_dirac_qscale_tab[116]; " + "uint16_t interleaved_ue_golomb_tab[256]; " + "uint16_t top_interleaved_ue_golomb_tab[256]; " + "uint8_t golomb_len_tab[256];", + }, + { + .name = "SliceBuffer", + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .mem_layout = "scalar", + .buf_content = "SliceArgs slice_args[];", + }, + }; + RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc + start_desc, num_desc, 0, 0)); + + ff_vk_shader_add_push_const(shd, 0, push_size, VK_SHADER_STAGE_COMPUTE_BIT); + av_bprintf(&shd->src, "#define PB_UNALIGNED\n"); + av_bprintf(&shd->src, "#define PLANE_FMT %d\n", vkctx->frames->sw_format); + GLSLD(ff_source_common_comp); + GLSLD(pl_source); + + /* Compile Haar shader */ + RET(spv->compile_shader(vkctx, spv, shd, &spv_data, &spv_len, "main", &spv_opaque)); + RET(ff_vk_shader_link(vkctx, shd, spv_data, spv_len, "main")); + RET(ff_vk_shader_register_exec(vkctx, &s->e, shd)); + +fail: + return err; +} + +static int init_frame_pools(AVCodecContext *avctx) +{ + int i, err = 0; + VC2EncVulkanContext *sv = avctx->priv_data; + AVHWFramesContext *frames_ctx; + AVVulkanFramesContext *vk_frames; + enum AVPixelFormat sw_format = AV_PIX_FMT_GRAY32; + + for (i = 0; i < 3; i++) { + sv->intermediate_frames_ref[i] = av_hwframe_ctx_alloc(sv->vkctx.device_ref); + if (!sv->intermediate_frames_ref[i]) + return AVERROR(ENOMEM); + + frames_ctx = (AVHWFramesContext *)sv->intermediate_frames_ref[i]->data; + frames_ctx->format = AV_PIX_FMT_VULKAN; + frames_ctx->sw_format = sw_format; + 
frames_ctx->width = sv->base.plane[i].dwt_width; + frames_ctx->height = sv->base.plane[i].dwt_height; + + vk_frames = frames_ctx->hwctx; + vk_frames->tiling = VK_IMAGE_TILING_OPTIMAL; + vk_frames->usage = VK_IMAGE_USAGE_STORAGE_BIT; + vk_frames->img_flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT; + + err = av_hwframe_ctx_init(sv->intermediate_frames_ref[i]); + if (err < 0) { + av_log(avctx, AV_LOG_ERROR, "Unable to initialize frame pool with format %s: %s\n", + av_get_pix_fmt_name(sw_format), av_err2str(err)); + av_buffer_unref(&sv->intermediate_frames_ref[i]); + return err; + } + } + + return err; +} + +static void vulkan_bind_img_planes(FFVulkanContext *s, FFVkExecContext *e, + FFVulkanShader *shd, VkImageView *views, + int set, int binding) +{ + for (int i = 0; i < 3; i++) + ff_vk_shader_update_img(s, e, shd, set, binding, i, + views[i], VK_IMAGE_LAYOUT_GENERAL, + VK_NULL_HANDLE); +} + +static void dwt_plane_haar(VC2EncVulkanContext *s, FFVkExecContext *exec, + VkImageMemoryBarrier2* img_bar, int nb_img_bar) +{ + int p, group_x, group_y; + FFVulkanContext *vkctx = &s->vkctx; + FFVulkanFunctions *vk = &vkctx->vkfn; + Plane* plane; + + s->dwt_consts.level = s->base.wavelet_depth; + vulkan_bind_img_planes(vkctx, exec, &s->dwt_haar_shd, s->intermediate_views, 0, 0); + ff_vk_exec_bind_shader(vkctx, exec, &s->dwt_haar_shd); + + /* Haar pass */ + for (p = 0; p < 3; p++) { + plane = &s->base.plane[p]; + s->dwt_consts.plane_idx = p; + if (s->haar_subgroup) { + group_x = FFALIGN(plane->dwt_width, 8) >> 3; + group_y = FFALIGN(plane->dwt_height, 8) >> 3; + } else { + group_x = FFALIGN(plane->dwt_width, 32) >> 5; + group_y = FFALIGN(plane->dwt_height, 32) >> 5; + } + + ff_vk_shader_update_push_const(vkctx, exec, &s->dwt_haar_shd, VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(VC2DwtPushData), &s->dwt_consts); + vk->CmdDispatch(exec->buf, group_x, group_y, 1); + } + + /* Wait for haar dispatches to complete */ + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = 
VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + }); +} + +/* + * Record the LeGall (5,3) transform into exec: for each wavelet level a + * horizontal pass and then a vertical pass over all three planes, each + * followed by the image barriers passed in by the caller. + */ +static void dwt_plane_legall(VC2EncVulkanContext *s, FFVkExecContext *exec, + VkImageMemoryBarrier2* img_bar, int nb_img_bar) +{ + FFVulkanContext *vkctx = &s->vkctx; + FFVulkanFunctions *vk = &vkctx->vkfn; + /* legall_group_x is sized from the plane height and drives the horizontal + * pass, legall_group_y from the width for the vertical pass. Round up to + * whole workgroups instead of assuming LEGALL_WORKGROUP_X is 64. */ + int legall_group_x = (s->base.plane[0].dwt_height + LEGALL_WORKGROUP_X - 1) / LEGALL_WORKGROUP_X; + int legall_group_y = (s->base.plane[0].dwt_width + LEGALL_WORKGROUP_X - 1) / LEGALL_WORKGROUP_X; + int i; + + /* Perform legall wavelet transform */ + for (i = 0; i < s->base.wavelet_depth; i++) { + s->dwt_consts.level = i; + + /* Horizontal legall pass */ + vulkan_bind_img_planes(vkctx, exec, &s->dwt_hor_shd, s->intermediate_views, 0, 0); + ff_vk_exec_bind_shader(vkctx, exec, &s->dwt_hor_shd); + ff_vk_shader_update_push_const(vkctx, exec, &s->dwt_hor_shd, VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(VC2DwtPushData), &s->dwt_consts); + vk->CmdDispatch(exec->buf, legall_group_x, 1, 3); + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + }); + + /* Vertical legall pass */ + vulkan_bind_img_planes(vkctx, exec, &s->dwt_ver_shd, s->intermediate_views, 0, 0); + ff_vk_exec_bind_shader(vkctx, exec, &s->dwt_ver_shd); + ff_vk_shader_update_push_const(vkctx, exec, &s->dwt_ver_shd, VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(VC2DwtPushData), &s->dwt_consts); + vk->CmdDispatch(exec->buf, legall_group_y, 1, 3); + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + }); + } +} + +static int dwt_planes(VC2EncVulkanContext *s, AVFrame *frame) +{ + int i, err = 0, nb_img_bar = 0; + int wavelet_idx = s->base.wavelet_idx; + int group_x = s->base.plane[0].dwt_width >> 3; + int group_y = s->base.plane[0].dwt_height >> 3; + FFVulkanContext *vkctx =
&s->vkctx; + FFVulkanFunctions *vk = &vkctx->vkfn; + FFVkExecContext *exec = s->exec; + VkImageView views[AV_NUM_DATA_POINTERS]; + VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS]; + + /* Generate barriers and image views for frame images. */ + RET(ff_vk_exec_add_dep_frame(vkctx, exec, frame, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + RET(ff_vk_create_imageviews(vkctx, exec, views, frame, FF_VK_REP_UINT)); + ff_vk_frame_barrier(vkctx, exec, frame, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED); + + /* Submit the image barriers. */ + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + }); + + /* Allocate the temporary intermediate frames, one per plane */ + nb_img_bar = 0; + for (i = 0; i < 3; i++) { + s->intermediate_frame[i] = av_frame_alloc(); + if (!s->intermediate_frame[i]) { + /* Go through fail so frames allocated on earlier iterations are released */ + err = AVERROR(ENOMEM); + goto fail; + } + + RET(av_hwframe_get_buffer(s->intermediate_frames_ref[i], + s->intermediate_frame[i], 0)); + RET(ff_vk_exec_add_dep_frame(vkctx, exec, s->intermediate_frame[i], + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + RET(ff_vk_create_imageviews(vkctx, exec, &s->intermediate_views[i], + s->intermediate_frame[i], FF_VK_REP_INT)); + ff_vk_frame_barrier(vkctx, exec, s->intermediate_frame[i], img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED); + } + + /* Submit the image barriers. */ + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + }); + + /* Bind input images to the shader.
*/ + ff_vk_shader_update_img_array(vkctx, exec, &s->dwt_upload_shd, frame, views, 0, 0, + VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE); + vulkan_bind_img_planes(vkctx, exec, &s->dwt_upload_shd, s->intermediate_views, 0, 1); + + /* Upload coefficients from planes to the buffer. */ + s->dwt_consts.diff_offset = s->base.diff_offset; + ff_vk_exec_bind_shader(vkctx, exec, &s->dwt_upload_shd); + ff_vk_shader_update_push_const(vkctx, exec, &s->dwt_upload_shd, VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(VC2DwtPushData), &s->dwt_consts); + vk->CmdDispatch(exec->buf, group_x, group_y, 1); + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + }); + + /* Perform wavelet transform. */ + if (wavelet_idx == VC2_TRANSFORM_HAAR || wavelet_idx == VC2_TRANSFORM_HAAR_S) + dwt_plane_haar(s, exec, img_bar, nb_img_bar); + else if (wavelet_idx == VC2_TRANSFORM_5_3) + dwt_plane_legall(s, exec, img_bar, nb_img_bar); + +fail: + /* encode_slices() is what frees the intermediate frames, and it is not + * reached when this function fails, so release them here. */ + if (err < 0) + for (i = 0; i < 3; i++) + av_frame_free(&s->intermediate_frame[i]); + return err; +} + +static void encode_slices(VC2EncVulkanContext *sv) +{ + VC2EncContext *s = &sv->base; + FFVkExecContext *exec = sv->exec; + int num_slices = s->num_x * s->num_y; + int num_slice_groups = (num_slices + SLICE_WORKGROUP_X - 1) / SLICE_WORKGROUP_X; + int i, skip = 0; + FFVulkanContext *vkctx = &sv->vkctx; + FFVulkanFunctions *vk = &vkctx->vkfn; + + /* Calculate slice sizes.
*/ + vulkan_bind_img_planes(vkctx, exec, &sv->slice_shd, sv->intermediate_views, 0, 0); + ff_vk_shader_update_desc_buffer(vkctx, exec, &sv->slice_shd, + 0, 1, 0, &sv->lut_buf, 0, + sizeof(VC2EncAuxData), + VK_FORMAT_UNDEFINED); + ff_vk_shader_update_desc_buffer(vkctx, exec, &sv->slice_shd, + 0, 2, 0, &sv->slice_buf, 0, + sv->slice_buf.size, + VK_FORMAT_UNDEFINED); + ff_vk_exec_bind_shader(vkctx, exec, &sv->slice_shd); + ff_vk_shader_update_push_const(vkctx, exec, &sv->slice_shd, VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(VC2EncSliceCalcPushData), &sv->calc_consts); + vk->CmdDispatch(exec->buf, num_slice_groups, 1, 1); + + flush_put_bits(&s->pb); + sv->enc_consts.pb += put_bytes_output(&s->pb); + + /* Wait for slice sizes to be written. */ + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pBufferMemoryBarriers = &(VkBufferMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, + .srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + .srcAccessMask = VK_ACCESS_2_SHADER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_2_SHADER_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = sv->slice_buf.buf, + .size = sizeof(VC2EncSliceArgs) * num_slices, + .offset = 0, + }, + .bufferMemoryBarrierCount = 1U, + }); + + /* Perform the encoding. 
*/ + vulkan_bind_img_planes(vkctx, exec, &sv->enc_shd, sv->intermediate_views, 0, 0); + ff_vk_shader_update_desc_buffer(vkctx, exec, &sv->enc_shd, + 0, 1, 0, &sv->lut_buf, 0, + sizeof(VC2EncAuxData), + VK_FORMAT_UNDEFINED); + ff_vk_shader_update_desc_buffer(vkctx, exec, &sv->enc_shd, + 0, 2, 0, &sv->slice_buf, 0, + sv->slice_buf.size, + VK_FORMAT_UNDEFINED); + ff_vk_exec_bind_shader(vkctx, exec, &sv->enc_shd); + ff_vk_shader_update_push_const(vkctx, exec, &sv->enc_shd, VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(VC2EncPushData), &sv->enc_consts); + + vk->CmdDispatch(exec->buf, num_slice_groups, 1, 1); + + ff_vk_exec_submit(vkctx, exec); + ff_vk_exec_wait(vkctx, exec); + + for (int slice_y = 0; slice_y < s->num_y; slice_y++) { + for (int slice_x = 0; slice_x < s->num_x; slice_x++) { + VC2EncSliceArgs *args = &sv->slice_args[s->num_x * slice_y + slice_x]; + skip += args->bytes; + } + } + + /* Skip forward to write end header */ + skip_put_bytes(&s->pb, skip); + + /* Free allocated intermediate frames */ + for (i = 0; i < 3; i++) + av_frame_free(&sv->intermediate_frame[i]); +} + +static int encode_frame(VC2EncVulkanContext *sv, AVPacket *avpkt, + const AVFrame *frame, const int header_size) +{ + int ret; + int64_t max_frame_bytes; + AVBufferRef *avpkt_buf = NULL; + FFVkBuffer* buf_vk = NULL; + VC2EncContext* s = &sv->base; + FFVulkanContext *vkctx = &sv->vkctx; + + /* Perform wavelet pass on the input data. 
*/ + ret = dwt_planes(sv, (AVFrame*)frame); + if (ret) + goto fail; + + /* Allocate a buffer that can fit all 3 planes of data */ + max_frame_bytes = header_size + MAX_NUM_PLANES * s->avctx->width + * s->avctx->height + * sizeof(dwtcoef); + + /* Get a pooled device local host visible buffer for writing output data */ + ret = ff_vk_get_pooled_buffer(vkctx, &sv->dwt_buf_pool, &avpkt_buf, + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, NULL, + max_frame_bytes, + VK_MEMORY_PROPERTY_HOST_CACHED_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); + if (ret < 0) + goto fail; + + /* The packet keeps avpkt_buf itself, so on failure the buffer is still ours to drop */ + ret = ff_vk_exec_add_dep_buf(vkctx, sv->exec, &avpkt_buf, 1, 1); + if (ret < 0) { + av_buffer_unref(&avpkt_buf); + goto fail; + } + buf_vk = (FFVkBuffer *)avpkt_buf->data; + sv->enc_consts.pb = buf_vk->address; + + /* Initialize packet. */ + avpkt->buf = avpkt_buf; + avpkt->data = buf_vk->mapped_mem; + avpkt->size = max_frame_bytes; + init_put_bits(&s->pb, avpkt->data, avpkt->size); + + /* Encode frame */ + ff_vc2_write_frame_header(s); + encode_slices(sv); + ff_vc2_write_sequence_end(s); + + return 0; + +fail: + /* encode_slices() frees the intermediate frames but is not reached on + * these early exits, so release whatever dwt_planes() left allocated. */ + for (int i = 0; i < 3; i++) + av_frame_free(&sv->intermediate_frame[i]); + return ret; +} + +/* Per-frame encode callback; deliberately not av_cold since it runs for every frame. */ +static int vc2_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, + const AVFrame *frame, int *got_packet) +{ + int ret = 0; + VC2EncVulkanContext *sv = avctx->priv_data; + VC2EncContext *s = &sv->base; + const int bitexact = avctx->flags & AV_CODEC_FLAG_BITEXACT; + const int aux_data_size = bitexact ?
sizeof("Lavc") : sizeof(LIBAVCODEC_IDENT); + const int header_size = 100 + aux_data_size; + + ret = ff_vc2_frame_init(avctx, s); + if (ret) + return ret; + + sv->calc_consts.size_scaler = s->size_scaler; + sv->calc_consts.bits_ceil = s->slice_max_bytes << 3; + sv->calc_consts.bits_floor = s->slice_min_bytes << 3; + sv->enc_consts.prefix_bytes = 0; + sv->enc_consts.size_scaler = s->size_scaler; + + sv->exec = ff_vk_exec_get(&sv->vkctx, &sv->e); + ff_vk_exec_start(&sv->vkctx, sv->exec); + + ret = encode_frame(sv, avpkt, frame, header_size); + if (ret) + return ret; + + flush_put_bits(&s->pb); + av_shrink_packet(avpkt, put_bytes_output(&s->pb)); + avpkt->flags |= AV_PKT_FLAG_KEY; + *got_packet = 1; + + return 0; +} + +static av_cold int vc2_encode_end(AVCodecContext *avctx) +{ + VC2EncVulkanContext *sv = avctx->priv_data; + FFVulkanContext *vkctx = &sv->vkctx; + int i; + + ff_vk_exec_pool_free(vkctx, &sv->e); + + ff_vk_shader_free(vkctx, &sv->dwt_upload_shd); + ff_vk_shader_free(vkctx, &sv->dwt_haar_shd); + ff_vk_shader_free(vkctx, &sv->dwt_hor_shd); + ff_vk_shader_free(vkctx, &sv->dwt_ver_shd); + ff_vk_shader_free(vkctx, &sv->slice_shd); + ff_vk_shader_free(vkctx, &sv->enc_shd); + + ff_vk_free_buf(vkctx, &sv->slice_buf); + ff_vk_free_buf(vkctx, &sv->lut_buf); + + for (i = 0; i < 3; i++) { + ff_vc2enc_free_transforms(&sv->base.transform_args[i].t); + av_buffer_unref(&sv->intermediate_frames_ref[i]); + } + + av_buffer_pool_uninit(&sv->dwt_buf_pool); + ff_vk_uninit(vkctx); + + return 0; +} + +/* + * Encoder init callback: sets up the Vulkan context, compute queue and + * SPIR-V compiler, runs the common VC-2 init, fills the push constants, + * creates the lookup-table and slice buffers and the intermediate frame + * pools, then builds the compute pipelines for the selected wavelet. + * Returns a negative AVERROR code on failure. + */ +static av_cold int vc2_encode_init(AVCodecContext *avctx) +{ + int err = 0, depth; + const AVPixFmtDescriptor *fmt; + VC2EncVulkanContext *sv = avctx->priv_data; + VC2EncContext *s = &sv->base; + FFVulkanContext *vkctx = &sv->vkctx; + FFVkSPIRVCompiler *spv = NULL; + VC2EncAuxData *ad = NULL; + unsigned int subgroup_size; + + /* Init vulkan */ + err = ff_vk_init(&sv->vkctx, avctx, NULL, avctx->hw_frames_ctx); + if (err < 0) + return err; + + /* Read the subgroup size only after ff_vk_init(); nothing in this + * function sets up the context before that call. */ + subgroup_size = vkctx->subgroup_props.maxSubgroupSize; + + sv->qf
= ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0); + if (!sv->qf) { + av_log(avctx, AV_LOG_ERROR, "Device has no compute queues!\n"); + return AVERROR(ENOTSUP); + } + + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); + return AVERROR_EXTERNAL; + } + + /* From here on every error goes through fail so the compiler is released */ + RET(ff_vk_exec_pool_init(vkctx, sv->qf, &sv->e, 1, 0, 0, 0, NULL)); + + /* Chroma subsampling */ + RET(av_pix_fmt_get_chroma_sub_sample(vkctx->frames->sw_format, &s->chroma_x_shift, + &s->chroma_y_shift)); + + /* Bit depth and color range index */ + fmt = av_pix_fmt_desc_get(vkctx->frames->sw_format); + depth = fmt->comp[0].depth; + + /* 16-bit depth is unsupported by this encoder */ + if (depth == 16) { + av_log(avctx, AV_LOG_ERROR, "16-bit pixel format depth is unsupported by this encoder\n"); + err = AVERROR(ENOTSUP); + goto fail; + } + + /* Perform common initialization. */ + RET(ff_vc2_encode_init(avctx, depth)); + + /* Initialize Haar push data */ + sv->dwt_consts.diff_offset = s->diff_offset; + sv->dwt_consts.s = s->wavelet_idx == VC2_TRANSFORM_HAAR_S ? 1 : 0; + sv->dwt_consts.level = 0; + + /* Initialize slice calculation push data */ + sv->calc_consts.num_x = s->num_x; + sv->calc_consts.num_y = s->num_y; + sv->calc_consts.wavelet_depth = s->wavelet_depth; + sv->calc_consts.prefix_bytes = s->prefix_bytes; + + /* Initialize encoder push data */ + sv->enc_consts.wavelet_depth = s->wavelet_depth; + sv->enc_consts.num_x = s->num_x; + sv->enc_consts.num_y = s->num_y; + + /* Create buffer for encoder auxiliary data.
*/ + RET(ff_vk_create_buf(vkctx, &sv->lut_buf, sizeof(VC2EncAuxData), NULL, NULL, + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); + RET(ff_vk_map_buffer(vkctx, &sv->lut_buf, (void *)&ad, 0)); + ff_vc2_init_quant_matrix(s, ad->quant); + memcpy(ad->ff_dirac_qscale_tab, ff_dirac_qscale_tab, sizeof(ff_dirac_qscale_tab)); + memcpy(ad->interleaved_ue_golomb_tab, interleaved_ue_golomb_tab, sizeof(interleaved_ue_golomb_tab)); + memcpy(ad->top_interleaved_ue_golomb_tab, top_interleaved_ue_golomb_tab, sizeof(top_interleaved_ue_golomb_tab)); + memcpy(ad->golomb_len_tab, golomb_len_tab, sizeof(golomb_len_tab)); + RET(ff_vk_unmap_buffer(vkctx, &sv->lut_buf, 1)); + + /* Create the buffer holding one VC2EncSliceArgs per slice; it stays mapped + * so the host can read the slice sizes back. */ + RET(ff_vk_create_buf(vkctx, &sv->slice_buf, + sizeof(VC2EncSliceArgs) * s->num_x * s->num_y, + NULL, NULL, + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); + RET(ff_vk_map_buffer(vkctx, &sv->slice_buf, (void *)&sv->slice_args, 0)); + memset(sv->slice_args, 0, sv->slice_buf.size); + + /* Initialize intermediate frame pool.
*/ + RET(init_frame_pools(avctx)); + + /* Initialize encoding pipelines */ + RET(init_vulkan_pipeline(sv, spv, &sv->dwt_upload_shd, sizeof(VC2DwtPushData), + 8, 8, 1, "dwt_upload_pl", ff_source_vc2_dwt_upload_comp, 0, 2)); + /* The slice size pass is pushed a VC2EncSliceCalcPushData, so the declared + * push constant range has to be at least that large. */ + RET(init_vulkan_pipeline(sv, spv, &sv->slice_shd, + FFMAX(sizeof(VC2EncPushData), sizeof(VC2EncSliceCalcPushData)), + SLICE_WORKGROUP_X, 1, 1, "slice_pl", ff_source_vc2_slice_sizes_comp, 1, 3)); + RET(init_vulkan_pipeline(sv, spv, &sv->enc_shd, sizeof(VC2EncPushData), + SLICE_WORKGROUP_X, 1, 1, "enc_pl", ff_source_vc2_encode_comp, 1, 3)); + sv->haar_subgroup = 0; + + if (s->wavelet_idx == VC2_TRANSFORM_HAAR || s->wavelet_idx == VC2_TRANSFORM_HAAR_S) { + if (subgroup_size == 32 && s->wavelet_depth < 3) { + RET(init_vulkan_pipeline(sv, spv, &sv->dwt_haar_shd, sizeof(VC2DwtPushData), + 64, 1, 1, "dwt_haar_pl", ff_source_vc2_dwt_haar_subgroup_comp, 1, 1)); + sv->haar_subgroup = 1; + } else if (subgroup_size == 64 && s->wavelet_depth < 4) { + RET(init_vulkan_pipeline(sv, spv, &sv->dwt_haar_shd, sizeof(VC2DwtPushData), + 64, 1, 1, "dwt_haar_pl", ff_source_vc2_dwt_haar_subgroup_comp, 1, 1)); + sv->haar_subgroup = 1; + } else { + RET(init_vulkan_pipeline(sv, spv, &sv->dwt_haar_shd, sizeof(VC2DwtPushData), + 32, 32, 1, "dwt_haar_pl", ff_source_vc2_dwt_haar_comp, 1, 1)); + } + } else if (s->wavelet_idx == VC2_TRANSFORM_5_3) { + RET(init_vulkan_pipeline(sv, spv, &sv->dwt_hor_shd, sizeof(VC2DwtPushData), + LEGALL_WORKGROUP_X, 1, 1, "dwt_hor_pl", ff_source_vc2_dwt_hor_legall_comp, 1, 1)); + RET(init_vulkan_pipeline(sv, spv, &sv->dwt_ver_shd, sizeof(VC2DwtPushData), + LEGALL_WORKGROUP_X, 1, 1, "dwt_ver_pl", ff_source_vc2_dwt_ver_legall_comp, 1, 1)); + } else { + /* dwt_planes() only dispatches Haar and LeGall transforms; any other + * type would be encoded without a transform having been applied. */ + av_log(avctx, AV_LOG_ERROR, "Unsupported wavelet type %d\n", s->wavelet_idx); + err = AVERROR(ENOTSUP); + goto fail; + } + +fail: + /* The compiler is a local of this function and is not used after init */ + if (spv) + spv->uninit(&spv); + return err; +} + +#define VC2ENC_FLAGS (AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) +static const AVOption vc2enc_options[] = { + {"tolerance", "Max undershoot in percent", offsetof(VC2EncContext, tolerance), AV_OPT_TYPE_DOUBLE, {.dbl = 5.0f}, 0.0f, 45.0f, VC2ENC_FLAGS, .unit = "tolerance"}, + {"slice_width", "Slice width", offsetof(VC2EncContext, slice_width),
AV_OPT_TYPE_INT, {.i64 = 32}, 32, 1024, VC2ENC_FLAGS, .unit = "slice_width"}, + {"slice_height", "Slice height", offsetof(VC2EncContext, slice_height), AV_OPT_TYPE_INT, {.i64 = 16}, 8, 1024, VC2ENC_FLAGS, .unit = "slice_height"}, + {"wavelet_depth", "Transform depth", offsetof(VC2EncContext, wavelet_depth), AV_OPT_TYPE_INT, {.i64 = 4}, 1, 5, VC2ENC_FLAGS, .unit = "wavelet_depth"}, + {"wavelet_type", "Transform type", offsetof(VC2EncContext, wavelet_idx), AV_OPT_TYPE_INT, {.i64 = VC2_TRANSFORM_5_3}, 1, VC2_TRANSFORMS_NB, VC2ENC_FLAGS, .unit = "wavelet_idx"}, + {"5_3", "LeGall (5,3)", 0, AV_OPT_TYPE_CONST, {.i64 = VC2_TRANSFORM_5_3}, INT_MIN, INT_MAX, VC2ENC_FLAGS, .unit = "wavelet_idx"}, + {"haar", "Haar (with shift)", 0, AV_OPT_TYPE_CONST, {.i64 = VC2_TRANSFORM_HAAR_S}, INT_MIN, INT_MAX, VC2ENC_FLAGS, .unit = "wavelet_idx"}, + {"haar_noshift", "Haar (without shift)", 0, AV_OPT_TYPE_CONST, {.i64 = VC2_TRANSFORM_HAAR}, INT_MIN, INT_MAX, VC2ENC_FLAGS, .unit = "wavelet_idx"}, + {"qm", "Custom quantization matrix", offsetof(VC2EncContext, quant_matrix), AV_OPT_TYPE_INT, {.i64 = VC2_QM_DEF}, 0, VC2_QM_NB, VC2ENC_FLAGS, .unit = "quant_matrix"}, + {"default", "Default from the specifications", 0, AV_OPT_TYPE_CONST, {.i64 = VC2_QM_DEF}, INT_MIN, INT_MAX, VC2ENC_FLAGS, .unit = "quant_matrix"}, + {"color", "Prevents low bitrate discoloration", 0, AV_OPT_TYPE_CONST, {.i64 = VC2_QM_COL}, INT_MIN, INT_MAX, VC2ENC_FLAGS, .unit = "quant_matrix"}, + {"flat", "Optimize for PSNR", 0, AV_OPT_TYPE_CONST, {.i64 = VC2_QM_FLAT}, INT_MIN, INT_MAX, VC2ENC_FLAGS, .unit = "quant_matrix"}, + {NULL} +}; + +static const AVClass vc2enc_class = { + .class_name = "vc2_vulkan_encoder", + .category = AV_CLASS_CATEGORY_ENCODER, + .option = vc2enc_options, + .item_name = av_default_item_name, + .version = LIBAVUTIL_VERSION_INT +}; + +static const FFCodecDefault vc2enc_defaults[] = { + { "b", "600000000" }, + { NULL }, +}; + +static const AVCodecHWConfigInternal *const vc2_hw_configs[] = { + 
HW_CONFIG_ENCODER_FRAMES(VULKAN, VULKAN), + HW_CONFIG_ENCODER_DEVICE(NONE, VULKAN), + NULL, +}; + +const FFCodec ff_vc2_vulkan_encoder = { + .p.name = "vc2_vulkan", + CODEC_LONG_NAME("SMPTE VC-2"), + .p.type = AVMEDIA_TYPE_VIDEO, + .p.id = AV_CODEC_ID_DIRAC, + .p.capabilities = AV_CODEC_CAP_HARDWARE, + .caps_internal = FF_CODEC_CAP_INIT_CLEANUP, + .priv_data_size = sizeof(VC2EncVulkanContext), + .init = vc2_encode_init, + .close = vc2_encode_end, + FF_CODEC_ENCODE_CB(vc2_encode_frame), + .p.priv_class = &vc2enc_class, + .defaults = vc2enc_defaults, + CODEC_PIXFMTS(AV_PIX_FMT_VULKAN), + .hw_configs = vc2_hw_configs, +}; diff --git a/libavcodec/vulkan/vc2_dwt_haar.comp b/libavcodec/vulkan/vc2_dwt_haar.comp new file mode 100644 index 0000000000..4806cca729 --- /dev/null +++ b/libavcodec/vulkan/vc2_dwt_haar.comp @@ -0,0 +1,82 @@ +/* + * VC2 codec + * + * Copyright (c) 2025 raphaelthegreat <geoster3d@gmail.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#extension GL_EXT_scalar_block_layout : require +#extension GL_EXT_buffer_reference : require + +#define LOCAL_X 1024 + +layout(push_constant, scalar) uniform ComputeInfo { + int s; + int plane_idx; + int wavelet_depth; +}; + +shared int local_coef[LOCAL_X]; + +void main() +{ + ivec2 coord = ivec2(gl_GlobalInvocationID.xy); + int value = imageLoad(coef_buf[plane_idx], coord).x; + + /* Perform Haar wavelet on the 32x32 local workgroup with shared memory */ + for (int i = 0; i < wavelet_depth; i++) + { + /* Invocations that are off this level's sample grid keep their value + * but still run the loop: barrier() has to be reached by the whole + * workgroup, so they must not break out early. */ + ivec2 mask = ivec2((1 << i) - 1); + bool on_grid = all(equal(coord & mask, ivec2(0))); + + /* Offset between valid hor pixels for each level, +1, +2, +4 etc */ + int dist = (1 << i); + + local_coef[gl_LocalInvocationIndex] = value; + barrier(); + + /* Horizontal haar wavelet */ + uint other_id = gl_LocalInvocationIndex ^ dist; + int other = local_coef[other_id]; + /* Every partner value must be read before the slots are rewritten */ + barrier(); + if (on_grid) { + int a = gl_LocalInvocationIndex < other_id ? value : other; + int b = gl_LocalInvocationIndex < other_id ? other : value; + int dst_b = (b - a) * (1 << s); + int dst_a = a * (1 << s) + ((dst_b + 1) >> 1); + value = gl_LocalInvocationIndex < other_id ? dst_a : dst_b; + } + + /* Offset between valid ver pixels for each level, +1, +2, +4 etc */ + dist <<= 5; + + local_coef[gl_LocalInvocationIndex] = value; + barrier(); + + /* Vertical haar wavelet */ + other_id = gl_LocalInvocationIndex ^ dist; + other = local_coef[other_id]; + /* Same as above: finish all reads before the next level's writes */ + barrier(); + if (on_grid) { + int a = gl_LocalInvocationIndex < other_id ? value : other; + int b = gl_LocalInvocationIndex < other_id ? other : value; + int dst_b = b - a; + int dst_a = a + ((dst_b + 1) >> 1); + value = gl_LocalInvocationIndex < other_id ?
dst_a : dst_b; + } + } + + /* Store value */ + imageStore(coef_buf[plane_idx], coord, ivec4(value)); +} diff --git a/libavcodec/vulkan/vc2_dwt_haar_subgroup.comp b/libavcodec/vulkan/vc2_dwt_haar_subgroup.comp new file mode 100644 index 0000000000..81b0964271 --- /dev/null +++ b/libavcodec/vulkan/vc2_dwt_haar_subgroup.comp @@ -0,0 +1,75 @@ +/* + * VC2 codec + * + * Copyright (c) 2025 raphaelthegreat <geoster3d@gmail.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#extension GL_EXT_scalar_block_layout : require +#extension GL_KHR_shader_subgroup_basic : require +#extension GL_KHR_shader_subgroup_shuffle : require + +#define TILE_DIM 8 + +layout(push_constant, scalar) uniform ComputeInfo { + int s; + int plane_idx; + int wavelet_depth; +}; + +void main() +{ + ivec2 tile_coord = ivec2(gl_WorkGroupID.xy); + ivec2 local_coord = ivec2(gl_LocalInvocationIndex & 7, gl_LocalInvocationIndex >> 3); + ivec2 coord = tile_coord * ivec2(TILE_DIM) + local_coord; + + int value = imageLoad(coef_buf[plane_idx], coord).x; + for (int i = 0; i < wavelet_depth; i++) + { + ivec2 mask = ivec2((1 << i) - 1); + if (any(notEqual(local_coord & mask, ivec2(0)))) + break; + + /* Offset between valid hor pixels for each level, +1, +2, +4 etc */ + int dist = (1 << i); + + /*
Horizontal haar wavelet */ + uint other_sub_id = gl_SubgroupInvocationID ^ dist; + int other = subgroupShuffle(value, other_sub_id); + int a = gl_SubgroupInvocationID < other_sub_id ? value : other; + int b = gl_SubgroupInvocationID < other_sub_id ? other : value; + int dst_b = (b - a) * (1 << s); + int dst_a = a * (1 << s) + ((dst_b + 1) >> 1); + value = gl_SubgroupInvocationID < other_sub_id ? dst_a : dst_b; + + /* Offset between valid ver pixels for each level, +1, +2, +4 etc */ + dist <<= 3; + + /* Vertical haar wavelet */ + other_sub_id = gl_SubgroupInvocationID ^ dist; + other = subgroupShuffle(value, other_sub_id); + a = gl_SubgroupInvocationID < other_sub_id ? value : other; + b = gl_SubgroupInvocationID < other_sub_id ? other : value; + dst_b = b - a; + dst_a = a + ((dst_b + 1) >> 1); + value = gl_SubgroupInvocationID < other_sub_id ? dst_a : dst_b; + } + + /* Store value */ + imageStore(coef_buf[plane_idx], coord, ivec4(value)); +} diff --git a/libavcodec/vulkan/vc2_dwt_hor_legall.comp b/libavcodec/vulkan/vc2_dwt_hor_legall.comp new file mode 100644 index 0000000000..bada2ee1fd --- /dev/null +++ b/libavcodec/vulkan/vc2_dwt_hor_legall.comp @@ -0,0 +1,82 @@ +/* + * VC2 codec + * + * Copyright (c) 2025 raphaelthegreat <geoster3d@gmail.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#extension GL_EXT_scalar_block_layout : require +#extension GL_EXT_buffer_reference : require + +layout(push_constant, scalar) uniform ComputeInfo { + int s; + int diff_offset; + int level; +}; + +int image_load(int coord_x) +{ + int coord_y = int(gl_GlobalInvocationID.x); + return imageLoad(coef_buf[gl_GlobalInvocationID.z], ivec2(coord_x, coord_y)).x; +} + +void image_store(int coord_x, int value) +{ + int coord_y = int(gl_GlobalInvocationID.x); + imageStore(coef_buf[gl_GlobalInvocationID.z], ivec2(coord_x, coord_y), ivec4(value)); +} + +void main() +{ + int coord_y = int(gl_GlobalInvocationID.x); + uint plane_idx = gl_GlobalInvocationID.z; + ivec2 work_area = imageSize(coef_buf[plane_idx]); + int dist = 1 << level; + if (coord_y >= work_area.y || (coord_y & (dist - 1)) != 0) + return; + + // Each invocation walks one row (coord_y) of one plane serially, touching only samples spaced dist apart. Shift in one bit that is used for additional precision + for (int x = 0; x < work_area.x; x += dist) + image_store(x, image_load(x) << 1); + + // Lifting stage 2: every odd multiple of dist has the rounded mean of its two neighbours subtracted; the last one has no right neighbour, so its left neighbour is counted twice + for (int x = 0; x < work_area.x - 2 * dist; x += 2 * dist) { + int lhs = image_load(x); + int rhs = image_load(x + 2 * dist); + int value = image_load(x + dist); + value -= (lhs + rhs + 1) >> 1; + image_store(x + dist, value); + } + int lhs = image_load(work_area.x - 2 * dist); + int value = image_load(work_area.x - dist); + value -= (2 * lhs + 1) >> 1; + image_store(work_area.x - dist, value); + + // Lifting stage 1: every even multiple of dist gets (lhs + rhs + 2) >> 2 of the neighbours updated in stage 2 added; sample 0 has no left neighbour, so its right neighbour is counted twice + lhs = image_load(dist); + value = image_load(0); + value += (2 * lhs + 2) >> 2; + image_store(0, value); + for (int x = 2 * dist; x <= work_area.x - 2 * dist; x += 2 * dist) { + int lhs = image_load(x - dist); + int rhs = image_load(x + dist); + int value = image_load(x); + value += (lhs + rhs + 2) >> 2; + image_store(x, value); + } +} +diff --git
a/libavcodec/vulkan/vc2_dwt_upload.comp b/libavcodec/vulkan/vc2_dwt_upload.comp new file mode 100644 index 0000000000..c758fd867f --- /dev/null +++ b/libavcodec/vulkan/vc2_dwt_upload.comp @@ -0,0 +1,96 @@ +/* + * VC2 codec + * + * Copyright (c) 2025 raphaelthegreat <geoster3d@gmail.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#extension GL_EXT_scalar_block_layout : require +#extension GL_EXT_shader_explicit_arithmetic_types : require + +#define AV_PIX_FMT_XV30 214 +#define AV_PIX_FMT_XV36 216 +#define AV_PIX_FMT_XV48 242 +#define AV_PIX_FMT_P212 222 +#define AV_PIX_FMT_P012 209 +#define AV_PIX_FMT_P210 198 +#define AV_PIX_FMT_P016 169 +#define AV_PIX_FMT_P010 158 +#define AV_PIX_FMT_NV16 101 +#define AV_PIX_FMT_NV12 23 + +#define Y 0 +#define U 1 +#define V 2 + +layout(push_constant, scalar) uniform ComputeInfo { + int s; + int diff_offset; + int level; +}; + +uvec4 load_plane(uint plane_idx) +{ + ivec2 coord = ivec2(gl_GlobalInvocationID.xy); + return imageLoad(src_planes[plane_idx], coord); +} + +void store_plane(uint plane_idx, uint value) +{ + int result = int(value - diff_offset); + ivec2 coord = ivec2(gl_GlobalInvocationID.xy); + imageStore(coef_buf[plane_idx], coord, ivec4(result)); +} + +void main() +{ + uvec4 p0 = load_plane(0); 
+#if PLANE_FMT == AV_PIX_FMT_XV30 + store_plane(Y, (p0.x >> 10) & 0x3FF); + store_plane(U, p0.x & 0x3FF); + store_plane(V, (p0.x >> 20) & 0x3FF); +#elif PLANE_FMT == AV_PIX_FMT_XV36 + store_plane(Y, p0.y >> 4); + store_plane(U, p0.x >> 4); + store_plane(V, p0.z >> 4); +#elif PLANE_FMT == AV_PIX_FMT_NV12 + uvec4 p1 = load_plane(1); + store_plane(Y, p0.x | p0.y << 8); + store_plane(U, p1.x); + store_plane(V, p1.y); +#elif PLANE_FMT == AV_PIX_FMT_NV16 + uvec4 p1 = load_plane(1); + store_plane(Y, p0.x); + store_plane(U, p1.x); + store_plane(V, p1.y); +#elif PLANE_FMT == AV_PIX_FMT_P010 || PLANE_FMT == AV_PIX_FMT_P210 + uvec4 p1 = load_plane(1); + store_plane(Y, p0.x >> 6); + store_plane(U, p1.x >> 6); + store_plane(V, p1.y >> 6); +#elif PLANE_FMT == AV_PIX_FMT_P012 || PLANE_FMT == AV_PIX_FMT_P212 + uvec4 p1 = load_plane(1); + store_plane(Y, p0.x >> 4); + store_plane(U, p1.x >> 4); + store_plane(V, p1.y >> 4); +#else + store_plane(Y, p0.x); + store_plane(U, load_plane(1).x); + store_plane(V, load_plane(2).x); +#endif +} diff --git a/libavcodec/vulkan/vc2_dwt_ver_legall.comp b/libavcodec/vulkan/vc2_dwt_ver_legall.comp new file mode 100644 index 0000000000..ca391cc8d8 --- /dev/null +++ b/libavcodec/vulkan/vc2_dwt_ver_legall.comp @@ -0,0 +1,78 @@ +/* + * VC2 codec + * + * Copyright (c) 2025 raphaelthegreat <geoster3d@gmail.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#extension GL_EXT_scalar_block_layout : require +#extension GL_EXT_buffer_reference : require + +layout(push_constant, scalar) uniform ComputeInfo { + int s; + int diff_offset; + int level; +}; + +int image_load(int coord_y) +{ + int coord_x = int(gl_GlobalInvocationID.x); + return imageLoad(coef_buf[gl_GlobalInvocationID.z], ivec2(coord_x, coord_y)).x; +} + +void image_store(int coord_y, int value) +{ + int coord_x = int(gl_GlobalInvocationID.x); + imageStore(coef_buf[gl_GlobalInvocationID.z], ivec2(coord_x, coord_y), ivec4(value)); +} + +void main() +{ + int coord_x = int(gl_GlobalInvocationID.x); + uint plane_idx = gl_GlobalInvocationID.z; + ivec2 work_area = imageSize(coef_buf[plane_idx]); + int dist = 1 << level; + if (coord_x >= work_area.x || (coord_x & (dist - 1)) != 0) + return; + + // Each invocation walks one column (coord_x) of one plane serially; no precision shift is applied in this pass. Lifting stage 2: every odd multiple of dist has the rounded mean of its two neighbours subtracted; the last one has no lower neighbour, so its upper neighbour is counted twice + for (int y = dist; y < work_area.y - 2 * dist; y += 2 * dist) { + int lhs = image_load(y - dist); + int rhs = image_load(y + dist); + int value = image_load(y); + value -= (lhs + rhs + 1) >> 1; + image_store(y, value); + } + int lhs = image_load(work_area.y - 2 * dist); + int value = image_load(work_area.y - dist); + value -= (2 * lhs + 1) >> 1; + image_store(work_area.y - dist, value); + + // Lifting stage 1: every even multiple of dist gets (lhs + rhs + 2) >> 2 of the neighbours updated in stage 2 added; sample 0 has no upper neighbour, so its lower neighbour is counted twice + lhs = image_load(dist); + value = image_load(0); + value += (2 * lhs + 2) >> 2; + image_store(0, value); + for (int y = 2 * dist; y <= work_area.y - 2 * dist; y += 2 * dist) { + int lhs = image_load(y + dist); + int rhs = image_load(y - dist); + int value = image_load(y); + value += (lhs + rhs + 2) >> 2; + image_store(y, value); + } +} diff --git a/libavcodec/vulkan/vc2_encode.comp b/libavcodec/vulkan/vc2_encode.comp new file mode 100644 index 0000000000..bd0bbbc1ca --- /dev/null +++ b/libavcodec/vulkan/vc2_encode.comp @@ -0,0
+1,173 @@ +/* + * VC2 codec + * + * Copyright (c) 2025 raphaelthegreat <geoster3d@gmail.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */

#extension GL_EXT_shader_explicit_arithmetic_types : require
#extension GL_EXT_scalar_block_layout : require
#extension GL_EXT_buffer_reference : require
/* GL_EXT_debug_printf is intentionally not required: nothing in this shader
 * calls debugPrintfEXT(). */

#define MAX_DWT_LEVELS (5)

layout(push_constant, scalar) uniform ComputeInfo {
    u8buf bytestream;
    ivec2 num_slices;
    int wavelet_depth;
    int size_scaler;
    /* NOTE(review): prefix_bytes is never read in this shader. If the host can
     * set it to a non-zero value, the prefix bytes presumably have to be
     * emitted before the quant index - confirm against the CPU encoder. */
    int prefix_bytes;
};

/*
 * Write (val + 1) to the bitstream using the interleaved exp-Golomb lookup
 * tables. The value is consumed 8 bits at a time from the bottom; each full
 * byte contributes 16 code bits, the remaining top part contributes
 * golomb_len_tab[val] bits. Everything is emitted with one put_bits63() call.
 */
void put_vc2_ue_uint(inout PutBitContext pb, uint val)
{
    uint64_t pbits = 1;
    int bits = 1;

    ++val;

    while ((val >> 8) != 0)
    {
        pbits |= uint64_t(interleaved_ue_golomb_tab[val & 0xff]) << bits;
        val >>= 8;
        bits += 16;
    }

    pbits |= uint64_t(top_interleaved_ue_golomb_tab[val]) << bits;
    bits += golomb_len_tab[val];
    put_bits63(pb, bits, pbits);
}

/*
 * Advance the writer by n bytes, filling them with 0xFF. Whole 32-bit words
 * are stored directly; the remaining 0-3 bytes are left as set bits in the
 * bit buffer. The previous contents of pb.bit_buf are overwritten, so the
 * writer is expected to have been flushed by the caller.
 */
void skip_put_bytes(inout PutBitContext pb, int n)
{
    int skip_dwords = n >> 2;
    while (skip_dwords > 0)
    {
        u8vec4buf(pb.buf).v = u8vec4(0xFF);
        pb.buf += 4;
        skip_dwords--;
    }
    int skip_bits = (n & 3) << 3;
    pb.bit_buf = (1 << skip_bits) - 1;
    pb.bit_left = uint8_t(BUF_BITS - skip_bits);
}

/* Per-invocation quantiser index for every (level, orientation) pair. */
int quants[MAX_DWT_LEVELS][4];

/* Map an index inside a subband to its coordinate in the interleaved
 * coefficient image: (2 * index + h) scaled by 2^(wavelet_depth - lvl - 1). */
int subband_coord(int index, int h, int lvl)
{
    int coord = index;
    coord <<= 1;
    coord |= h;
    coord <<= (wavelet_depth-lvl-1);
    return coord;
}

/*
 * Encode one slice per invocation: write the quant index, then for each of
 * the three planes a length byte followed by the quantised coefficients and
 * 0xFF padding.
 */
void main()
{
    int slice_index = int(gl_GlobalInvocationID.x);
    int max_index = num_slices.x * num_slices.y;
    if (slice_index >= max_index)
        return;

    /* Step 2. Quantize and encode */
    /* pb_start is relative to the start of this workgroup's slices; add the
     * total size of every preceding workgroup, taken from its last slice. */
    int pb_start = slice_args[slice_index].pb_start;
    int workgroup_x = int(gl_WorkGroupSize.x);
    int workgroup_id = int(gl_WorkGroupID.x);
    for (int i = 0, index = workgroup_x - 1; i < workgroup_id; i++) {
        pb_start += slice_args[index].pb_start + slice_args[index].bytes;
        index += workgroup_x;
    }
    ivec2 slice_coord = ivec2(slice_index % num_slices.x, slice_index / num_slices.x);
    int slice_bytes_max = slice_args[slice_index].bytes;
    int quant_index = slice_args[slice_index].quant_idx;

    PutBitContext pb;
    init_put_bits(pb, OFFBUF(u8buf, bytestream, pb_start), slice_bytes_max);

    for (int level = 0; level < wavelet_depth; level++)
        for (int orientation = int(level > 0); orientation < 4; orientation++)
            quants[level][orientation] = max(quant_index - lut_quant[level][orientation], 0);

    /* Write quant index for this slice */
    put_bits(pb, 8, quant_index);

    /* Luma + 2 Chroma planes */
    for (int p = 0; p < 3; p++)
    {
        int bytes_start = int32_t(put_bytes_count(pb));

        /* Save current location and write a zero value */
        uint64_t write_ptr_start = pb.buf;
        int bit_left_start = pb.bit_left;
        put_bits(pb, 8, 0);

        ivec2 dwt_dim = imageSize(coef_buf[p]);
        for (int level = 0; level < wavelet_depth; level++)
        {
            ivec2 band_size = dwt_dim >> (wavelet_depth - level);
            for (int o = int(level > 0); o < 4; o++)
            {
                /* Encode subband */
                int left = band_size.x * (slice_coord.x) / num_slices.x;
                int right = band_size.x * (slice_coord.x+1) / num_slices.x;
                int top = band_size.y * (slice_coord.y) / num_slices.y;
                int bottom = band_size.y * (slice_coord.y+1) / num_slices.y;

                const int q_idx = quants[level][o];
                const int qfactor = ff_dirac_qscale_tab[q_idx];

                const int yh = o >> 1;
                const int xh = o & 1;

                for (int y = top; y < bottom; y++)
                {
                    for (int x = left; x < right; x++)
                    {
                        int sx = subband_coord(x, xh, level);
                        int sy = subband_coord(y, yh, level);
                        int coef = imageLoad(coef_buf[p], ivec2(sx, sy)).x;
                        uint c_abs = uint(abs(coef));
                        c_abs = (c_abs << 2) / qfactor;
                        put_vc2_ue_uint(pb, c_abs);
                        /* The sign bit is only sent for non-zero values. */
                        if (c_abs != 0)
                            put_bits(pb, 1, int(coef < 0));
                    }
                }
            }
        }
        flush_put_bits(pb);
        int bytes_len = int32_t(put_bytes_count(pb)) - bytes_start - 1;

        /* The last plane also absorbs whatever is left of the slice budget. */
        int padded_len = bytes_len;
        if (p == 2)
            padded_len += slice_bytes_max - int32_t(put_bytes_count(pb));
        int pad_s = align(padded_len, size_scaler)/size_scaler;
        int pad_c = (pad_s*size_scaler) - bytes_len;

        /* Patch the length byte that was reserved before the coefficients. */
        uint64_t start_ptr = write_ptr_start + ((BUF_BITS - bit_left_start) >> 3);
        u8buf(start_ptr).v = uint8_t(pad_s);
        /* vc2-reference uses that padding that decodes to '0' coeffs */
        skip_put_bytes(pb, pad_c);
    }
}

diff --git a/libavcodec/vulkan/vc2_slice_sizes.comp b/libavcodec/vulkan/vc2_slice_sizes.comp new file mode 100644 index 0000000000..61070c1dc2 --- /dev/null +++ b/libavcodec/vulkan/vc2_slice_sizes.comp @@ -0,0 +1,170 @@ +/* + * VC2 codec + * + * Copyright (c) 2025 raphaelthegreat <geoster3d@gmail.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */

#extension GL_EXT_shader_explicit_arithmetic_types : require
#extension GL_EXT_scalar_block_layout : require
#extension GL_EXT_buffer_reference : require

#define DIRAC_MAX_QUANT_INDEX 116
#define MAX_DWT_LEVELS 5

layout(push_constant, scalar) uniform ComputeInfo {
    ivec2 num_slices;
    int wavelet_depth;
    int size_scaler;
    int prefix_bytes;
    int bits_ceil;
    int bits_floor;
};

/* Number of bits used to code val: 2 * floor(log2(val + 1)) + 1. */
int count_vc2_ue_uint(uint val)
{
    return 2 * findMSB(val + 1) + 1;
}

/* Per-invocation memo of count_hq_slice() results; 0 means "not computed". */
int cache[DIRAC_MAX_QUANT_INDEX];
/* Per-invocation quantiser index for every (level, orientation) pair. */
int quants[MAX_DWT_LEVELS][4];
/* Byte size of every slice handled by this workgroup. */
shared int slice_sizes[gl_WorkGroupSize.x];

/* Map an index inside a subband to its coordinate in the interleaved
 * coefficient image: (2 * index + h) scaled by 2^(wavelet_depth - lvl - 1). */
int subband_coord(int index, int h, int lvl)
{
    int coord = index;
    coord <<= 1;
    coord |= h;
    coord <<= (wavelet_depth-lvl-1);
    return coord;
}

/*
 * Return the size in bits of this invocation's slice when coded with
 * quant_index. Results are memoised in cache[], so repeated queries for the
 * same index during the search are free.
 */
int count_hq_slice(int quant_index)
{
    int bits = 0;
    if (cache[quant_index] != 0)
        return cache[quant_index];

    bits += 8*prefix_bytes;
    bits += 8; /* quant_idx */

    for (int level = 0; level < wavelet_depth; level++)
        for (int orientation = int(level > 0); orientation < 4; orientation++)
            quants[level][orientation] = max(quant_index - lut_quant[level][orientation], 0);

    int slice_index = int(gl_GlobalInvocationID.x);
    ivec2 slice_coord = ivec2(slice_index % num_slices.x, slice_index / num_slices.x);
    for (int p = 0; p < 3; p++)
    {
        int bytes_start = bits >> 3;
        bits += 8; /* per-plane length byte */

        ivec2 dwt_dim = imageSize(coef_buf[p]);
        for (int level = 0; level < wavelet_depth; level++)
        {
            ivec2 band_dim = dwt_dim >> (wavelet_depth - level);
            for (int o = int(level > 0); o < 4; o++)
            {
                const int left = band_dim.x * slice_coord.x / num_slices.x;
                const int right = band_dim.x * (slice_coord.x+1) / num_slices.x;
                const int top = band_dim.y * slice_coord.y / num_slices.y;
                const int bottom = band_dim.y * (slice_coord.y+1) / num_slices.y;

                const int q_idx = quants[level][o];
                const int qfactor = ff_dirac_qscale_tab[q_idx];

                const int yh = o >> 1;
                const int xh = o & 1;

                for (int y = top; y < bottom; y++)
                {
                    for (int x = left; x < right; x++)
                    {
                        int sx = subband_coord(x, xh, level);
                        int sy = subband_coord(y, yh, level);
                        int coef = imageLoad(coef_buf[p], ivec2(sx, sy)).x;
                        uint c_abs = uint(abs(coef));
                        c_abs = (c_abs << 2) / qfactor;
                        bits += count_vc2_ue_uint(c_abs);
                        bits += int(c_abs > 0); /* sign bit */
                    }
                }
            }
        }
        /* Round up to a whole byte, then pad the plane to size_scaler. */
        bits = align(bits, 8);
        int bytes_len = (bits >> 3) - bytes_start - 1;
        int pad_s = align(bytes_len, size_scaler) / size_scaler;
        int pad_c = (pad_s * size_scaler) - bytes_len;
        bits += pad_c * 8;
    }

    cache[quant_index] = bits;
    return bits;
}

/* Round a byte count up to size_scaler and add 4 + prefix_bytes. */
int ssize_round(int b)
{
    return align(b, size_scaler) + 4 + prefix_bytes;
}

/*
 * Pick a quantiser index for this invocation's slice so that its size lands
 * between bits_floor and bits_ceil, store the index and byte size in
 * slice_args, then compute the slice's byte offset within the workgroup.
 *
 * Invocations beyond the last slice do no work but still reach barrier():
 * barrier() must be executed in uniform control flow, so returning early
 * before it would be undefined behaviour.
 */
void main()
{
    int slice_index = int(gl_GlobalInvocationID.x);
    int max_index = num_slices.x * num_slices.y;
    const bool in_range = slice_index < max_index;
    int bytes = 0;

    if (in_range)
    {
        for (int i = 0; i < DIRAC_MAX_QUANT_INDEX; i++)
            cache[i] = 0;

        const int q_ceil = DIRAC_MAX_QUANT_INDEX;
        const int top = bits_ceil;
        const int bottom = bits_floor;
        int quant_buf[2] = int[2](-1, -1);
        int quant = slice_args[slice_index].quant_idx;
        int step = 1;
        int bits_last = 0;
        int bits = count_hq_slice(quant);
        while ((bits > top) || (bits < bottom))
        {
            const int signed_step = bits > top ? +step : -step;
            quant = clamp(quant + signed_step, 0, q_ceil-1);
            bits = count_hq_slice(quant);
            /* Same index as two steps ago: the search is oscillating (or
             * stuck at a clamp), so settle on the larger of the two. */
            if (quant_buf[1] == quant)
            {
                quant = max(quant_buf[0], quant);
                bits = quant == quant_buf[0] ? bits_last : bits;
                break;
            }
            step = clamp(step / 2, 1, (q_ceil - 1) / 2);
            quant_buf[1] = quant_buf[0];
            quant_buf[0] = quant;
            bits_last = bits;
        }
        bytes = ssize_round(bits >> 3);
        slice_args[slice_index].quant_idx = clamp(quant, 0, q_ceil-1);
        slice_args[slice_index].bytes = bytes;
    }

    /* Out-of-range invocations publish 0 so every entry is defined. */
    slice_sizes[gl_LocalInvocationIndex] = bytes;
    barrier();

    if (!in_range)
        return;

    /* Prefix sum for all slices in current workgroup */
    int total_bytes = 0;
    for (int i = 0; i < int(gl_LocalInvocationIndex); i++)
        total_bytes += slice_sizes[i];
    slice_args[slice_index].pb_start = total_bytes;
}

-- 2.49.0 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [FFmpeg-devel] [PATCH v5 4/4] lavc: implement a Vulkan-based VC-2 encoder Implements a Vulkan based dirac encoder. Supports Haar and Legall wavelets and should work with all wavelet depths. 2025-05-23 20:23 ` [FFmpeg-devel] [PATCH v5 4/4] lavc: implement a Vulkan-based VC-2 encoder Implements a Vulkan based dirac encoder. Supports Haar and Legall wavelets and should work with all wavelet depths IndecisiveTurtle @ 2025-05-24 11:09 ` Andreas Rheinhardt 2025-05-24 11:28 ` IndecisiveTurtle 2025-05-26 1:30 ` Michael Niedermayer 1 sibling, 1 reply; 7+ messages in thread From: Andreas Rheinhardt @ 2025-05-24 11:09 UTC (permalink / raw) To: ffmpeg-devel IndecisiveTurtle: > From: IndecisiveTurtle <geoster3d@gmail.com> > > Performance wise, encoding a 3440x1440 1-minute video is performed in about 2.4 minutes with the cpu encoder running on my Ryzen 5 4600H, while it takes about 1.3 minutes on my NVIDIA GTX 1650 > > Haar shader has a subgroup optimized variant that applies when configured wavelet depth allows it > --- Same benchmarks as v4. Did the switch to put_bits63() not cost performance? - Andreas _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [FFmpeg-devel] [PATCH v5 4/4] lavc: implement a Vulkan-based VC-2 encoder Implements a Vulkan based dirac encoder. Supports Haar and Legall wavelets and should work with all wavelet depths. 2025-05-24 11:09 ` Andreas Rheinhardt @ 2025-05-24 11:28 ` IndecisiveTurtle 0 siblings, 0 replies; 7+ messages in thread From: IndecisiveTurtle @ 2025-05-24 11:28 UTC (permalink / raw) To: FFmpeg development discussions and patches > Same benchmarks as v4. Did the switch to put_bits63() not cost performance? In my tests it did not. I tested with old uint32_t, then immediately afterwards with uint64_t and times were the same. Στις Σάβ 24 Μαΐ 2025 στις 2:09 μ.μ., ο/η Andreas Rheinhardt <andreas.rheinhardt@outlook.com> έγραψε: > > IndecisiveTurtle: > > From: IndecisiveTurtle <geoster3d@gmail.com> > > > > Performance wise, encoding a 3440x1440 1-minute video is performed in about 2.4 minutes with the cpu encoder running on my Ryzen 5 4600H, while it takes about 1.3 minutes on my NVIDIA GTX 1650 > > > > Haar shader has a subgroup optimized variant that applies when configured wavelet depth allows it > > --- > > Same benchmarks as v4. Did the switch to put_bits63() not cost performance? > > - Andreas > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [FFmpeg-devel] [PATCH v5 4/4] lavc: implement a Vulkan-based VC-2 encoder Implements a Vulkan based dirac encoder. Supports Haar and Legall wavelets and should work with all wavelet depths. 2025-05-23 20:23 ` [FFmpeg-devel] [PATCH v5 4/4] lavc: implement a Vulkan-based VC-2 encoder Implements a Vulkan based dirac encoder. Supports Haar and Legall wavelets and should work with all wavelet depths IndecisiveTurtle 2025-05-24 11:09 ` Andreas Rheinhardt @ 2025-05-26 1:30 ` Michael Niedermayer 1 sibling, 0 replies; 7+ messages in thread From: Michael Niedermayer @ 2025-05-26 1:30 UTC (permalink / raw) To: FFmpeg development discussions and patches [-- Attachment #1.1: Type: text/plain, Size: 2540 bytes --] On Fri, May 23, 2025 at 11:23:48PM +0300, IndecisiveTurtle wrote: > From: IndecisiveTurtle <geoster3d@gmail.com> > > Performance wise, encoding a 3440x1440 1-minute video is performed in about 2.4 minutes with the cpu encoder running on my Ryzen 5 4600H, while it takes about 1.3 minutes on my NVIDIA GTX 1650 > > Haar shader has a subgroup optimized variant that applies when configured wavelet depth allows it > --- > configure | 1 + > libavcodec/Makefile | 3 + > libavcodec/allcodecs.c | 1 + > libavcodec/vc2enc.c | 2 +- > libavcodec/vc2enc_vulkan.c | 777 +++++++++++++++++++ > libavcodec/vulkan/vc2_dwt_haar.comp | 82 ++ > libavcodec/vulkan/vc2_dwt_haar_subgroup.comp | 75 ++ > libavcodec/vulkan/vc2_dwt_hor_legall.comp | 82 ++ > libavcodec/vulkan/vc2_dwt_upload.comp | 96 +++ > libavcodec/vulkan/vc2_dwt_ver_legall.comp | 78 ++ > libavcodec/vulkan/vc2_encode.comp | 173 +++++ > libavcodec/vulkan/vc2_slice_sizes.comp | 170 ++++ > 12 files changed, 1539 insertions(+), 1 deletion(-) > create mode 100644 libavcodec/vc2enc_vulkan.c > create mode 100644 libavcodec/vulkan/vc2_dwt_haar.comp > create mode 100644 libavcodec/vulkan/vc2_dwt_haar_subgroup.comp > create mode 100644 libavcodec/vulkan/vc2_dwt_hor_legall.comp > create mode 100644 libavcodec/vulkan/vc2_dwt_upload.comp > 
create mode 100644 libavcodec/vulkan/vc2_dwt_ver_legall.comp > create mode 100644 libavcodec/vulkan/vc2_encode.comp > create mode 100644 libavcodec/vulkan/vc2_slice_sizes.comp changes fate results: --- ./tests/ref/vsynth/vsynth1-vc2-420p 2025-05-23 14:14:31.544448136 +0200 +++ tests/data/fate/vsynth1-vc2-420p 2025-05-26 00:35:09.596187444 +0200 @@ -1,4 +1,4 @@ -74df65b15463f098587d8c09d87286a1 *tests/data/fate/vsynth1-vc2-420p.mov -1155415 tests/data/fate/vsynth1-vc2-420p.mov +bea01eb2c6212e8828802fdb28dc7eaf *tests/data/fate/vsynth1-vc2-420p.mov +1045964 tests/data/fate/vsynth1-vc2-420p.mov 387696707c79cf1a6c9aeff4024226b9 *tests/data/fate/vsynth1-vc2-420p.out.rawvideo stddev: 0.00 PSNR:999.99 MAXDIFF: 0 bytes: 7603200/ 760320 Test vsynth1-vc2-420p failed. Look at tests/data/fate/vsynth1-vc2-420p.err for details. [...] -- Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB Everything should be made as simple as possible, but not simpler. -- Albert Einstein [-- Attachment #1.2: signature.asc --] [-- Type: application/pgp-signature, Size: 195 bytes --] [-- Attachment #2: Type: text/plain, Size: 251 bytes --] _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2025-05-26 1:30 UTC | newest] Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2025-05-23 20:23 [FFmpeg-devel] [PATCH v5 1/4] libavcodec/vc2enc: Split out common functions between software and hardware encoders IndecisiveTurtle 2025-05-23 20:23 ` [FFmpeg-devel] [PATCH v5 2/4] libavcodec/vc2enc: Switch quant to int IndecisiveTurtle 2025-05-23 20:23 ` [FFmpeg-devel] [PATCH v5 3/4] libavcodec/vulkan: Add modifications to common shader for VC2 vulkan encoder IndecisiveTurtle 2025-05-23 20:23 ` [FFmpeg-devel] [PATCH v5 4/4] lavc: implement a Vulkan-based VC-2 encoder Implements a Vulkan based dirac encoder. Supports Haar and Legall wavelets and should work with all wavelet depths IndecisiveTurtle 2025-05-24 11:09 ` Andreas Rheinhardt 2025-05-24 11:28 ` IndecisiveTurtle 2025-05-26 1:30 ` Michael Niedermayer
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel This inbox may be cloned and mirrored by anyone: git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git # If you have public-inbox 1.1+ installed, you may # initialize and index your mirror using the following commands: public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \ ffmpegdev@gitmailbox.com public-inbox-index ffmpegdev Example config snippet for mirrors. AGPL code for this site: git clone https://public-inbox.org/public-inbox.git