* [FFmpeg-devel] [PATCH v4 1/4] libavcodec/vc2enc: Split out common functions between software and hardware encoders @ 2025-05-17 20:48 IndecisiveTurtle 2025-05-17 20:48 ` [FFmpeg-devel] [PATCH v4 2/4] libavcodec/vc2enc: Switch quant to int IndecisiveTurtle ` (3 more replies) 0 siblings, 4 replies; 10+ messages in thread From: IndecisiveTurtle @ 2025-05-17 20:48 UTC (permalink / raw) To: ffmpeg-devel; +Cc: IndecisiveTurtle From: IndecisiveTurtle <geoster3d@gmail.com> --- libavcodec/Makefile | 2 +- libavcodec/vc2enc.c | 679 ++----------------------------------- libavcodec/vc2enc_common.c | 571 +++++++++++++++++++++++++++++++ libavcodec/vc2enc_common.h | 178 ++++++++++ 4 files changed, 772 insertions(+), 658 deletions(-) create mode 100644 libavcodec/vc2enc_common.c create mode 100644 libavcodec/vc2enc_common.h diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 77734dff24..bdf0d6742e 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -771,7 +771,7 @@ OBJS-$(CONFIG_VC1_CUVID_DECODER) += cuviddec.o OBJS-$(CONFIG_VC1_MMAL_DECODER) += mmaldec.o OBJS-$(CONFIG_VC1_QSV_DECODER) += qsvdec.o OBJS-$(CONFIG_VC1_V4L2M2M_DECODER) += v4l2_m2m_dec.o -OBJS-$(CONFIG_VC2_ENCODER) += vc2enc.o vc2enc_dwt.o diractab.o +OBJS-$(CONFIG_VC2_ENCODER) += vc2enc.o vc2enc_dwt.o vc2enc_common.o diractab.o OBJS-$(CONFIG_VCR1_DECODER) += vcr1.o OBJS-$(CONFIG_VMDAUDIO_DECODER) += vmdaudio.o OBJS-$(CONFIG_VMDVIDEO_DECODER) += vmdvideo.o diff --git a/libavcodec/vc2enc.c b/libavcodec/vc2enc.c index 99ca95c40a..939bafa195 100644 --- a/libavcodec/vc2enc.c +++ b/libavcodec/vc2enc.c @@ -30,505 +30,11 @@ #include "put_bits.h" #include "version.h" -#include "vc2enc_dwt.h" -#include "diractab.h" - -/* The limited size resolution of each slice forces us to do this */ -#define SSIZE_ROUND(b) (FFALIGN((b), s->size_scaler) + 4 + s->prefix_bytes) +#include "vc2enc_common.h" /* Decides the cutoff point in # of slices to distribute the leftover bytes */ #define SLICE_REDIST_TOTAL 150 -typedef 
struct VC2BaseVideoFormat { - enum AVPixelFormat pix_fmt; - AVRational time_base; - int width, height; - uint8_t interlaced, level; - char name[13]; -} VC2BaseVideoFormat; - -static const VC2BaseVideoFormat base_video_fmts[] = { - { 0 }, /* Custom format, here just to make indexing equal to base_vf */ - { AV_PIX_FMT_YUV420P, { 1001, 15000 }, 176, 120, 0, 1, "QSIF525" }, - { AV_PIX_FMT_YUV420P, { 2, 25 }, 176, 144, 0, 1, "QCIF" }, - { AV_PIX_FMT_YUV420P, { 1001, 15000 }, 352, 240, 0, 1, "SIF525" }, - { AV_PIX_FMT_YUV420P, { 2, 25 }, 352, 288, 0, 1, "CIF" }, - { AV_PIX_FMT_YUV420P, { 1001, 15000 }, 704, 480, 0, 1, "4SIF525" }, - { AV_PIX_FMT_YUV420P, { 2, 25 }, 704, 576, 0, 1, "4CIF" }, - - { AV_PIX_FMT_YUV422P10, { 1001, 30000 }, 720, 480, 1, 2, "SD480I-60" }, - { AV_PIX_FMT_YUV422P10, { 1, 25 }, 720, 576, 1, 2, "SD576I-50" }, - - { AV_PIX_FMT_YUV422P10, { 1001, 60000 }, 1280, 720, 0, 3, "HD720P-60" }, - { AV_PIX_FMT_YUV422P10, { 1, 50 }, 1280, 720, 0, 3, "HD720P-50" }, - { AV_PIX_FMT_YUV422P10, { 1001, 30000 }, 1920, 1080, 1, 3, "HD1080I-60" }, - { AV_PIX_FMT_YUV422P10, { 1, 25 }, 1920, 1080, 1, 3, "HD1080I-50" }, - { AV_PIX_FMT_YUV422P10, { 1001, 60000 }, 1920, 1080, 0, 3, "HD1080P-60" }, - { AV_PIX_FMT_YUV422P10, { 1, 50 }, 1920, 1080, 0, 3, "HD1080P-50" }, - - { AV_PIX_FMT_YUV444P12, { 1, 24 }, 2048, 1080, 0, 4, "DC2K" }, - { AV_PIX_FMT_YUV444P12, { 1, 24 }, 4096, 2160, 0, 5, "DC4K" }, - - { AV_PIX_FMT_YUV422P10, { 1001, 60000 }, 3840, 2160, 0, 6, "UHDTV 4K-60" }, - { AV_PIX_FMT_YUV422P10, { 1, 50 }, 3840, 2160, 0, 6, "UHDTV 4K-50" }, - - { AV_PIX_FMT_YUV422P10, { 1001, 60000 }, 7680, 4320, 0, 7, "UHDTV 8K-60" }, - { AV_PIX_FMT_YUV422P10, { 1, 50 }, 7680, 4320, 0, 7, "UHDTV 8K-50" }, - - { AV_PIX_FMT_YUV422P10, { 1001, 24000 }, 1920, 1080, 0, 3, "HD1080P-24" }, - { AV_PIX_FMT_YUV422P10, { 1001, 30000 }, 720, 486, 1, 2, "SD Pro486" }, -}; -static const int base_video_fmts_len = FF_ARRAY_ELEMS(base_video_fmts); - -enum VC2_QM { - VC2_QM_DEF = 0, - VC2_QM_COL, - 
VC2_QM_FLAT, - - VC2_QM_NB -}; - -typedef struct SubBand { - dwtcoef *buf; - ptrdiff_t stride; - int width; - int height; -} SubBand; - -typedef struct Plane { - SubBand band[MAX_DWT_LEVELS][4]; - dwtcoef *coef_buf; - int width; - int height; - int dwt_width; - int dwt_height; - ptrdiff_t coef_stride; -} Plane; - -typedef struct SliceArgs { - const struct VC2EncContext *ctx; - union { - int cache[DIRAC_MAX_QUANT_INDEX]; - uint8_t *buf; - }; - int x; - int y; - int quant_idx; - int bits_ceil; - int bits_floor; - int bytes; -} SliceArgs; - -typedef struct TransformArgs { - const struct VC2EncContext *ctx; - Plane *plane; - const void *idata; - ptrdiff_t istride; - int field; - VC2TransformContext t; -} TransformArgs; - -typedef struct VC2EncContext { - AVClass *av_class; - PutBitContext pb; - Plane plane[3]; - AVCodecContext *avctx; - DiracVersionInfo ver; - - SliceArgs *slice_args; - TransformArgs transform_args[3]; - - /* For conversion from unsigned pixel values to signed */ - int diff_offset; - int bpp; - int bpp_idx; - - /* Picture number */ - uint32_t picture_number; - - /* Base video format */ - int base_vf; - int level; - int profile; - - /* Quantization matrix */ - uint8_t quant[MAX_DWT_LEVELS][4]; - int custom_quant_matrix; - - /* Division LUT */ - uint32_t qmagic_lut[116][2]; - - int num_x; /* #slices horizontally */ - int num_y; /* #slices vertically */ - int prefix_bytes; - int size_scaler; - int chroma_x_shift; - int chroma_y_shift; - - /* Rate control stuff */ - int frame_max_bytes; - int slice_max_bytes; - int slice_min_bytes; - int q_ceil; - int q_avg; - - /* Options */ - double tolerance; - int wavelet_idx; - int wavelet_depth; - int strict_compliance; - int slice_height; - int slice_width; - int interlaced; - enum VC2_QM quant_matrix; - - /* Parse code state */ - uint32_t next_parse_offset; - enum DiracParseCodes last_parse_code; -} VC2EncContext; - -/// x_k x_{k-1} ... x_0 -> 0 x_k 0 x_{k - 1} ... 
0 x_0 -static uint16_t interleaved_ue_golomb_tab[256]; -/// 1 x_{k-1} ... x_0 -> 0 0 0 x_{k - 1} ... 0 x_0 -static uint16_t top_interleaved_ue_golomb_tab[256]; -/// 1 x_{k-1} ... x_0 -> 2 * k -static uint8_t golomb_len_tab[256]; - -static av_cold void vc2_init_static_data(void) -{ - interleaved_ue_golomb_tab[1] = 1; - for (unsigned i = 2; i < 256; ++i) { - golomb_len_tab[i] = golomb_len_tab[i >> 1] + 2; - interleaved_ue_golomb_tab[i] = (interleaved_ue_golomb_tab[i >> 1] << 2) | (i & 1); - top_interleaved_ue_golomb_tab[i] = interleaved_ue_golomb_tab[i] ^ (1 << golomb_len_tab[i]); - } -} - -static av_always_inline void put_vc2_ue_uint_inline(PutBitContext *pb, uint32_t val) -{ - uint64_t pbits = 1; - int bits = 1; - - ++val; - - while (val >> 8) { - pbits |= (uint64_t)interleaved_ue_golomb_tab[val & 0xff] << bits; - val >>= 8; - bits += 16; - } - pbits |= (uint64_t)top_interleaved_ue_golomb_tab[val] << bits; - bits += golomb_len_tab[val]; - - put_bits63(pb, bits, pbits); -} - -static av_noinline void put_vc2_ue_uint(PutBitContext *pb, uint32_t val) -{ - put_vc2_ue_uint_inline(pb, val); -} - -static av_always_inline int count_vc2_ue_uint(uint32_t val) -{ - return 2 * av_log2(val + 1) + 1; -} - -/* VC-2 10.4 - parse_info() */ -static void encode_parse_info(VC2EncContext *s, enum DiracParseCodes pcode) -{ - uint32_t cur_pos, dist; - - align_put_bits(&s->pb); - - cur_pos = put_bytes_count(&s->pb, 0); - - /* Magic string */ - ff_put_string(&s->pb, "BBCD", 0); - - /* Parse code */ - put_bits(&s->pb, 8, pcode); - - /* Next parse offset */ - dist = cur_pos - s->next_parse_offset; - AV_WB32(s->pb.buf + s->next_parse_offset + 5, dist); - s->next_parse_offset = cur_pos; - put_bits32(&s->pb, pcode == DIRAC_PCODE_END_SEQ ? 13 : 0); - - /* Last parse offset */ - put_bits32(&s->pb, s->last_parse_code == DIRAC_PCODE_END_SEQ ? 
13 : dist); - - s->last_parse_code = pcode; -} - -/* VC-2 11.1 - parse_parameters() - * The level dictates what the decoder should expect in terms of resolution - * and allows it to quickly reject whatever it can't support. Remember, - * this codec kinda targets cheapo FPGAs without much memory. Unfortunately - * it also limits us greatly in our choice of formats, hence the flag to disable - * strict_compliance */ -static void encode_parse_params(VC2EncContext *s) -{ - put_vc2_ue_uint(&s->pb, s->ver.major); /* VC-2 demands this to be 2 */ - put_vc2_ue_uint(&s->pb, s->ver.minor); /* ^^ and this to be 0 */ - put_vc2_ue_uint(&s->pb, s->profile); /* 3 to signal HQ profile */ - put_vc2_ue_uint(&s->pb, s->level); /* 3 - 1080/720, 6 - 4K */ -} - -/* VC-2 11.3 - frame_size() */ -static void encode_frame_size(VC2EncContext *s) -{ - put_bits(&s->pb, 1, !s->strict_compliance); - if (!s->strict_compliance) { - AVCodecContext *avctx = s->avctx; - put_vc2_ue_uint(&s->pb, avctx->width); - put_vc2_ue_uint(&s->pb, avctx->height); - } -} - -/* VC-2 11.3.3 - color_diff_sampling_format() */ -static void encode_sample_fmt(VC2EncContext *s) -{ - put_bits(&s->pb, 1, !s->strict_compliance); - if (!s->strict_compliance) { - int idx; - if (s->chroma_x_shift == 1 && s->chroma_y_shift == 0) - idx = 1; /* 422 */ - else if (s->chroma_x_shift == 1 && s->chroma_y_shift == 1) - idx = 2; /* 420 */ - else - idx = 0; /* 444 */ - put_vc2_ue_uint(&s->pb, idx); - } -} - -/* VC-2 11.3.4 - scan_format() */ -static void encode_scan_format(VC2EncContext *s) -{ - put_bits(&s->pb, 1, !s->strict_compliance); - if (!s->strict_compliance) - put_vc2_ue_uint(&s->pb, s->interlaced); -} - -/* VC-2 11.3.5 - frame_rate() */ -static void encode_frame_rate(VC2EncContext *s) -{ - put_bits(&s->pb, 1, !s->strict_compliance); - if (!s->strict_compliance) { - AVCodecContext *avctx = s->avctx; - put_vc2_ue_uint(&s->pb, 0); - put_vc2_ue_uint(&s->pb, avctx->time_base.den); - put_vc2_ue_uint(&s->pb, avctx->time_base.num); - } -} 
- -/* VC-2 11.3.6 - aspect_ratio() */ -static void encode_aspect_ratio(VC2EncContext *s) -{ - put_bits(&s->pb, 1, !s->strict_compliance); - if (!s->strict_compliance) { - AVCodecContext *avctx = s->avctx; - put_vc2_ue_uint(&s->pb, 0); - put_vc2_ue_uint(&s->pb, avctx->sample_aspect_ratio.num); - put_vc2_ue_uint(&s->pb, avctx->sample_aspect_ratio.den); - } -} - -/* VC-2 11.3.7 - clean_area() */ -static void encode_clean_area(VC2EncContext *s) -{ - put_bits(&s->pb, 1, 0); -} - -/* VC-2 11.3.8 - signal_range() */ -static void encode_signal_range(VC2EncContext *s) -{ - put_bits(&s->pb, 1, !s->strict_compliance); - if (!s->strict_compliance) - put_vc2_ue_uint(&s->pb, s->bpp_idx); -} - -/* VC-2 11.3.9 - color_spec() */ -static void encode_color_spec(VC2EncContext *s) -{ - AVCodecContext *avctx = s->avctx; - put_bits(&s->pb, 1, !s->strict_compliance); - if (!s->strict_compliance) { - int val; - put_vc2_ue_uint(&s->pb, 0); - - /* primaries */ - put_bits(&s->pb, 1, 1); - if (avctx->color_primaries == AVCOL_PRI_BT470BG) - val = 2; - else if (avctx->color_primaries == AVCOL_PRI_SMPTE170M) - val = 1; - else if (avctx->color_primaries == AVCOL_PRI_SMPTE240M) - val = 1; - else - val = 0; - put_vc2_ue_uint(&s->pb, val); - - /* color matrix */ - put_bits(&s->pb, 1, 1); - if (avctx->colorspace == AVCOL_SPC_RGB) - val = 3; - else if (avctx->colorspace == AVCOL_SPC_YCOCG) - val = 2; - else if (avctx->colorspace == AVCOL_SPC_BT470BG) - val = 1; - else - val = 0; - put_vc2_ue_uint(&s->pb, val); - - /* transfer function */ - put_bits(&s->pb, 1, 1); - if (avctx->color_trc == AVCOL_TRC_LINEAR) - val = 2; - else if (avctx->color_trc == AVCOL_TRC_BT1361_ECG) - val = 1; - else - val = 0; - put_vc2_ue_uint(&s->pb, val); - } -} - -/* VC-2 11.3 - source_parameters() */ -static void encode_source_params(VC2EncContext *s) -{ - encode_frame_size(s); - encode_sample_fmt(s); - encode_scan_format(s); - encode_frame_rate(s); - encode_aspect_ratio(s); - encode_clean_area(s); - encode_signal_range(s); - 
encode_color_spec(s); -} - -/* VC-2 11 - sequence_header() */ -static void encode_seq_header(VC2EncContext *s) -{ - align_put_bits(&s->pb); - encode_parse_params(s); - put_vc2_ue_uint(&s->pb, s->base_vf); - encode_source_params(s); - put_vc2_ue_uint(&s->pb, s->interlaced); /* Frames or fields coding */ -} - -/* VC-2 12.1 - picture_header() */ -static void encode_picture_header(VC2EncContext *s) -{ - align_put_bits(&s->pb); - put_bits32(&s->pb, s->picture_number++); -} - -/* VC-2 12.3.4.1 - slice_parameters() */ -static void encode_slice_params(VC2EncContext *s) -{ - put_vc2_ue_uint(&s->pb, s->num_x); - put_vc2_ue_uint(&s->pb, s->num_y); - put_vc2_ue_uint(&s->pb, s->prefix_bytes); - put_vc2_ue_uint(&s->pb, s->size_scaler); -} - -/* 1st idx = LL, second - vertical, third - horizontal, fourth - total */ -static const uint8_t vc2_qm_col_tab[][4] = { - {20, 9, 15, 4}, - { 0, 6, 6, 4}, - { 0, 3, 3, 5}, - { 0, 3, 5, 1}, - { 0, 11, 10, 11} -}; - -static const uint8_t vc2_qm_flat_tab[][4] = { - { 0, 0, 0, 0}, - { 0, 0, 0, 0}, - { 0, 0, 0, 0}, - { 0, 0, 0, 0}, - { 0, 0, 0, 0} -}; - -static void init_quant_matrix(VC2EncContext *s) -{ - int level, orientation; - - if (s->wavelet_depth <= 4 && s->quant_matrix == VC2_QM_DEF) { - s->custom_quant_matrix = 0; - for (level = 0; level < s->wavelet_depth; level++) { - s->quant[level][0] = ff_dirac_default_qmat[s->wavelet_idx][level][0]; - s->quant[level][1] = ff_dirac_default_qmat[s->wavelet_idx][level][1]; - s->quant[level][2] = ff_dirac_default_qmat[s->wavelet_idx][level][2]; - s->quant[level][3] = ff_dirac_default_qmat[s->wavelet_idx][level][3]; - } - return; - } - - s->custom_quant_matrix = 1; - - if (s->quant_matrix == VC2_QM_DEF) { - for (level = 0; level < s->wavelet_depth; level++) { - for (orientation = 0; orientation < 4; orientation++) { - if (level <= 3) - s->quant[level][orientation] = ff_dirac_default_qmat[s->wavelet_idx][level][orientation]; - else - s->quant[level][orientation] = vc2_qm_col_tab[level][orientation]; - } 
- } - } else if (s->quant_matrix == VC2_QM_COL) { - for (level = 0; level < s->wavelet_depth; level++) { - for (orientation = 0; orientation < 4; orientation++) { - s->quant[level][orientation] = vc2_qm_col_tab[level][orientation]; - } - } - } else { - for (level = 0; level < s->wavelet_depth; level++) { - for (orientation = 0; orientation < 4; orientation++) { - s->quant[level][orientation] = vc2_qm_flat_tab[level][orientation]; - } - } - } -} - -/* VC-2 12.3.4.2 - quant_matrix() */ -static void encode_quant_matrix(VC2EncContext *s) -{ - int level; - put_bits(&s->pb, 1, s->custom_quant_matrix); - if (s->custom_quant_matrix) { - put_vc2_ue_uint(&s->pb, s->quant[0][0]); - for (level = 0; level < s->wavelet_depth; level++) { - put_vc2_ue_uint(&s->pb, s->quant[level][1]); - put_vc2_ue_uint(&s->pb, s->quant[level][2]); - put_vc2_ue_uint(&s->pb, s->quant[level][3]); - } - } -} - -/* VC-2 12.3 - transform_parameters() */ -static void encode_transform_params(VC2EncContext *s) -{ - put_vc2_ue_uint(&s->pb, s->wavelet_idx); - put_vc2_ue_uint(&s->pb, s->wavelet_depth); - - encode_slice_params(s); - encode_quant_matrix(s); -} - -/* VC-2 12.2 - wavelet_transform() */ -static void encode_wavelet_transform(VC2EncContext *s) -{ - encode_transform_params(s); - align_put_bits(&s->pb); -} - -/* VC-2 12 - picture_parse() */ -static void encode_picture_start(VC2EncContext *s) -{ - align_put_bits(&s->pb); - encode_picture_header(s); - align_put_bits(&s->pb); - encode_wavelet_transform(s); -} - #define QUANT(c, mul, add, shift) (((mul) * (c) + (add)) >> (shift)) /* VC-2 13.5.5.2 - slice_band() */ @@ -558,6 +64,11 @@ static void encode_subband(const VC2EncContext *s, PutBitContext *pb, } } +static inline int count_vc2_ue_uint(uint32_t val) +{ + return 2 * av_log2(val + 1) + 1; +} + static int count_hq_slice(SliceArgs *slice, int quant_idx) { int x, y; @@ -657,7 +168,7 @@ static int calc_slice_sizes(VC2EncContext *s) SliceArgs *enc_args = s->slice_args; SliceArgs 
*top_loc[SLICE_REDIST_TOTAL] = {NULL}; - init_quant_matrix(s); + ff_vc2_init_quant_matrix(s, s->quant); for (slice_y = 0; slice_y < s->num_y; slice_y++) { for (slice_x = 0; slice_x < s->num_x; slice_x++) { @@ -782,7 +293,7 @@ static int encode_hq_slice(AVCodecContext *avctx, void *arg) } /* VC-2 13.5.1 - low_delay_transform_data() */ -static int encode_slices(VC2EncContext *s) +static void encode_slices(VC2EncContext *s) { uint8_t *buf; int slice_x, slice_y, skip = 0; @@ -803,8 +314,6 @@ static int encode_slices(VC2EncContext *s) sizeof(SliceArgs)); skip_put_bytes(&s->pb, skip); - - return 0; } /* @@ -902,7 +411,7 @@ static int dwt_plane(AVCodecContext *avctx, void *arg) } static int encode_frame(VC2EncContext *s, AVPacket *avpkt, const AVFrame *frame, - const char *aux_data, const int header_size, int field) + const int header_size, int field) { int i, ret; int64_t max_frame_bytes; @@ -929,25 +438,8 @@ static int encode_frame(VC2EncContext *s, AVPacket *avpkt, const AVFrame *frame, init_put_bits(&s->pb, avpkt->data, avpkt->size); } - /* Sequence header */ - encode_parse_info(s, DIRAC_PCODE_SEQ_HEADER); - encode_seq_header(s); - - /* Encoder version */ - if (aux_data) { - encode_parse_info(s, DIRAC_PCODE_AUX); - ff_put_string(&s->pb, aux_data, 1); - } - - /* Picture header */ - encode_parse_info(s, DIRAC_PCODE_PICTURE_HQ); - encode_picture_start(s); - - /* Encode slices */ - encode_slices(s); - - /* End sequence */ - encode_parse_info(s, DIRAC_PCODE_END_SEQ); + /* Encode frame */ + ff_vc2_encode_frame(s, encode_slices); return 0; } @@ -956,45 +448,20 @@ static av_cold int vc2_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, const AVFrame *frame, int *got_packet) { int ret = 0; - int slice_ceil, sig_size = 256; VC2EncContext *s = avctx->priv_data; const int bitexact = avctx->flags & AV_CODEC_FLAG_BITEXACT; - const char *aux_data = bitexact ? "Lavc" : LIBAVCODEC_IDENT; const int aux_data_size = bitexact ? 
sizeof("Lavc") : sizeof(LIBAVCODEC_IDENT); const int header_size = 100 + aux_data_size; - int64_t r_bitrate = avctx->bit_rate >> (s->interlaced); - - s->avctx = avctx; - s->size_scaler = 2; - s->prefix_bytes = 0; - s->last_parse_code = 0; - s->next_parse_offset = 0; - - /* Rate control */ - s->frame_max_bytes = (av_rescale(r_bitrate, s->avctx->time_base.num, - s->avctx->time_base.den) >> 3) - header_size; - s->slice_max_bytes = slice_ceil = av_rescale(s->frame_max_bytes, 1, s->num_x*s->num_y); - - /* Find an appropriate size scaler */ - while (sig_size > 255) { - int r_size = SSIZE_ROUND(s->slice_max_bytes); - if (r_size > slice_ceil) { - s->slice_max_bytes -= r_size - slice_ceil; - r_size = SSIZE_ROUND(s->slice_max_bytes); - } - sig_size = r_size/s->size_scaler; /* Signalled slize size */ - s->size_scaler <<= 1; - } - s->slice_min_bytes = s->slice_max_bytes - s->slice_max_bytes*(s->tolerance/100.0f); - if (s->slice_min_bytes < 0 || s->slice_max_bytes > INT_MAX >> 3) - return AVERROR(EINVAL); + ret = ff_vc2_frame_init_properties(avctx, s); + if (ret) + return ret; - ret = encode_frame(s, avpkt, frame, aux_data, header_size, s->interlaced); + ret = encode_frame(s, avpkt, frame, header_size, s->interlaced); if (ret) return ret; if (s->interlaced) { - ret = encode_frame(s, avpkt, frame, aux_data, header_size, 2); + ret = encode_frame(s, avpkt, frame, header_size, 2); if (ret) return ret; } @@ -1026,83 +493,13 @@ static av_cold int vc2_encode_end(AVCodecContext *avctx) static av_cold int vc2_encode_init(AVCodecContext *avctx) { - static AVOnce init_static_once = AV_ONCE_INIT; Plane *p; SubBand *b; - int i, level, o, shift; + int ret, i, level, o, shift; const AVPixFmtDescriptor *pixdesc; int depth; VC2EncContext *s = avctx->priv_data; - s->picture_number = 0; - - /* Total allowed quantization range */ - s->q_ceil = DIRAC_MAX_QUANT_INDEX; - - s->ver.major = 2; - s->ver.minor = 0; - s->profile = 3; - s->level = 3; - - s->base_vf = -1; - s->strict_compliance = 1; - - 
s->q_avg = 0; - s->slice_max_bytes = 0; - s->slice_min_bytes = 0; - - /* Mark unknown as progressive */ - s->interlaced = !((avctx->field_order == AV_FIELD_UNKNOWN) || - (avctx->field_order == AV_FIELD_PROGRESSIVE)); - - for (i = 0; i < base_video_fmts_len; i++) { - const VC2BaseVideoFormat *fmt = &base_video_fmts[i]; - if (avctx->pix_fmt != fmt->pix_fmt) - continue; - if (avctx->time_base.num != fmt->time_base.num) - continue; - if (avctx->time_base.den != fmt->time_base.den) - continue; - if (avctx->width != fmt->width) - continue; - if (avctx->height != fmt->height) - continue; - if (s->interlaced != fmt->interlaced) - continue; - s->base_vf = i; - s->level = base_video_fmts[i].level; - break; - } - - if (s->interlaced) - av_log(avctx, AV_LOG_WARNING, "Interlacing enabled!\n"); - - if ((s->slice_width & (s->slice_width - 1)) || - (s->slice_height & (s->slice_height - 1))) { - av_log(avctx, AV_LOG_ERROR, "Slice size is not a power of two!\n"); - return AVERROR(EINVAL); - } - - if ((s->slice_width > avctx->width) || - (s->slice_height > avctx->height)) { - av_log(avctx, AV_LOG_ERROR, "Slice size is bigger than the image!\n"); - return AVERROR(EINVAL); - } - - if (s->base_vf <= 0) { - if (avctx->strict_std_compliance < FF_COMPLIANCE_STRICT) { - s->strict_compliance = s->base_vf = 0; - av_log(avctx, AV_LOG_WARNING, "Format does not strictly comply with VC2 specs\n"); - } else { - av_log(avctx, AV_LOG_WARNING, "Given format does not strictly comply with " - "the specifications, decrease strictness to use it.\n"); - return AVERROR(EINVAL); - } - } else { - av_log(avctx, AV_LOG_INFO, "Selected base video format = %i (%s)\n", - s->base_vf, base_video_fmts[s->base_vf].name); - } - pixdesc = av_pix_fmt_desc_get(avctx->pix_fmt); /* Chroma subsampling */ s->chroma_x_shift = pixdesc->log2_chroma_w; @@ -1110,47 +507,21 @@ static av_cold int vc2_encode_init(AVCodecContext *avctx) /* Bit depth and color range index */ depth = pixdesc->comp[0].depth; - if (depth == 8 && 
avctx->color_range == AVCOL_RANGE_JPEG) { - s->bpp = 1; - s->bpp_idx = 1; - s->diff_offset = 128; - } else if (depth == 8 && (avctx->color_range == AVCOL_RANGE_MPEG || - avctx->color_range == AVCOL_RANGE_UNSPECIFIED)) { - s->bpp = 1; - s->bpp_idx = 2; - s->diff_offset = 128; - } else if (depth == 10) { - s->bpp = 2; - s->bpp_idx = 3; - s->diff_offset = 512; - } else { - s->bpp = 2; - s->bpp_idx = 4; - s->diff_offset = 2048; - } + + /* Context initialization */ + ret = ff_vc2_encode_init(avctx, depth); + if (ret < 0) + return ret; /* Planes initialization */ for (i = 0; i < 3; i++) { - int w, h; p = &s->plane[i]; - p->width = avctx->width >> (i ? s->chroma_x_shift : 0); - p->height = avctx->height >> (i ? s->chroma_y_shift : 0); - if (s->interlaced) - p->height >>= 1; - p->dwt_width = w = FFALIGN(p->width, (1 << s->wavelet_depth)); - p->dwt_height = h = FFALIGN(p->height, (1 << s->wavelet_depth)); - p->coef_stride = FFALIGN(p->dwt_width, 32); p->coef_buf = av_mallocz(p->coef_stride*p->dwt_height*sizeof(dwtcoef)); if (!p->coef_buf) return AVERROR(ENOMEM); for (level = s->wavelet_depth-1; level >= 0; level--) { - w = w >> 1; - h = h >> 1; for (o = 0; o < 4; o++) { b = &p->band[level][o]; - b->width = w; - b->height = h; - b->stride = p->coef_stride; shift = (o > 1)*b->height*b->stride + (o & 1)*b->width; b->buf = p->coef_buf + shift; } @@ -1164,10 +535,6 @@ static av_cold int vc2_encode_init(AVCodecContext *avctx) return AVERROR(ENOMEM); } - /* Slices */ - s->num_x = s->plane[0].dwt_width/s->slice_width; - s->num_y = s->plane[0].dwt_height/s->slice_height; - s->slice_args = av_calloc(s->num_x*s->num_y, sizeof(SliceArgs)); if (!s->slice_args) return AVERROR(ENOMEM); @@ -1189,8 +556,6 @@ static av_cold int vc2_encode_init(AVCodecContext *avctx) } } - ff_thread_once(&init_static_once, vc2_init_static_data); - return 0; } diff --git a/libavcodec/vc2enc_common.c b/libavcodec/vc2enc_common.c new file mode 100644 index 0000000000..bd27fd3c40 --- /dev/null +++ 
b/libavcodec/vc2enc_common.c @@ -0,0 +1,571 @@ +/* +* Copyright (C) 2016 Open Broadcast Systems Ltd. +* Author 2016 Rostislav Pehlivanov <atomnuker@gmail.com> +* +* This file is part of FFmpeg. +* +* FFmpeg is free software; you can redistribute it and/or +* modify it under the terms of the GNU Lesser General Public +* License as published by the Free Software Foundation; either +* version 2.1 of the License, or (at your option) any later version. +* +* FFmpeg is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* Lesser General Public License for more details. +* +* You should have received a copy of the GNU Lesser General Public +* License along with FFmpeg; if not, write to the Free Software +* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#include "libavutil/pixdesc.h" +#include "libavutil/thread.h" +#include "vc2enc_common.h" +#include "version.h" + +typedef struct VC2BaseVideoFormat { + enum AVPixelFormat pix_fmt; + AVRational time_base; + int width, height; + uint8_t interlaced, level; + char name[13]; +} VC2BaseVideoFormat; + +static const VC2BaseVideoFormat base_video_fmts[] = { + { 0 }, /* Custom format, here just to make indexing equal to base_vf */ + { AV_PIX_FMT_YUV420P, { 1001, 15000 }, 176, 120, 0, 1, "QSIF525" }, + { AV_PIX_FMT_YUV420P, { 2, 25 }, 176, 144, 0, 1, "QCIF" }, + { AV_PIX_FMT_YUV420P, { 1001, 15000 }, 352, 240, 0, 1, "SIF525" }, + { AV_PIX_FMT_YUV420P, { 2, 25 }, 352, 288, 0, 1, "CIF" }, + { AV_PIX_FMT_YUV420P, { 1001, 15000 }, 704, 480, 0, 1, "4SIF525" }, + { AV_PIX_FMT_YUV420P, { 2, 25 }, 704, 576, 0, 1, "4CIF" }, + + { AV_PIX_FMT_YUV422P10, { 1001, 30000 }, 720, 480, 1, 2, "SD480I-60" }, + { AV_PIX_FMT_YUV422P10, { 1, 25 }, 720, 576, 1, 2, "SD576I-50" }, + + { AV_PIX_FMT_YUV422P10, { 1001, 60000 }, 1280, 720, 0, 3, "HD720P-60" }, + { AV_PIX_FMT_YUV422P10, { 1, 50 }, 
1280, 720, 0, 3, "HD720P-50" }, + { AV_PIX_FMT_YUV422P10, { 1001, 30000 }, 1920, 1080, 1, 3, "HD1080I-60" }, + { AV_PIX_FMT_YUV422P10, { 1, 25 }, 1920, 1080, 1, 3, "HD1080I-50" }, + { AV_PIX_FMT_YUV422P10, { 1001, 60000 }, 1920, 1080, 0, 3, "HD1080P-60" }, + { AV_PIX_FMT_YUV422P10, { 1, 50 }, 1920, 1080, 0, 3, "HD1080P-50" }, + + { AV_PIX_FMT_YUV444P12, { 1, 24 }, 2048, 1080, 0, 4, "DC2K" }, + { AV_PIX_FMT_YUV444P12, { 1, 24 }, 4096, 2160, 0, 5, "DC4K" }, + + { AV_PIX_FMT_YUV422P10, { 1001, 60000 }, 3840, 2160, 0, 6, "UHDTV 4K-60" }, + { AV_PIX_FMT_YUV422P10, { 1, 50 }, 3840, 2160, 0, 6, "UHDTV 4K-50" }, + + { AV_PIX_FMT_YUV422P10, { 1001, 60000 }, 7680, 4320, 0, 7, "UHDTV 8K-60" }, + { AV_PIX_FMT_YUV422P10, { 1, 50 }, 7680, 4320, 0, 7, "UHDTV 8K-50" }, + + { AV_PIX_FMT_YUV422P10, { 1001, 24000 }, 1920, 1080, 0, 3, "HD1080P-24" }, + { AV_PIX_FMT_YUV422P10, { 1001, 30000 }, 720, 486, 1, 2, "SD Pro486" }, +}; +static const int base_video_fmts_len = FF_ARRAY_ELEMS(base_video_fmts); + +/// x_k x_{k-1} ... x_0 -> 0 x_k 0 x_{k - 1} ... 0 x_0 +uint16_t interleaved_ue_golomb_tab[256]; +/// 1 x_{k-1} ... x_0 -> 0 0 0 x_{k - 1} ... 0 x_0 +uint16_t top_interleaved_ue_golomb_tab[256]; +/// 1 x_{k-1} ... 
x_0 -> 2 * k +uint8_t golomb_len_tab[256]; + +static av_cold void vc2_init_static_data(void) +{ + interleaved_ue_golomb_tab[1] = 1; + for (unsigned i = 2; i < 256; ++i) { + golomb_len_tab[i] = golomb_len_tab[i >> 1] + 2; + interleaved_ue_golomb_tab[i] = (interleaved_ue_golomb_tab[i >> 1] << 2) | (i & 1); + top_interleaved_ue_golomb_tab[i] = interleaved_ue_golomb_tab[i] ^ (1 << golomb_len_tab[i]); + } +} + +static av_noinline void put_vc2_ue_uint(PutBitContext *pb, uint32_t val) +{ + put_vc2_ue_uint_inline(pb, val); +} + +/* VC-2 10.4 - parse_info() */ +static void encode_parse_info(VC2EncContext *s, enum DiracParseCodes pcode) +{ + uint32_t cur_pos, dist; + + align_put_bits(&s->pb); + + cur_pos = put_bytes_count(&s->pb, 0); + + /* Magic string */ + ff_put_string(&s->pb, "BBCD", 0); + + /* Parse code */ + put_bits(&s->pb, 8, pcode); + + /* Next parse offset */ + dist = cur_pos - s->next_parse_offset; + AV_WB32(s->pb.buf + s->next_parse_offset + 5, dist); + s->next_parse_offset = cur_pos; + put_bits32(&s->pb, pcode == DIRAC_PCODE_END_SEQ ? 13 : 0); + + cur_pos = put_bytes_count(&s->pb, 0); + + /* Last parse offset */ + put_bits32(&s->pb, s->last_parse_code == DIRAC_PCODE_END_SEQ ? 13 : dist); + + s->last_parse_code = pcode; +} + +/* VC-2 11.1 - parse_parameters() +* The level dictates what the decoder should expect in terms of resolution +* and allows it to quickly reject whatever it can't support. Remember, +* this codec kinda targets cheapo FPGAs without much memory. 
Unfortunately +* it also limits us greatly in our choice of formats, hence the flag to disable +* strict_compliance */ +static void encode_parse_params(VC2EncContext *s) +{ + put_vc2_ue_uint(&s->pb, s->ver.major); /* VC-2 demands this to be 2 */ + put_vc2_ue_uint(&s->pb, s->ver.minor); /* ^^ and this to be 0 */ + put_vc2_ue_uint(&s->pb, s->profile); /* 3 to signal HQ profile */ + put_vc2_ue_uint(&s->pb, s->level); /* 3 - 1080/720, 6 - 4K */ +} + +/* VC-2 11.3 - frame_size() */ +static void encode_frame_size(VC2EncContext *s) +{ + put_bits(&s->pb, 1, !s->strict_compliance); + if (!s->strict_compliance) { + AVCodecContext *avctx = s->avctx; + put_vc2_ue_uint(&s->pb, avctx->width); + put_vc2_ue_uint(&s->pb, avctx->height); + } +} + +/* VC-2 11.3.3 - color_diff_sampling_format() */ +static void encode_sample_fmt(VC2EncContext *s) +{ + put_bits(&s->pb, 1, !s->strict_compliance); + if (!s->strict_compliance) { + int idx; + if (s->chroma_x_shift == 1 && s->chroma_y_shift == 0) + idx = 1; /* 422 */ + else if (s->chroma_x_shift == 1 && s->chroma_y_shift == 1) + idx = 2; /* 420 */ + else + idx = 0; /* 444 */ + put_vc2_ue_uint(&s->pb, idx); + } +} + +/* VC-2 11.3.4 - scan_format() */ +static void encode_scan_format(VC2EncContext *s) +{ + put_bits(&s->pb, 1, !s->strict_compliance); + if (!s->strict_compliance) + put_vc2_ue_uint(&s->pb, s->interlaced); +} + +/* VC-2 11.3.5 - frame_rate() */ +static void encode_frame_rate(VC2EncContext *s) +{ + put_bits(&s->pb, 1, !s->strict_compliance); + if (!s->strict_compliance) { + AVCodecContext *avctx = s->avctx; + put_vc2_ue_uint(&s->pb, 0); + put_vc2_ue_uint(&s->pb, avctx->time_base.den); + put_vc2_ue_uint(&s->pb, avctx->time_base.num); + } +} + +/* VC-2 11.3.6 - aspect_ratio() */ +static void encode_aspect_ratio(VC2EncContext *s) +{ + put_bits(&s->pb, 1, !s->strict_compliance); + if (!s->strict_compliance) { + AVCodecContext *avctx = s->avctx; + put_vc2_ue_uint(&s->pb, 0); + put_vc2_ue_uint(&s->pb, avctx->sample_aspect_ratio.num); + 
put_vc2_ue_uint(&s->pb, avctx->sample_aspect_ratio.den); + } +} + +/* VC-2 11.3.7 - clean_area() */ +static void encode_clean_area(VC2EncContext *s) +{ + put_bits(&s->pb, 1, 0); +} + +/* VC-2 11.3.8 - signal_range() */ +static void encode_signal_range(VC2EncContext *s) +{ + put_bits(&s->pb, 1, !s->strict_compliance); + if (!s->strict_compliance) + put_vc2_ue_uint(&s->pb, s->bpp_idx); +} + +/* VC-2 11.3.9 - color_spec() */ +static void encode_color_spec(VC2EncContext *s) +{ + AVCodecContext *avctx = s->avctx; + put_bits(&s->pb, 1, !s->strict_compliance); + if (!s->strict_compliance) { + int val; + put_vc2_ue_uint(&s->pb, 0); + + /* primaries */ + put_bits(&s->pb, 1, 1); + if (avctx->color_primaries == AVCOL_PRI_BT470BG) + val = 2; + else if (avctx->color_primaries == AVCOL_PRI_SMPTE170M) + val = 1; + else if (avctx->color_primaries == AVCOL_PRI_SMPTE240M) + val = 1; + else + val = 0; + put_vc2_ue_uint(&s->pb, val); + + /* color matrix */ + put_bits(&s->pb, 1, 1); + if (avctx->colorspace == AVCOL_SPC_RGB) + val = 3; + else if (avctx->colorspace == AVCOL_SPC_YCOCG) + val = 2; + else if (avctx->colorspace == AVCOL_SPC_BT470BG) + val = 1; + else + val = 0; + put_vc2_ue_uint(&s->pb, val); + + /* transfer function */ + put_bits(&s->pb, 1, 1); + if (avctx->color_trc == AVCOL_TRC_LINEAR) + val = 2; + else if (avctx->color_trc == AVCOL_TRC_BT1361_ECG) + val = 1; + else + val = 0; + put_vc2_ue_uint(&s->pb, val); + } +} + +/* VC-2 11.3 - source_parameters() */ +static void encode_source_params(VC2EncContext *s) +{ + encode_frame_size(s); + encode_sample_fmt(s); + encode_scan_format(s); + encode_frame_rate(s); + encode_aspect_ratio(s); + encode_clean_area(s); + encode_signal_range(s); + encode_color_spec(s); +} + +/* VC-2 11 - sequence_header() */ +static void encode_seq_header(VC2EncContext *s) +{ + align_put_bits(&s->pb); + encode_parse_params(s); + put_vc2_ue_uint(&s->pb, s->base_vf); + encode_source_params(s); + put_vc2_ue_uint(&s->pb, s->interlaced); /* Frames or fields 
coding */ +} + +/* VC-2 12.1 - picture_header() */ +static void encode_picture_header(VC2EncContext *s) +{ + align_put_bits(&s->pb); + put_bits32(&s->pb, s->picture_number++); +} + +/* VC-2 12.3.4.1 - slice_parameters() */ +static void encode_slice_params(VC2EncContext *s) +{ + put_vc2_ue_uint(&s->pb, s->num_x); + put_vc2_ue_uint(&s->pb, s->num_y); + put_vc2_ue_uint(&s->pb, s->prefix_bytes); + put_vc2_ue_uint(&s->pb, s->size_scaler); +} + +/* 1st idx = LL, second - vertical, third - horizontal, fourth - total */ +static const uint8_t vc2_qm_col_tab[][4] = { + {20, 9, 15, 4}, + { 0, 6, 6, 4}, + { 0, 3, 3, 5}, + { 0, 3, 5, 1}, + { 0, 11, 10, 11} +}; + +static const uint8_t vc2_qm_flat_tab[][4] = { + { 0, 0, 0, 0}, + { 0, 0, 0, 0}, + { 0, 0, 0, 0}, + { 0, 0, 0, 0}, + { 0, 0, 0, 0} +}; + +void ff_vc2_init_quant_matrix(VC2EncContext *s, uint8_t quant[MAX_DWT_LEVELS][4]) +{ + int level, orientation; + + if (s->wavelet_depth <= 4 && s->quant_matrix == VC2_QM_DEF) { + s->custom_quant_matrix = 0; + for (level = 0; level < s->wavelet_depth; level++) { + quant[level][0] = ff_dirac_default_qmat[s->wavelet_idx][level][0]; + quant[level][1] = ff_dirac_default_qmat[s->wavelet_idx][level][1]; + quant[level][2] = ff_dirac_default_qmat[s->wavelet_idx][level][2]; + quant[level][3] = ff_dirac_default_qmat[s->wavelet_idx][level][3]; + } + return; + } + + s->custom_quant_matrix = 1; + + if (s->quant_matrix == VC2_QM_DEF) { + for (level = 0; level < s->wavelet_depth; level++) { + for (orientation = 0; orientation < 4; orientation++) { + if (level <= 3) + quant[level][orientation] = ff_dirac_default_qmat[s->wavelet_idx][level][orientation]; + else + quant[level][orientation] = vc2_qm_col_tab[level][orientation]; + } + } + } else if (s->quant_matrix == VC2_QM_COL) { + for (level = 0; level < s->wavelet_depth; level++) { + for (orientation = 0; orientation < 4; orientation++) { + quant[level][orientation] = vc2_qm_col_tab[level][orientation]; + } + } + } else { + for (level = 0; level < 
s->wavelet_depth; level++) { + for (orientation = 0; orientation < 4; orientation++) { + quant[level][orientation] = vc2_qm_flat_tab[level][orientation]; + } + } + } +} + +/* VC-2 12.3.4.2 - quant_matrix() */ +static void encode_quant_matrix(VC2EncContext *s) +{ + int level; + put_bits(&s->pb, 1, s->custom_quant_matrix); + if (s->custom_quant_matrix) { + put_vc2_ue_uint(&s->pb, s->quant[0][0]); + for (level = 0; level < s->wavelet_depth; level++) { + put_vc2_ue_uint(&s->pb, s->quant[level][1]); + put_vc2_ue_uint(&s->pb, s->quant[level][2]); + put_vc2_ue_uint(&s->pb, s->quant[level][3]); + } + } +} + +/* VC-2 12.3 - transform_parameters() */ +static void encode_transform_params(VC2EncContext *s) +{ + put_vc2_ue_uint(&s->pb, s->wavelet_idx); + put_vc2_ue_uint(&s->pb, s->wavelet_depth); + + encode_slice_params(s); + encode_quant_matrix(s); +} + +/* VC-2 12.2 - wavelet_transform() */ +static void encode_wavelet_transform(VC2EncContext *s) +{ + encode_transform_params(s); + align_put_bits(&s->pb); +} + +/* VC-2 12 - picture_parse() */ +static void encode_picture_start(VC2EncContext *s) +{ + align_put_bits(&s->pb); + encode_picture_header(s); + align_put_bits(&s->pb); + encode_wavelet_transform(s); +} + +int ff_vc2_encode_init(AVCodecContext *avctx, int depth) +{ + static AVOnce init_static_once = AV_ONCE_INIT; + int i, level, o; + SubBand *b; + Plane *p; + VC2EncContext *s = avctx->priv_data; + + s->picture_number = 0; + + /* Total allowed quantization range */ + s->q_ceil = DIRAC_MAX_QUANT_INDEX; + + s->ver.major = 2; + s->ver.minor = 0; + s->profile = 3; + s->level = 3; + + s->base_vf = -1; + s->strict_compliance = 1; + + s->q_avg = 0; + s->slice_max_bytes = 0; + s->slice_min_bytes = 0; + + /* Mark unknown as progressive */ + s->interlaced = !((avctx->field_order == AV_FIELD_UNKNOWN) || + (avctx->field_order == AV_FIELD_PROGRESSIVE)); + + for (i = 0; i < base_video_fmts_len; i++) { + const VC2BaseVideoFormat *fmt = &base_video_fmts[i]; + if (avctx->pix_fmt != 
fmt->pix_fmt || avctx->time_base.num != fmt->time_base.num || + avctx->time_base.den != fmt->time_base.den || avctx->width != fmt->width || + avctx->height != fmt->height || s->interlaced != fmt->interlaced) + continue; + s->base_vf = i; + s->level = base_video_fmts[i].level; + break; + } + + if (s->interlaced) + av_log(avctx, AV_LOG_WARNING, "Interlacing enabled!\n"); + + if ((s->slice_width & (s->slice_width - 1)) || + (s->slice_height & (s->slice_height - 1))) { + av_log(avctx, AV_LOG_ERROR, "Slice size is not a power of two!\n"); + return AVERROR(EINVAL); + } + + if ((s->slice_width > avctx->width) || + (s->slice_height > avctx->height)) { + av_log(avctx, AV_LOG_ERROR, "Slice size is bigger than the image!\n"); + return AVERROR(EINVAL); + } + + if (s->base_vf <= 0) { + if (avctx->strict_std_compliance < FF_COMPLIANCE_STRICT) { + s->strict_compliance = s->base_vf = 0; + av_log(avctx, AV_LOG_WARNING, "Format does not strictly comply with VC2 specs\n"); + } else { + av_log(avctx, AV_LOG_WARNING, "Given format does not strictly comply with " + "the specifications, decrease strictness to use it.\n"); + return AVERROR(EINVAL); + } + } else { + av_log(avctx, AV_LOG_INFO, "Selected base video format = %i (%s)\n", + s->base_vf, base_video_fmts[s->base_vf].name); + } + + /* Bit depth and color range index */ + if (depth == 8 && avctx->color_range == AVCOL_RANGE_JPEG) { + s->bpp = 1; + s->bpp_idx = 1; + s->diff_offset = 128; + } else if (depth == 8 && (avctx->color_range == AVCOL_RANGE_MPEG || + avctx->color_range == AVCOL_RANGE_UNSPECIFIED)) { + s->bpp = 1; + s->bpp_idx = 2; + s->diff_offset = 128; + } else if (depth == 10) { + s->bpp = 2; + s->bpp_idx = 3; + s->diff_offset = 512; + } else { + s->bpp = 2; + s->bpp_idx = 4; + s->diff_offset = 2048; + } + + /* Planes initialization */ + for (i = 0; i < 3; i++) { + int w, h; + p = &s->plane[i]; + p->width = avctx->width >> (i ? s->chroma_x_shift : 0); + p->height = avctx->height >> (i ? 
s->chroma_y_shift : 0); + if (s->interlaced) + p->height >>= 1; + p->dwt_width = w = FFALIGN(p->width, (1 << s->wavelet_depth)); + p->dwt_height = h = FFALIGN(p->height, (1 << s->wavelet_depth)); + p->coef_stride = FFALIGN(p->dwt_width, 32); + for (level = s->wavelet_depth-1; level >= 0; level--) { + w = w >> 1; + h = h >> 1; + for (o = 0; o < 4; o++) { + b = &p->band[level][o]; + b->width = w; + b->height = h; + b->stride = p->coef_stride; + } + } + } + + /* Slices */ + s->num_x = s->plane[0].dwt_width/s->slice_width; + s->num_y = s->plane[0].dwt_height/s->slice_height; + + ff_thread_once(&init_static_once, vc2_init_static_data); + + return 0; +} + +int ff_vc2_frame_init_properties(AVCodecContext *avctx, VC2EncContext *s) +{ + int slice_ceil, sig_size = 256; + const int bitexact = avctx->flags & AV_CODEC_FLAG_BITEXACT; + const int aux_data_size = bitexact ? sizeof("Lavc") : sizeof(LIBAVCODEC_IDENT); + const int header_size = 100 + aux_data_size; + int64_t r_bitrate = avctx->bit_rate >> (s->interlaced); + + s->avctx = avctx; + s->size_scaler = 2; + s->prefix_bytes = 0; + s->last_parse_code = 0; + s->next_parse_offset = 0; + + /* Rate control */ + s->frame_max_bytes = (av_rescale(r_bitrate, s->avctx->time_base.num, + s->avctx->time_base.den) >> 3) - header_size; + s->slice_max_bytes = slice_ceil = av_rescale(s->frame_max_bytes, 1, s->num_x * s->num_y); + + /* Find an appropriate size scaler */ + while (sig_size > 255) { + int r_size = SSIZE_ROUND(s->slice_max_bytes); + if (r_size > slice_ceil) { + s->slice_max_bytes -= r_size - slice_ceil; + r_size = SSIZE_ROUND(s->slice_max_bytes); + } + sig_size = r_size/s->size_scaler; /* Signalled slice size */ + s->size_scaler <<= 1; + } + + s->slice_min_bytes = s->slice_max_bytes - s->slice_max_bytes*(s->tolerance/100.0f); + if (s->slice_min_bytes < 0 || s->slice_max_bytes > INT_MAX >> 3) + return AVERROR(EINVAL); + + return 0; +} + +void ff_vc2_encode_frame(VC2EncContext *s, void(*encode_slices)(VC2EncContext*)) +{ + const 
int bitexact = s->avctx->flags & AV_CODEC_FLAG_BITEXACT; + const char *aux_data = bitexact ? "Lavc" : LIBAVCODEC_IDENT; + + /* Sequence header */ + encode_parse_info(s, DIRAC_PCODE_SEQ_HEADER); + encode_seq_header(s); + + /* Encoder version */ + if (aux_data) { + encode_parse_info(s, DIRAC_PCODE_AUX); + ff_put_string(&s->pb, aux_data, 1); + } + + /* Picture header */ + encode_parse_info(s, DIRAC_PCODE_PICTURE_HQ); + encode_picture_start(s); + + /* Encode slices */ + encode_slices(s); + + /* End sequence */ + encode_parse_info(s, DIRAC_PCODE_END_SEQ); +} \ No newline at end of file diff --git a/libavcodec/vc2enc_common.h b/libavcodec/vc2enc_common.h new file mode 100644 index 0000000000..0466869943 --- /dev/null +++ b/libavcodec/vc2enc_common.h @@ -0,0 +1,178 @@ +/* +* Copyright (C) 2016 Open Broadcast Systems Ltd. +* Author 2016 Rostislav Pehlivanov <atomnuker@gmail.com> +* +* This file is part of FFmpeg. +* +* FFmpeg is free software; you can redistribute it and/or +* modify it under the terms of the GNU Lesser General Public +* License as published by the Free Software Foundation; either +* version 2.1 of the License, or (at your option) any later version. +* +* FFmpeg is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* Lesser General Public License for more details. 
+* +* You should have received a copy of the GNU Lesser General Public +* License along with FFmpeg; if not, write to the Free Software +* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#ifndef AVCODEC_VC2ENC_COMMON_H +#define AVCODEC_VC2ENC_COMMON_H + +#include "avcodec.h" +#include "dirac.h" +#include "put_bits.h" +#include "libavutil/attributes_internal.h" + +#include "vc2enc_dwt.h" +#include "diractab.h" + +/* The limited size resolution of each slice forces us to do this */ +#define SSIZE_ROUND(b) (FFALIGN((b), s->size_scaler) + 4 + s->prefix_bytes) + +FF_VISIBILITY_PUSH_HIDDEN + +enum VC2_QM { + VC2_QM_DEF = 0, + VC2_QM_COL, + VC2_QM_FLAT, + + VC2_QM_NB +}; + +typedef struct SubBand { + dwtcoef *buf; + ptrdiff_t stride; + int width; + int height; + int shift; +} SubBand; + +typedef struct Plane { + SubBand band[MAX_DWT_LEVELS][4]; + dwtcoef *coef_buf; + int width; + int height; + int dwt_width; + int dwt_height; + ptrdiff_t coef_stride; +} Plane; + +typedef struct SliceArgs { + const struct VC2EncContext *ctx; + union { + int cache[DIRAC_MAX_QUANT_INDEX]; + uint8_t *buf; + }; + int x; + int y; + int quant_idx; + int bits_ceil; + int bits_floor; + int bytes; +} SliceArgs; + +typedef struct TransformArgs { + const struct VC2EncContext *ctx; + Plane *plane; + const void *idata; + ptrdiff_t istride; + int field; + VC2TransformContext t; +} TransformArgs; + +typedef struct VC2EncContext { + AVClass *av_class; + PutBitContext pb; + Plane plane[3]; + AVCodecContext *avctx; + DiracVersionInfo ver; + + SliceArgs *slice_args; + TransformArgs transform_args[3]; + + /* For conversion from unsigned pixel values to signed */ + int diff_offset; + int bpp; + int bpp_idx; + + /* Picture number */ + uint32_t picture_number; + + /* Base video format */ + int base_vf; + int level; + int profile; + + /* Quantization matrix */ + uint8_t quant[MAX_DWT_LEVELS][4]; + int custom_quant_matrix; + + /* Division LUT */ + uint32_t qmagic_lut[116][2]; + 
+ int num_x; /* #slices horizontally */ + int num_y; /* #slices vertically */ + int prefix_bytes; + int size_scaler; + int chroma_x_shift; + int chroma_y_shift; + + /* Rate control stuff */ + int frame_max_bytes; + int slice_max_bytes; + int slice_min_bytes; + int q_ceil; + int q_avg; + + /* Options */ + double tolerance; + int wavelet_idx; + int wavelet_depth; + int strict_compliance; + int slice_height; + int slice_width; + int interlaced; + enum VC2_QM quant_matrix; + + /* Parse code state */ + uint32_t next_parse_offset; + enum DiracParseCodes last_parse_code; +} VC2EncContext; + +extern uint16_t interleaved_ue_golomb_tab[256]; +extern uint16_t top_interleaved_ue_golomb_tab[256]; +extern uint8_t golomb_len_tab[256]; + +static inline void put_vc2_ue_uint_inline(PutBitContext *pb, uint32_t val) +{ + uint64_t pbits = 1; + int bits = 1; + + ++val; + + while (val >> 8) { + pbits |= (uint64_t)interleaved_ue_golomb_tab[val & 0xff] << bits; + val >>= 8; + bits += 16; + } + pbits |= (uint64_t)top_interleaved_ue_golomb_tab[val] << bits; + bits += golomb_len_tab[val]; + + put_bits63(pb, bits, pbits); +} + +int ff_vc2_encode_init(AVCodecContext *avctx, int depth); + +int ff_vc2_frame_init_properties(AVCodecContext *avctx, VC2EncContext *s); + +void ff_vc2_init_quant_matrix(VC2EncContext *s, uint8_t quant[MAX_DWT_LEVELS][4]); + +void ff_vc2_encode_frame(VC2EncContext *s, void(*encode_slices)(VC2EncContext*)); + +FF_VISIBILITY_POP_HIDDEN + +#endif -- 2.49.0 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 10+ messages in thread
* [FFmpeg-devel] [PATCH v4 2/4] libavcodec/vc2enc: Switch quant to int 2025-05-17 20:48 [FFmpeg-devel] [PATCH v4 1/4] libavcodec/vc2enc: Split out common functions between software and hardware encoders IndecisiveTurtle @ 2025-05-17 20:48 ` IndecisiveTurtle 2025-05-17 20:48 ` [FFmpeg-devel] [PATCH v4 3/4] libavcodec/vulkan: Add modifications to common shader for VC2 vulkan encoder IndecisiveTurtle ` (2 subsequent siblings) 3 siblings, 0 replies; 10+ messages in thread From: IndecisiveTurtle @ 2025-05-17 20:48 UTC (permalink / raw) To: ffmpeg-devel; +Cc: IndecisiveTurtle From: IndecisiveTurtle <geoster3d@gmail.com> Prevents the compiler from mistaking it for a string. Also makes passing it to the GPU in a buffer easier. --- libavcodec/vc2enc_common.c | 2 +- libavcodec/vc2enc_common.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/libavcodec/vc2enc_common.c b/libavcodec/vc2enc_common.c index bd27fd3c40..d4415674e7 100644 --- a/libavcodec/vc2enc_common.c +++ b/libavcodec/vc2enc_common.c @@ -304,7 +304,7 @@ static const uint8_t vc2_qm_flat_tab[][4] = { { 0, 0, 0, 0} }; -void ff_vc2_init_quant_matrix(VC2EncContext *s, uint8_t quant[MAX_DWT_LEVELS][4]) +void ff_vc2_init_quant_matrix(VC2EncContext *s, int quant[MAX_DWT_LEVELS][4]) { int level, orientation; diff --git a/libavcodec/vc2enc_common.h b/libavcodec/vc2enc_common.h index 0466869943..159f72452e 100644 --- a/libavcodec/vc2enc_common.h +++ b/libavcodec/vc2enc_common.h @@ -108,7 +108,7 @@ typedef struct VC2EncContext { int profile; /* Quantization matrix */ - uint8_t quant[MAX_DWT_LEVELS][4]; + int quant[MAX_DWT_LEVELS][4]; int custom_quant_matrix; /* Division LUT */ @@ -169,7 +169,7 @@ int ff_vc2_encode_init(AVCodecContext *avctx, int depth); int ff_vc2_frame_init_properties(AVCodecContext *avctx, VC2EncContext *s); -void ff_vc2_init_quant_matrix(VC2EncContext *s, uint8_t quant[MAX_DWT_LEVELS][4]); +void ff_vc2_init_quant_matrix(VC2EncContext *s, int quant[MAX_DWT_LEVELS][4]); void 
ff_vc2_encode_frame(VC2EncContext *s, void(*encode_slices)(VC2EncContext*)); -- 2.49.0 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 10+ messages in thread
* [FFmpeg-devel] [PATCH v4 3/4] libavcodec/vulkan: Add modifications to common shader for VC2 vulkan encoder 2025-05-17 20:48 [FFmpeg-devel] [PATCH v4 1/4] libavcodec/vc2enc: Split out common functions between software and hardware encoders IndecisiveTurtle 2025-05-17 20:48 ` [FFmpeg-devel] [PATCH v4 2/4] libavcodec/vc2enc: Switch quant to int IndecisiveTurtle @ 2025-05-17 20:48 ` IndecisiveTurtle 2025-05-19 16:46 ` Andreas Rheinhardt 2025-05-17 20:48 ` [FFmpeg-devel] [PATCH v4 4/4] lavc: implement a Vulkan-based VC-2 encoder Implements a Vulkan based dirac encoder. Supports Haar and Legall wavelets and should work with all wavelet depths IndecisiveTurtle 2025-05-19 15:56 ` [FFmpeg-devel] [PATCH v4 1/4] libavcodec/vc2enc: Split out common functions between software and hardware encoders Andreas Rheinhardt 3 siblings, 1 reply; 10+ messages in thread From: IndecisiveTurtle @ 2025-05-17 20:48 UTC (permalink / raw) To: ffmpeg-devel; +Cc: IndecisiveTurtle From: IndecisiveTurtle <geoster3d@gmail.com> --- libavcodec/vulkan/common.comp | 54 ++++++++++++++++++++++++++++------- 1 file changed, 44 insertions(+), 10 deletions(-) diff --git a/libavcodec/vulkan/common.comp b/libavcodec/vulkan/common.comp index 10af9c0623..db216a2ac6 100644 --- a/libavcodec/vulkan/common.comp +++ b/libavcodec/vulkan/common.comp @@ -18,6 +18,9 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#extension GL_EXT_buffer_reference : require +#extension GL_EXT_buffer_reference2 : require + layout(buffer_reference, buffer_reference_align = 1) buffer u8buf { uint8_t v; }; @@ -61,22 +64,20 @@ layout(buffer_reference, buffer_reference_align = 8) buffer u64buf { #define mid_pred(a, b, c) \ max(min((a), (b)), min(max((a), (b)), (c))) -/* TODO: optimize */ + uint align(uint src, uint a) { - uint res = src % a; - if (res == 0) - return src; - return src + a - res; + return (src + a - 1) & ~(a - 1); +} + +int align(int src, int a) +{ + return (src + a - 1) & ~(a - 1); } -/* 
TODO: optimize */ uint64_t align64(uint64_t src, uint64_t a) { - uint64_t res = src % a; - if (res == 0) - return src; - return src + a - res; + return (src + a - 1) & ~(a - 1); } #define reverse4(src) \ @@ -167,6 +168,39 @@ uint32_t flush_put_bits(inout PutBitContext pb) return uint32_t(pb.buf - pb.buf_start); } +void skip_put_bytes(inout PutBitContext pb, int n) +{ + int bytes_left = pb.bit_left >> 3; + if (n < bytes_left) + { + int n_bits = n << 3; + int mask = (1 << n_bits) - 1; + pb.bit_buf <<= n_bits; + pb.bit_buf |= mask; + pb.bit_left -= uint8_t(n_bits); + return; + } + if (pb.bit_left < BUF_BITS) + { + int mask = (1 << pb.bit_left) - 1; + pb.bit_buf <<= pb.bit_left; + pb.bit_buf |= mask; + u32vec2buf(pb.buf).v = BUF_REVERSE(pb.bit_buf); + pb.buf += BUF_BYTES; + n -= pb.bit_left >> 3; + } + int skip_dwords = n >> 2; + while (skip_dwords > 0) + { + u8vec4buf(pb.buf).v = u8vec4(0xFF); + pb.buf += 4; + skip_dwords--; + } + int skip_bits = (n & 3) << 3; + pb.bit_buf = (1 << skip_bits) - 1; + pb.bit_left = uint8_t(BUF_BITS - skip_bits); +} + void init_put_bits(out PutBitContext pb, u8buf data, uint64_t len) { pb.buf_start = uint64_t(data); -- 2.49.0 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [FFmpeg-devel] [PATCH v4 3/4] libavcodec/vulkan: Add modifications to common shader for VC2 vulkan encoder 2025-05-17 20:48 ` [FFmpeg-devel] [PATCH v4 3/4] libavcodec/vulkan: Add modifications to common shader for VC2 vulkan encoder IndecisiveTurtle @ 2025-05-19 16:46 ` Andreas Rheinhardt 2025-05-19 17:02 ` IndecisiveTurtle 0 siblings, 1 reply; 10+ messages in thread From: Andreas Rheinhardt @ 2025-05-19 16:46 UTC (permalink / raw) To: ffmpeg-devel IndecisiveTurtle: > From: IndecisiveTurtle <geoster3d@gmail.com> > > --- > libavcodec/vulkan/common.comp | 54 ++++++++++++++++++++++++++++------- > 1 file changed, 44 insertions(+), 10 deletions(-) > > diff --git a/libavcodec/vulkan/common.comp b/libavcodec/vulkan/common.comp > index 10af9c0623..db216a2ac6 100644 > --- a/libavcodec/vulkan/common.comp > +++ b/libavcodec/vulkan/common.comp > @@ -18,6 +18,9 @@ > * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA > */ > > +#extension GL_EXT_buffer_reference : require > +#extension GL_EXT_buffer_reference2 : require > + > layout(buffer_reference, buffer_reference_align = 1) buffer u8buf { > uint8_t v; > }; > @@ -61,22 +64,20 @@ layout(buffer_reference, buffer_reference_align = 8) buffer u64buf { > #define mid_pred(a, b, c) \ > max(min((a), (b)), min(max((a), (b)), (c))) > > -/* TODO: optimize */ > + > uint align(uint src, uint a) > { > - uint res = src % a; > - if (res == 0) > - return src; > - return src + a - res; > + return (src + a - 1) & ~(a - 1); > +} > + > +int align(int src, int a) > +{ > + return (src + a - 1) & ~(a - 1); > } > > -/* TODO: optimize */ > uint64_t align64(uint64_t src, uint64_t a) > { > - uint64_t res = src % a; > - if (res == 0) > - return src; > - return src + a - res; > + return (src + a - 1) & ~(a - 1); > } > > #define reverse4(src) \ > @@ -167,6 +168,39 @@ uint32_t flush_put_bits(inout PutBitContext pb) > return uint32_t(pb.buf - pb.buf_start); > } > > +void skip_put_bytes(inout PutBitContext pb, int n) > +{ > + 
int bytes_left = pb.bit_left >> 3; > + if (n < bytes_left) > + { > + int n_bits = n << 3; > + int mask = (1 << n_bits) - 1; > + pb.bit_buf <<= n_bits; > + pb.bit_buf |= mask; > + pb.bit_left -= uint8_t(n_bits); > + return; > + } > + if (pb.bit_left < BUF_BITS) > + { > + int mask = (1 << pb.bit_left) - 1; > + pb.bit_buf <<= pb.bit_left; > + pb.bit_buf |= mask; > + u32vec2buf(pb.buf).v = BUF_REVERSE(pb.bit_buf); > + pb.buf += BUF_BYTES; > + n -= pb.bit_left >> 3; > + } > + int skip_dwords = n >> 2; > + while (skip_dwords > 0) > + { > + u8vec4buf(pb.buf).v = u8vec4(0xFF); > + pb.buf += 4; > + skip_dwords--; > + } > + int skip_bits = (n & 3) << 3; > + pb.bit_buf = (1 << skip_bits) - 1; > + pb.bit_left = uint8_t(BUF_BITS - skip_bits); > +} This differs quite a lot from the software implementation: It does not presume that the PutBitContext is flushed and instead of simply skipping over the buffer it actually fills the buffer with n 0xFF bytes, effectively adding the memset used in the VC2 slice writing code to skip_put_bytes(). But this file is (if I am not mistaken) supposed to be generic, not vc2 specific, so this feels very wrong. > + > void init_put_bits(out PutBitContext pb, u8buf data, uint64_t len) > { > pb.buf_start = uint64_t(data); _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [FFmpeg-devel] [PATCH v4 3/4] libavcodec/vulkan: Add modifications to common shader for VC2 vulkan encoder 2025-05-19 16:46 ` Andreas Rheinhardt @ 2025-05-19 17:02 ` IndecisiveTurtle 0 siblings, 0 replies; 10+ messages in thread From: IndecisiveTurtle @ 2025-05-19 17:02 UTC (permalink / raw) To: FFmpeg development discussions and patches > This differs quite a lot from the software implementation: It does not > presume that the PutBitContext is flushed and instead of simply skipping > over the buffer it actually fills the buffer with n 0xFF bytes, > effectively adding the memset used in the VC2 slice writing code to > skip_put_bytes(). But this file is (if I am not mistaken) supposed to be > generic, not vc2 specific, so this feels very wrong. Would it be enough to move it to vc2_encode.comp or should I also rename the function? Στις Δευ 19 Μαΐ 2025 στις 7:46 μ.μ., ο/η Andreas Rheinhardt <andreas.rheinhardt@outlook.com> έγραψε: > > IndecisiveTurtle: > > From: IndecisiveTurtle <geoster3d@gmail.com> > > > > --- > > libavcodec/vulkan/common.comp | 54 ++++++++++++++++++++++++++++------- > > 1 file changed, 44 insertions(+), 10 deletions(-) > > > > diff --git a/libavcodec/vulkan/common.comp b/libavcodec/vulkan/common.comp > > index 10af9c0623..db216a2ac6 100644 > > --- a/libavcodec/vulkan/common.comp > > +++ b/libavcodec/vulkan/common.comp > > @@ -18,6 +18,9 @@ > > * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA > > */ > > > > +#extension GL_EXT_buffer_reference : require > > +#extension GL_EXT_buffer_reference2 : require > > + > > layout(buffer_reference, buffer_reference_align = 1) buffer u8buf { > > uint8_t v; > > }; > > @@ -61,22 +64,20 @@ layout(buffer_reference, buffer_reference_align = 8) buffer u64buf { > > #define mid_pred(a, b, c) \ > > max(min((a), (b)), min(max((a), (b)), (c))) > > > > -/* TODO: optimize */ > > + > > uint align(uint src, uint a) > > { > > - uint res = src % a; > > - if (res == 0) > > - return src; > > - 
return src + a - res; > > + return (src + a - 1) & ~(a - 1); > > +} > > + > > +int align(int src, int a) > > +{ > > + return (src + a - 1) & ~(a - 1); > > } > > > > -/* TODO: optimize */ > > uint64_t align64(uint64_t src, uint64_t a) > > { > > - uint64_t res = src % a; > > - if (res == 0) > > - return src; > > - return src + a - res; > > + return (src + a - 1) & ~(a - 1); > > } > > > > #define reverse4(src) \ > > @@ -167,6 +168,39 @@ uint32_t flush_put_bits(inout PutBitContext pb) > > return uint32_t(pb.buf - pb.buf_start); > > } > > > > +void skip_put_bytes(inout PutBitContext pb, int n) > > +{ > > + int bytes_left = pb.bit_left >> 3; > > + if (n < bytes_left) > > + { > > + int n_bits = n << 3; > > + int mask = (1 << n_bits) - 1; > > + pb.bit_buf <<= n_bits; > > + pb.bit_buf |= mask; > > + pb.bit_left -= uint8_t(n_bits); > > + return; > > + } > > + if (pb.bit_left < BUF_BITS) > > + { > > + int mask = (1 << pb.bit_left) - 1; > > + pb.bit_buf <<= pb.bit_left; > > + pb.bit_buf |= mask; > > + u32vec2buf(pb.buf).v = BUF_REVERSE(pb.bit_buf); > > + pb.buf += BUF_BYTES; > > + n -= pb.bit_left >> 3; > > + } > > + int skip_dwords = n >> 2; > > + while (skip_dwords > 0) > > + { > > + u8vec4buf(pb.buf).v = u8vec4(0xFF); > > + pb.buf += 4; > > + skip_dwords--; > > + } > > + int skip_bits = (n & 3) << 3; > > + pb.bit_buf = (1 << skip_bits) - 1; > > + pb.bit_left = uint8_t(BUF_BITS - skip_bits); > > +} > > This differs quite a lot from the software implementation: It does not > presume that the PutBitContext is flushed and instead of simply skipping > over the buffer it actually fills the buffer with n 0xFF bytes, > effectively adding the memset used in the VC2 slice writing code to > skip_put_bytes(). But this file is (if I am not mistaken) supposed to be > generic, not vc2 specific, so this feels very wrong. 
> > > + > > void init_put_bits(out PutBitContext pb, u8buf data, uint64_t len) > > { > > pb.buf_start = uint64_t(data); > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 10+ messages in thread
* [FFmpeg-devel] [PATCH v4 4/4] lavc: implement a Vulkan-based VC-2 encoder Implements a Vulkan based dirac encoder. Supports Haar and Legall wavelets and should work with all wavelet depths. 2025-05-17 20:48 [FFmpeg-devel] [PATCH v4 1/4] libavcodec/vc2enc: Split out common functions between software and hardware encoders IndecisiveTurtle 2025-05-17 20:48 ` [FFmpeg-devel] [PATCH v4 2/4] libavcodec/vc2enc: Switch quant to int IndecisiveTurtle 2025-05-17 20:48 ` [FFmpeg-devel] [PATCH v4 3/4] libavcodec/vulkan: Add modifications to common shader for VC2 vulkan encoder IndecisiveTurtle @ 2025-05-17 20:48 ` IndecisiveTurtle 2025-05-17 20:50 ` IndecisiveTurtle 2025-05-19 17:09 ` Andreas Rheinhardt 2025-05-19 15:56 ` [FFmpeg-devel] [PATCH v4 1/4] libavcodec/vc2enc: Split out common functions between software and hardware encoders Andreas Rheinhardt 3 siblings, 2 replies; 10+ messages in thread From: IndecisiveTurtle @ 2025-05-17 20:48 UTC (permalink / raw) To: ffmpeg-devel; +Cc: IndecisiveTurtle From: IndecisiveTurtle <geoster3d@gmail.com> Performance-wise, encoding a 3440x1440 1-minute video is performed in about 2.4 minutes with the CPU encoder running on my Ryzen 5 4600H, while it takes about 1.3 minutes on my NVIDIA GTX 1650. The Haar shader has a subgroup-optimized variant that applies when the configured wavelet depth allows it. --- configure | 1 + libavcodec/Makefile | 3 + libavcodec/allcodecs.c | 1 + libavcodec/vc2enc_vulkan.c | 775 +++++++++++++++++++ libavcodec/vulkan/vc2_dwt_haar.comp | 82 ++ libavcodec/vulkan/vc2_dwt_haar_subgroup.comp | 75 ++ libavcodec/vulkan/vc2_dwt_hor_legall.comp | 82 ++ libavcodec/vulkan/vc2_dwt_upload.comp | 96 +++ libavcodec/vulkan/vc2_dwt_ver_legall.comp | 78 ++ libavcodec/vulkan/vc2_encode.comp | 159 ++++ libavcodec/vulkan/vc2_slice_sizes.comp | 170 ++++ 11 files changed, 1522 insertions(+) create mode 100644 libavcodec/vc2enc_vulkan.c create mode 100644 libavcodec/vulkan/vc2_dwt_haar.comp create mode 100644 
libavcodec/vulkan/vc2_dwt_haar_subgroup.comp create mode 100644 libavcodec/vulkan/vc2_dwt_hor_legall.comp create mode 100644 libavcodec/vulkan/vc2_dwt_upload.comp create mode 100644 libavcodec/vulkan/vc2_dwt_ver_legall.comp create mode 100644 libavcodec/vulkan/vc2_encode.comp create mode 100644 libavcodec/vulkan/vc2_slice_sizes.comp diff --git a/configure b/configure index 2e69b3c56c..09f9dff258 100755 --- a/configure +++ b/configure @@ -3132,6 +3132,7 @@ utvideo_encoder_select="bswapdsp huffman llvidencdsp" vble_decoder_select="llviddsp" vbn_decoder_select="texturedsp" vbn_encoder_select="texturedspenc" +vc2_vulkan_encoder_select="vulkan spirv_compiler" vmix_decoder_select="idctdsp" vc1_decoder_select="blockdsp h264qpel intrax8 mpegvideodec qpeldsp vc1dsp" vc1image_decoder_select="vc1_decoder" diff --git a/libavcodec/Makefile b/libavcodec/Makefile index bdf0d6742e..20968520d7 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -772,6 +772,9 @@ OBJS-$(CONFIG_VC1_MMAL_DECODER) += mmaldec.o OBJS-$(CONFIG_VC1_QSV_DECODER) += qsvdec.o OBJS-$(CONFIG_VC1_V4L2M2M_DECODER) += v4l2_m2m_dec.o OBJS-$(CONFIG_VC2_ENCODER) += vc2enc.o vc2enc_dwt.o vc2enc_common.o diractab.o +OBJS-$(CONFIG_VC2_VULKAN_ENCODER) += vc2enc_vulkan.o vulkan/vc2_encode.o vulkan/vc2_slice_sizes.o \ + vulkan/vc2_dwt_hor_legall.o vulkan/vc2_dwt_ver_legall.o \ + vulkan/vc2_dwt_upload.o vulkan/vc2_dwt_haar.o vulkan/vc2_dwt_haar_subgroup.o OBJS-$(CONFIG_VCR1_DECODER) += vcr1.o OBJS-$(CONFIG_VMDAUDIO_DECODER) += vmdaudio.o OBJS-$(CONFIG_VMDVIDEO_DECODER) += vmdvideo.o diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c index cd4f6ecd59..cd23a9490c 100644 --- a/libavcodec/allcodecs.c +++ b/libavcodec/allcodecs.c @@ -367,6 +367,7 @@ extern const FFCodec ff_vc1_mmal_decoder; extern const FFCodec ff_vc1_qsv_decoder; extern const FFCodec ff_vc1_v4l2m2m_decoder; extern const FFCodec ff_vc2_encoder; +extern const FFCodec ff_vc2_vulkan_encoder; extern const FFCodec ff_vcr1_decoder; extern const 
FFCodec ff_vmdvideo_decoder; extern const FFCodec ff_vmix_decoder; diff --git a/libavcodec/vc2enc_vulkan.c b/libavcodec/vc2enc_vulkan.c new file mode 100644 index 0000000000..23b204cf92 --- /dev/null +++ b/libavcodec/vc2enc_vulkan.c @@ -0,0 +1,775 @@ +/* + * Copyright (C) 2025 raphaelthegreat <geoster3d@gmail.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/avassert.h" +#include "libavutil/mem.h" +#include "libavutil/pixdesc.h" +#include "libavutil/opt.h" +#include "libavutil/thread.h" +#include "libavutil/version.h" +#include "libavutil/vulkan_spirv.h" +#include "libavutil/hwcontext_vulkan.h" +#include "libavutil/vulkan_loader.h" +#include "libavutil/vulkan.h" +#include "codec_internal.h" +#include "internal.h" +#include "encode.h" +#include "version.h" +#include "vc2enc_common.h" +#include "hwconfig.h" + +#define LEGALL_TILE_DIM 16 +#define LEGALL_WORKGROUP_X 64 +#define SLICE_WORKGROUP_X 128 +#define MAX_NUM_PLANES 3 + +extern const char *ff_source_common_comp; +extern const char *ff_source_vc2_encode_comp; +extern const char *ff_source_vc2_dwt_hor_legall_comp; +extern const char *ff_source_vc2_dwt_ver_legall_comp; +extern const char *ff_source_vc2_slice_sizes_comp; +extern const char *ff_source_vc2_dwt_upload_comp; 
+extern const char *ff_source_vc2_dwt_haar_comp;
+extern const char *ff_source_vc2_dwt_haar_subgroup_comp;
+
+/* Push constants shared by the upload, Haar and LeGall DWT shaders.
+ * The middle word is read as diff_offset by the upload shader and as
+ * plane_idx by the Haar shaders, hence the union. */
+typedef struct VC2DwtPushData {
+    int s;
+    union {
+        int diff_offset;
+        int plane_idx;
+    };
+    int level;
+} VC2DwtPushData;
+
+/* Lookup tables uploaded once at init; must match the "AuxData" uniform
+ * block declared in init_vulkan_pipeline(). */
+typedef struct VC2EncAuxData {
+    int quant[MAX_DWT_LEVELS][4];
+    int ff_dirac_qscale_tab[116];
+    uint16_t interleaved_ue_golomb_tab[256];
+    uint16_t top_interleaved_ue_golomb_tab[256];
+    uint8_t golomb_len_tab[256];
+} VC2EncAuxData;
+
+/* Push constants of the slice encoding shader. */
+typedef struct VC2EncPushData {
+    VkDeviceAddress pb;
+    int num_x;
+    int num_y;
+    int wavelet_depth;
+    int size_scaler;
+    int prefix_bytes;
+} VC2EncPushData;
+
+/* Per-slice record; must match the GLSL "SliceArgs" struct emitted in
+ * init_vulkan_pipeline(). */
+typedef struct VC2EncSliceArgs {
+    int quant_idx;
+    int bytes;
+    int pb_start;
+    int pad;
+} VC2EncSliceArgs;
+
+/* Push constants of the slice size calculation shader. */
+typedef struct VC2EncSliceCalcPushData {
+    int num_x;
+    int num_y;
+    int wavelet_depth;
+    int size_scaler;
+    int prefix_bytes;
+    int bits_ceil;
+    int bits_floor;
+} VC2EncSliceCalcPushData;
+
+typedef struct VC2EncVulkanContext {
+    /* Must stay the first member: the AVOption table uses
+     * offsetof(VC2EncContext, ...) on the private data and encode_slices()
+     * casts a VC2EncContext pointer back to this type. */
+    VC2EncContext base;
+    FFVkBuffer lut_buf;
+    FFVkBuffer slice_buf;
+    VC2EncSliceArgs *slice_args;
+
+    /* Vulkan state */
+    FFVulkanContext vkctx;
+    AVVulkanDeviceQueueFamily *qf;
+    FFVkExecPool e;
+    FFVkExecContext *exec;
+
+    FFVulkanShader dwt_haar_shd;
+    FFVulkanShader dwt_upload_shd;
+    FFVulkanShader dwt_hor_shd, dwt_ver_shd;
+    FFVulkanShader slice_shd;
+    FFVulkanShader enc_shd;
+    AVBufferPool* dwt_buf_pool;
+    int haar_subgroup;
+
+    VkBuffer plane_buf;
+    VC2EncPushData enc_consts;
+    VC2DwtPushData dwt_consts;
+    VC2EncSliceCalcPushData calc_consts;
+
+    /* Intermediate frame pool */
+    AVBufferRef *intermediate_frames_ref[3];
+    AVFrame *intermediate_frame[AV_NUM_DATA_POINTERS];
+    VkImageView intermediate_views[AV_NUM_DATA_POINTERS];
+} VC2EncVulkanContext;
+
+/**
+ * Assemble, compile, link and register one compute shader.
+ *
+ * @param s          encoder context (provides the Vulkan context and exec pool)
+ * @param spv        SPIR-V compiler used to build the shader
+ * @param shd        shader to initialize
+ * @param push_size  size in bytes of the push constant range
+ * @param lg_x       local workgroup size, X
+ * @param lg_y       local workgroup size, Y
+ * @param lg_z       local workgroup size, Z
+ * @param pl_name    shader name
+ * @param pl_source  GLSL source appended after the common prologue
+ * @param start_desc index of the first entry of the descriptor table to expose
+ * @param num_desc   number of consecutive descriptor table entries to expose
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int init_vulkan_pipeline(VC2EncVulkanContext* s, FFVkSPIRVCompiler *spv,
+                                FFVulkanShader* shd, int push_size,
+                                int lg_x, int lg_y, int lg_z,
+                                const char* pl_name, const char* pl_source,
+                                int start_desc, int num_desc)
+{
+    int err = 0;
+    uint8_t *spv_data;
+    size_t spv_len;
+    void *spv_opaque = NULL;
+    FFVulkanContext *vkctx = &s->vkctx;
+    FFVulkanDescriptorSetBinding *desc;
+
+    RET(ff_vk_shader_init(vkctx, shd, pl_name, VK_SHADER_STAGE_COMPUTE_BIT,
+                          NULL, 0, lg_x, lg_y, lg_z, 0));
+
+    av_bprintf(&shd->src, "struct SliceArgs {int quant_idx;int bytes;int pb_start;int pad;};\n");
+
+    /* Full descriptor table; each shader only sees the window
+     * [start_desc, start_desc + num_desc). */
+    desc = (FFVulkanDescriptorSetBinding []) {
+        {
+            .name = "src_planes",
+            .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+            .mem_layout = ff_vk_shader_rep_fmt(vkctx->frames->sw_format, FF_VK_REP_UINT),
+            .dimensions = 2,
+            .elems = av_pix_fmt_count_planes(vkctx->frames->sw_format),
+            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+        },
+        {
+            .name = "coef_buf",
+            .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+            .mem_layout = "r32i",
+            .dimensions = 2,
+            .elems = 3,
+            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+        },
+        {
+            .name = "AuxData",
+            .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout = "scalar",
+            .buf_content = "int lut_quant[5][4]; int ff_dirac_qscale_tab[116]; "
+                           "uint16_t interleaved_ue_golomb_tab[256]; "
+                           "uint16_t top_interleaved_ue_golomb_tab[256]; "
+                           "uint8_t golomb_len_tab[256];",
+        },
+        {
+            .name = "SliceBuffer",
+            .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_layout = "scalar",
+            .buf_content = "SliceArgs slice_args[];",
+        },
+    };
+    RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc + start_desc, num_desc, 0, 0));
+
+    RET(ff_vk_shader_add_push_const(shd, 0, push_size, VK_SHADER_STAGE_COMPUTE_BIT));
+    av_bprintf(&shd->src, "#define PB_UNALIGNED\n");
+    av_bprintf(&shd->src, "#define PLANE_FMT %d\n", vkctx->frames->sw_format);
+    GLSLD(ff_source_common_comp);
+    GLSLD(pl_source);
+
+    /* Compile, link and register the shader */
+    RET(spv->compile_shader(vkctx, spv, shd, &spv_data, &spv_len, "main", &spv_opaque));
+    RET(ff_vk_shader_link(vkctx, shd, spv_data, spv_len, "main"));
+    RET(ff_vk_shader_register_exec(vkctx, &s->e, shd));
+
+fail:
+    /* The compiler output is only needed until the shader is linked;
+     * release it on success and failure alike. */
+    if (spv_opaque)
+        spv->free_shader(spv, &spv_opaque);
+
+    return err;
+}
+
+static
int init_frame_pools(AVCodecContext *avctx) +{ + int i, err = 0; + VC2EncVulkanContext *sv = avctx->priv_data; + AVHWFramesContext *frames_ctx; + AVVulkanFramesContext *vk_frames; + enum AVPixelFormat sw_format = AV_PIX_FMT_GRAY32; + + for (i = 0; i < 3; i++) { + sv->intermediate_frames_ref[i] = av_hwframe_ctx_alloc(sv->vkctx.device_ref); + if (!sv->intermediate_frames_ref[i]) + return AVERROR(ENOMEM); + + frames_ctx = (AVHWFramesContext *)sv->intermediate_frames_ref[i]->data; + frames_ctx->format = AV_PIX_FMT_VULKAN; + frames_ctx->sw_format = sw_format; + frames_ctx->width = sv->base.plane[i].dwt_width; + frames_ctx->height = sv->base.plane[i].dwt_height; + + vk_frames = frames_ctx->hwctx; + vk_frames->tiling = VK_IMAGE_TILING_OPTIMAL; + vk_frames->usage = VK_IMAGE_USAGE_STORAGE_BIT; + vk_frames->img_flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT; + + err = av_hwframe_ctx_init(sv->intermediate_frames_ref[i]); + if (err < 0) { + av_log(avctx, AV_LOG_ERROR, "Unable to initialize frame pool with format %s: %s\n", + av_get_pix_fmt_name(sw_format), av_err2str(err)); + av_buffer_unref(&sv->intermediate_frames_ref[i]); + return err; + } + } + + return err; +} + +static void vulkan_bind_img_planes(FFVulkanContext *s, FFVkExecContext *e, + FFVulkanShader *shd, VkImageView *views, + int set, int binding) +{ + for (int i = 0; i < 3; i++) + ff_vk_shader_update_img(s, e, shd, set, binding, i, + views[i], VK_IMAGE_LAYOUT_GENERAL, + VK_NULL_HANDLE); +} + +static void dwt_plane_haar(VC2EncVulkanContext *s, FFVkExecContext *exec, + VkImageMemoryBarrier2* img_bar, int nb_img_bar) +{ + int p, group_x, group_y; + FFVulkanContext *vkctx = &s->vkctx; + FFVulkanFunctions *vk = &vkctx->vkfn; + Plane* plane; + + s->dwt_consts.level = s->base.wavelet_depth; + vulkan_bind_img_planes(vkctx, exec, &s->dwt_haar_shd, s->intermediate_views, 0, 0); + ff_vk_exec_bind_shader(vkctx, exec, &s->dwt_haar_shd); + + /* Haar pass */ + for (p = 0; p < 3; p++) { + plane = &s->base.plane[p]; + 
s->dwt_consts.plane_idx = p; + if (s->haar_subgroup) { + group_x = FFALIGN(plane->dwt_width, 8) >> 3; + group_y = FFALIGN(plane->dwt_height, 8) >> 3; + } else { + group_x = FFALIGN(plane->dwt_width, 32) >> 5; + group_y = FFALIGN(plane->dwt_height, 32) >> 5; + } + + ff_vk_shader_update_push_const(vkctx, exec, &s->dwt_haar_shd, VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(VC2DwtPushData), &s->dwt_consts); + vk->CmdDispatch(exec->buf, group_x, group_y, 1); + } + + /* Wait for haar dispatches to complete */ + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + }); +} + +static void dwt_plane_legall(VC2EncVulkanContext *s, FFVkExecContext *exec, + VkImageMemoryBarrier2* img_bar, int nb_img_bar) +{ + FFVulkanContext *vkctx = &s->vkctx; + FFVulkanFunctions *vk = &vkctx->vkfn; + /* The horizontal shader runs one invocation per row, so its dispatch is sized by dwt_height; >> 6 divides by LEGALL_WORKGROUP_X (64). */ int legall_group_x = (s->base.plane[0].dwt_height + LEGALL_WORKGROUP_X - 1) >> 6; + int legall_group_y = (s->base.plane[0].dwt_width + LEGALL_WORKGROUP_X - 1) >> 6; + int i; + + /* Perform legall wavelet transform */ + for (i = 0; i < s->base.wavelet_depth; i++) { + s->dwt_consts.level = i; + + /* Horizontal legall pass */ + vulkan_bind_img_planes(vkctx, exec, &s->dwt_hor_shd, s->intermediate_views, 0, 0); + ff_vk_exec_bind_shader(vkctx, exec, &s->dwt_hor_shd); + ff_vk_shader_update_push_const(vkctx, exec, &s->dwt_hor_shd, VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(VC2DwtPushData), &s->dwt_consts); + vk->CmdDispatch(exec->buf, legall_group_x, 1, 3); + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + }); + + /* Vertical legall pass */ + vulkan_bind_img_planes(vkctx, exec, &s->dwt_ver_shd, s->intermediate_views, 0, 0); + ff_vk_exec_bind_shader(vkctx, exec, &s->dwt_ver_shd); + ff_vk_shader_update_push_const(vkctx, exec, &s->dwt_ver_shd,
VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(VC2DwtPushData), &s->dwt_consts); + vk->CmdDispatch(exec->buf, legall_group_y, 1, 3); + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + }); + } +} + +static int dwt_planes(VC2EncVulkanContext *s, AVFrame *frame) +{ + int i, err = 0, nb_img_bar = 0; + int wavelet_idx = s->base.wavelet_idx; + int group_x = s->base.plane[0].dwt_width >> 3; + int group_y = s->base.plane[0].dwt_height >> 3; + FFVulkanContext *vkctx = &s->vkctx; + FFVulkanFunctions *vk = &vkctx->vkfn; + FFVkExecContext *exec = s->exec; + VkImageView views[AV_NUM_DATA_POINTERS]; + VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS]; + + /* Generate barriers and image views for frame images. */ + RET(ff_vk_exec_add_dep_frame(vkctx, exec, frame, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + RET(ff_vk_create_imageviews(vkctx, exec, views, frame, FF_VK_REP_UINT)); + ff_vk_frame_barrier(vkctx, exec, frame, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED); + + /* Submit the image barriers. 
*/ + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + }); + + /* Create temporary frames */ + nb_img_bar = 0; + for (i = 0; i < 3; i++) { + s->intermediate_frame[i] = av_frame_alloc(); + if (!s->intermediate_frame[i]) + return AVERROR(ENOMEM); + + RET(av_hwframe_get_buffer(s->intermediate_frames_ref[i], + s->intermediate_frame[i], 0)); + RET(ff_vk_exec_add_dep_frame(vkctx, exec, s->intermediate_frame[i], + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + RET(ff_vk_create_imageviews(vkctx, exec, &s->intermediate_views[i], + s->intermediate_frame[i], FF_VK_REP_INT)); + ff_vk_frame_barrier(vkctx, exec, s->intermediate_frame[i], img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED); + } + + /* Submit the image barriers. */ + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + }); + + /* Bind input images to the shader. */ + ff_vk_shader_update_img_array(vkctx, exec, &s->dwt_upload_shd, frame, views, 0, 0, + VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE); + vulkan_bind_img_planes(vkctx, exec, &s->dwt_upload_shd, s->intermediate_views, 0, 1); + + /* Upload coefficients from planes to the buffer.
*/ + s->dwt_consts.diff_offset = s->base.diff_offset; + ff_vk_exec_bind_shader(vkctx, exec, &s->dwt_upload_shd); + ff_vk_shader_update_push_const(vkctx, exec, &s->dwt_upload_shd, VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(VC2DwtPushData), &s->dwt_consts); + vk->CmdDispatch(exec->buf, group_x, group_y, 1); + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + }); + + /* Perform wavelet transform. */ + if (wavelet_idx == VC2_TRANSFORM_HAAR || wavelet_idx == VC2_TRANSFORM_HAAR_S) + dwt_plane_haar(s, exec, img_bar, nb_img_bar); + else if (wavelet_idx == VC2_TRANSFORM_5_3) + dwt_plane_legall(s, exec, img_bar, nb_img_bar); + +fail: + return err; +} + +static void encode_slices(VC2EncContext *s) +{ + VC2EncVulkanContext *sv = (VC2EncVulkanContext*)s; + FFVkExecContext *exec = sv->exec; + int num_slices = s->num_x * s->num_y; + int num_slice_groups = (num_slices + SLICE_WORKGROUP_X - 1) >> 7; + int i, skip = 0; + FFVulkanContext *vkctx = &sv->vkctx; + FFVulkanFunctions *vk = &vkctx->vkfn; + + /* Calculate slice sizes. */ + vulkan_bind_img_planes(vkctx, exec, &sv->slice_shd, sv->intermediate_views, 0, 0); + ff_vk_shader_update_desc_buffer(vkctx, exec, &sv->slice_shd, + 0, 1, 0, &sv->lut_buf, 0, + sizeof(VC2EncAuxData), + VK_FORMAT_UNDEFINED); + ff_vk_shader_update_desc_buffer(vkctx, exec, &sv->slice_shd, + 0, 2, 0, &sv->slice_buf, 0, + sv->slice_buf.size, + VK_FORMAT_UNDEFINED); + ff_vk_exec_bind_shader(vkctx, exec, &sv->slice_shd); + ff_vk_shader_update_push_const(vkctx, exec, &sv->slice_shd, VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(VC2EncSliceCalcPushData), &sv->calc_consts); + vk->CmdDispatch(exec->buf, num_slice_groups, 1, 1); + + flush_put_bits(&s->pb); + sv->enc_consts.pb += put_bytes_output(&s->pb); + + /* Wait for slice sizes to be written.
*/ + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pBufferMemoryBarriers = &(VkBufferMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, + .srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + .srcAccessMask = VK_ACCESS_2_SHADER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_2_SHADER_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = sv->slice_buf.buf, + .size = sizeof(VC2EncSliceArgs) * num_slices, + .offset = 0, + }, + .bufferMemoryBarrierCount = 1U, + }); + + /* Perform the encoding. */ + vulkan_bind_img_planes(vkctx, exec, &sv->enc_shd, sv->intermediate_views, 0, 0); + ff_vk_shader_update_desc_buffer(vkctx, exec, &sv->enc_shd, + 0, 1, 0, &sv->lut_buf, 0, + sizeof(VC2EncAuxData), + VK_FORMAT_UNDEFINED); + ff_vk_shader_update_desc_buffer(vkctx, exec, &sv->enc_shd, + 0, 2, 0, &sv->slice_buf, 0, + sv->slice_buf.size, + VK_FORMAT_UNDEFINED); + ff_vk_exec_bind_shader(vkctx, exec, &sv->enc_shd); + ff_vk_shader_update_push_const(vkctx, exec, &sv->enc_shd, VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(VC2EncPushData), &sv->enc_consts); + + vk->CmdDispatch(exec->buf, num_slice_groups, 1, 1); + + ff_vk_exec_submit(vkctx, exec); + ff_vk_exec_wait(vkctx, exec); + + for (int slice_y = 0; slice_y < s->num_y; slice_y++) { + for (int slice_x = 0; slice_x < s->num_x; slice_x++) { + VC2EncSliceArgs *args = &sv->slice_args[s->num_x * slice_y + slice_x]; + skip += args->bytes; + } + } + + /* Skip forward to write end header */ + skip_put_bytes(&s->pb, skip); + + /* Free allocated intermediate frames */ + for (i = 0; i < 3; i++) + av_frame_free(&sv->intermediate_frame[i]); +} + +static int encode_frame(VC2EncVulkanContext *sv, AVPacket *avpkt, + const AVFrame *frame, const int header_size) +{ + int ret; + int64_t max_frame_bytes; + AVBufferRef *avpkt_buf = NULL; + FFVkBuffer* buf_vk = 
NULL; + VC2EncContext* s = &sv->base; + FFVulkanContext *vkctx = &sv->vkctx; + + /* Perform wavelet pass on the input data. */ + ret = dwt_planes(sv, (AVFrame*)frame); + if (ret) + return ret; + + /* Allocate a buffer that can fit all 3 planes of data */ + max_frame_bytes = header_size + MAX_NUM_PLANES * s->avctx->width + * s->avctx->height + * sizeof(dwtcoef); + + /* Get a pooled device local host visible buffer for writing output data */ + ret = ff_vk_get_pooled_buffer(vkctx, &sv->dwt_buf_pool, &avpkt_buf, + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, NULL, + max_frame_bytes, + VK_MEMORY_PROPERTY_HOST_CACHED_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); + if (ret < 0) + return ret; + + ff_vk_exec_add_dep_buf(vkctx, sv->exec, &avpkt_buf, 1, 1); + buf_vk = (FFVkBuffer *)avpkt_buf->data; + sv->enc_consts.pb = buf_vk->address; + + /* Initialize packet. */ + avpkt->buf = avpkt_buf; + avpkt->data = buf_vk->mapped_mem; + avpkt->size = max_frame_bytes; + init_put_bits(&s->pb, avpkt->data, avpkt->size); + + /* Encode frame */ + ff_vc2_encode_frame(s, encode_slices); + + return 0; +} + +static av_cold int vc2_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, + const AVFrame *frame, int *got_packet) +{ + int ret = 0; + VC2EncVulkanContext *sv = avctx->priv_data; + VC2EncContext *s = &sv->base; + const int bitexact = avctx->flags & AV_CODEC_FLAG_BITEXACT; + const int aux_data_size = bitexact ?
sizeof("Lavc") : sizeof(LIBAVCODEC_IDENT); + const int header_size = 100 + aux_data_size; + + ret = ff_vc2_frame_init_properties(avctx, s); + if (ret) + return ret; + + sv->calc_consts.size_scaler = s->size_scaler; + sv->calc_consts.bits_ceil = s->slice_max_bytes << 3; + sv->calc_consts.bits_floor = s->slice_min_bytes << 3; + sv->enc_consts.prefix_bytes = 0; + sv->enc_consts.size_scaler = s->size_scaler; + + sv->exec = ff_vk_exec_get(&sv->vkctx, &sv->e); + ff_vk_exec_start(&sv->vkctx, sv->exec); + + ret = encode_frame(sv, avpkt, frame, header_size); + if (ret) + return ret; + + flush_put_bits(&s->pb); + av_shrink_packet(avpkt, put_bytes_output(&s->pb)); + avpkt->flags |= AV_PKT_FLAG_KEY; + *got_packet = 1; + + return 0; +} + +static av_cold int vc2_encode_end(AVCodecContext *avctx) +{ + VC2EncVulkanContext *sv = avctx->priv_data; + FFVulkanContext *vkctx = &sv->vkctx; + int i; + + ff_vk_exec_pool_free(vkctx, &sv->e); + + ff_vk_shader_free(vkctx, &sv->dwt_upload_shd); + ff_vk_shader_free(vkctx, &sv->dwt_haar_shd); + ff_vk_shader_free(vkctx, &sv->dwt_hor_shd); + ff_vk_shader_free(vkctx, &sv->dwt_ver_shd); + ff_vk_shader_free(vkctx, &sv->slice_shd); + ff_vk_shader_free(vkctx, &sv->enc_shd); + + ff_vk_free_buf(vkctx, &sv->slice_buf); + ff_vk_free_buf(vkctx, &sv->lut_buf); + + for (i = 0; i < 3; i++) { + ff_vc2enc_free_transforms(&sv->base.transform_args[i].t); + av_buffer_unref(&sv->intermediate_frames_ref[i]); + } + + av_buffer_pool_uninit(&sv->dwt_buf_pool); + ff_vk_uninit(vkctx); + + return 0; +} + +static av_cold int vc2_encode_init(AVCodecContext *avctx) +{ + int err = 0, depth; + const AVPixFmtDescriptor *fmt; + VC2EncVulkanContext *sv = avctx->priv_data; + VC2EncContext *s = &sv->base; + FFVulkanContext *vkctx = &sv->vkctx; + FFVkSPIRVCompiler *spv; + VC2EncAuxData *ad = NULL; + unsigned int subgroup_size = vkctx->subgroup_props.maxSubgroupSize; + + /* Init vulkan */ + err = ff_vk_init(&sv->vkctx, avctx, NULL, avctx->hw_frames_ctx); + if (err < 0) + return err; 
+ + sv->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0); + if (!sv->qf) { + av_log(avctx, AV_LOG_ERROR, "Device has no compute queues!\n"); + return AVERROR(ENOTSUP); + } + + spv = ff_vk_spirv_init(); + if (!spv) { + av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); + return AVERROR_EXTERNAL; + } + + ff_vk_exec_pool_init(vkctx, sv->qf, &sv->e, 1, 0, 0, 0, NULL); + + /* Chroma subsampling */ + err = av_pix_fmt_get_chroma_sub_sample(vkctx->frames->sw_format, &s->chroma_x_shift, + &s->chroma_y_shift); + if (err < 0) + return err; + + /* Bit depth and color range index */ + fmt = av_pix_fmt_desc_get(vkctx->frames->sw_format); + depth = fmt->comp[0].depth; + + /* 16-bit depth is unsupported by this encoder */ + if (depth == 16) { + av_log(avctx, AV_LOG_ERROR, "16-bit pixel format depth is unsupported by this encoder\n"); + return AVERROR(ENOTSUP); + } + + /* Perform common initialization. */ + err = ff_vc2_encode_init(avctx, depth); + if (err < 0) + return err; + + /* Initialize Haar push data */ + sv->dwt_consts.diff_offset = s->diff_offset; + sv->dwt_consts.s = s->wavelet_idx == VC2_TRANSFORM_HAAR_S ? 1 : 0; + sv->dwt_consts.level = 0; + + /* Initializer slice calculation push data */ + sv->calc_consts.num_x = s->num_x; + sv->calc_consts.num_y = s->num_y; + sv->calc_consts.wavelet_depth = s->wavelet_depth; + sv->calc_consts.prefix_bytes = s->prefix_bytes; + + /* Initialize encoder push data */ + sv->enc_consts.wavelet_depth = s->wavelet_depth; + sv->enc_consts.num_x = s->num_x; + sv->enc_consts.num_y = s->num_y; + + /* Create buffer for encoder auxilary data. 
*/ + RET(ff_vk_create_buf(vkctx, &sv->lut_buf, sizeof(VC2EncAuxData), NULL, NULL, + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); + RET(ff_vk_map_buffer(vkctx, &sv->lut_buf, (void *)&ad, 0)); + ff_vc2_init_quant_matrix(s, ad->quant); + memcpy(ad->ff_dirac_qscale_tab, ff_dirac_qscale_tab, sizeof(ff_dirac_qscale_tab)); + memcpy(ad->interleaved_ue_golomb_tab, interleaved_ue_golomb_tab, sizeof(interleaved_ue_golomb_tab)); + memcpy(ad->top_interleaved_ue_golomb_tab, top_interleaved_ue_golomb_tab, sizeof(top_interleaved_ue_golomb_tab)); + memcpy(ad->golomb_len_tab, golomb_len_tab, sizeof(golomb_len_tab)); + RET(ff_vk_unmap_buffer(vkctx, &sv->lut_buf, 1)); + + /* Create buffer for encoder auxilary data. */ + RET(ff_vk_create_buf(vkctx, &sv->slice_buf, + sizeof(VC2EncSliceArgs) * s->num_x * s->num_y, + NULL, NULL, + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); + RET(ff_vk_map_buffer(vkctx, &sv->slice_buf, (void *)&sv->slice_args, 0)); + memset(sv->slice_args, 0, sv->slice_buf.size); + + /* Initialize intermediate frame pool. 
*/ + RET(init_frame_pools(avctx)); + + /* Initialize encoding pipelines */ + init_vulkan_pipeline(sv, spv, &sv->dwt_upload_shd, sizeof(VC2DwtPushData), + 8, 8, 1, "dwt_upload_pl", ff_source_vc2_dwt_upload_comp, 0, 2); + init_vulkan_pipeline(sv, spv, &sv->slice_shd, sizeof(VC2EncPushData), + SLICE_WORKGROUP_X, 1, 1, "slice_pl", ff_source_vc2_slice_sizes_comp, 1, 3); + init_vulkan_pipeline(sv, spv, &sv->enc_shd, sizeof(VC2EncPushData), + SLICE_WORKGROUP_X, 1, 1, "enc_pl", ff_source_vc2_encode_comp, 1, 3); + sv->haar_subgroup = 0; + + if (s->wavelet_idx == VC2_TRANSFORM_HAAR || s->wavelet_idx == VC2_TRANSFORM_HAAR_S) { + if (subgroup_size == 32 && s->wavelet_depth < 3) { + init_vulkan_pipeline(sv, spv, &sv->dwt_haar_shd, sizeof(VC2DwtPushData), + 64, 1, 1, "dwt_haar_pl", ff_source_vc2_dwt_haar_subgroup_comp, 1, 1); + sv->haar_subgroup = 1; + } else if (subgroup_size == 64 && s->wavelet_depth < 4) { + init_vulkan_pipeline(sv, spv, &sv->dwt_haar_shd, sizeof(VC2DwtPushData), + 64, 1, 1, "dwt_haar_pl", ff_source_vc2_dwt_haar_subgroup_comp, 1, 1); + sv->haar_subgroup = 1; + } else { + init_vulkan_pipeline(sv, spv, &sv->dwt_haar_shd, sizeof(VC2DwtPushData), + 32, 32, 1, "dwt_haar_pl", ff_source_vc2_dwt_haar_comp, 1, 1); + } + } else if (s->wavelet_idx == VC2_TRANSFORM_5_3) { + init_vulkan_pipeline(sv, spv, &sv->dwt_hor_shd, sizeof(VC2DwtPushData), + LEGALL_WORKGROUP_X, 1, 1, "dwt_hor_pl", ff_source_vc2_dwt_hor_legall_comp, 1, 1); + init_vulkan_pipeline(sv, spv, &sv->dwt_ver_shd, sizeof(VC2DwtPushData), + LEGALL_WORKGROUP_X, 1, 1, "dwt_ver_pl", ff_source_vc2_dwt_ver_legall_comp, 1, 1); + } + +fail: + return err; +} + +#define VC2ENC_FLAGS (AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) +static const AVOption vc2enc_options[] = { + {"tolerance", "Max undershoot in percent", offsetof(VC2EncContext, tolerance), AV_OPT_TYPE_DOUBLE, {.dbl = 5.0f}, 0.0f, 45.0f, VC2ENC_FLAGS, .unit = "tolerance"}, + {"slice_width", "Slice width", offsetof(VC2EncContext, slice_width), 
AV_OPT_TYPE_INT, {.i64 = 32}, 32, 1024, VC2ENC_FLAGS, .unit = "slice_width"}, + {"slice_height", "Slice height", offsetof(VC2EncContext, slice_height), AV_OPT_TYPE_INT, {.i64 = 16}, 8, 1024, VC2ENC_FLAGS, .unit = "slice_height"}, + {"wavelet_depth", "Transform depth", offsetof(VC2EncContext, wavelet_depth), AV_OPT_TYPE_INT, {.i64 = 4}, 1, 5, VC2ENC_FLAGS, .unit = "wavelet_depth"}, + {"wavelet_type", "Transform type", offsetof(VC2EncContext, wavelet_idx), AV_OPT_TYPE_INT, {.i64 = VC2_TRANSFORM_5_3}, 0, VC2_TRANSFORMS_NB, VC2ENC_FLAGS, .unit = "wavelet_idx"}, + {"5_3", "LeGall (5,3)", 0, AV_OPT_TYPE_CONST, {.i64 = VC2_TRANSFORM_5_3}, INT_MIN, INT_MAX, VC2ENC_FLAGS, .unit = "wavelet_idx"}, + {"haar", "Haar (with shift)", 0, AV_OPT_TYPE_CONST, {.i64 = VC2_TRANSFORM_HAAR_S}, INT_MIN, INT_MAX, VC2ENC_FLAGS, .unit = "wavelet_idx"}, + {"haar_noshift", "Haar (without shift)", 0, AV_OPT_TYPE_CONST, {.i64 = VC2_TRANSFORM_HAAR}, INT_MIN, INT_MAX, VC2ENC_FLAGS, .unit = "wavelet_idx"}, + {"qm", "Custom quantization matrix", offsetof(VC2EncContext, quant_matrix), AV_OPT_TYPE_INT, {.i64 = VC2_QM_DEF}, 0, VC2_QM_NB, VC2ENC_FLAGS, .unit = "quant_matrix"}, + {"default", "Default from the specifications", 0, AV_OPT_TYPE_CONST, {.i64 = VC2_QM_DEF}, INT_MIN, INT_MAX, VC2ENC_FLAGS, .unit = "quant_matrix"}, + {"color", "Prevents low bitrate discoloration", 0, AV_OPT_TYPE_CONST, {.i64 = VC2_QM_COL}, INT_MIN, INT_MAX, VC2ENC_FLAGS, .unit = "quant_matrix"}, + {"flat", "Optimize for PSNR", 0, AV_OPT_TYPE_CONST, {.i64 = VC2_QM_FLAT}, INT_MIN, INT_MAX, VC2ENC_FLAGS, .unit = "quant_matrix"}, + {NULL} +}; + +static const AVClass vc2enc_class = { + .class_name = "vc2_vulkan_encoder", + .category = AV_CLASS_CATEGORY_ENCODER, + .option = vc2enc_options, + .item_name = av_default_item_name, + .version = LIBAVUTIL_VERSION_INT +}; + +static const FFCodecDefault vc2enc_defaults[] = { + { "b", "600000000" }, + { NULL }, +}; + +static const AVCodecHWConfigInternal *const ff_vc2_hw_configs[] = { + 
HW_CONFIG_ENCODER_FRAMES(VULKAN, VULKAN), + HW_CONFIG_ENCODER_DEVICE(NONE, VULKAN), + NULL, +}; + +const FFCodec ff_vc2_vulkan_encoder = { + .p.name = "vc2_vulkan", + CODEC_LONG_NAME("SMPTE VC-2"), + .p.type = AVMEDIA_TYPE_VIDEO, + .p.id = AV_CODEC_ID_DIRAC, + .p.capabilities = AV_CODEC_CAP_HARDWARE, + .caps_internal = FF_CODEC_CAP_INIT_CLEANUP, + .priv_data_size = sizeof(VC2EncVulkanContext), + .init = vc2_encode_init, + .close = vc2_encode_end, + FF_CODEC_ENCODE_CB(vc2_encode_frame), + .p.priv_class = &vc2enc_class, + .defaults = vc2enc_defaults, + CODEC_PIXFMTS(AV_PIX_FMT_VULKAN), + .hw_configs = ff_vc2_hw_configs, +}; diff --git a/libavcodec/vulkan/vc2_dwt_haar.comp b/libavcodec/vulkan/vc2_dwt_haar.comp new file mode 100644 index 0000000000..4806cca729 --- /dev/null +++ b/libavcodec/vulkan/vc2_dwt_haar.comp @@ -0,0 +1,82 @@ +/* + * VC2 codec + * + * Copyright (c) 2025 raphaelthegreat <geoster3d@gmail.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#extension GL_EXT_scalar_block_layout : require +#extension GL_EXT_buffer_reference : require + +#define LOCAL_X 1024 + +layout(push_constant, scalar) uniform ComputeInfo { + int s; + int plane_idx; + int wavelet_depth; +}; + +shared int local_coef[LOCAL_X]; + +void main() +{ + ivec2 coord = ivec2(gl_GlobalInvocationID.xy); + ivec2 dwt_dim = imageSize(coef_buf[plane_idx]); + int value = imageLoad(coef_buf[plane_idx], coord).x; + + /* Perform Haar wavelet on the 32x32 local workgroup with shared memory */ + for (int i = 0; i < wavelet_depth; i++) + { + ivec2 mask = ivec2((1 << i) - 1); + if (any(notEqual(coord & mask, ivec2(0)))) + break; + + /* Offset between valid hor pixels for each level, +1, +2, +4 etc */ + int dist = (1 << i); + + local_coef[gl_LocalInvocationIndex] = value; + barrier(); + + /* Horizontal haar wavelet */ + uint other_id = gl_LocalInvocationIndex ^ dist; + int other = local_coef[other_id]; + int a = gl_LocalInvocationIndex < other_id ? value : other; + int b = gl_LocalInvocationIndex < other_id ? other : value; + int dst_b = (b - a) * (1 << s); + int dst_a = a * (1 << s) + ((dst_b + 1) >> 1); + value = gl_LocalInvocationIndex < other_id ? dst_a : dst_b; + + /* Offset between valid ver pixels for each level, +1, +2, +4 etc */ + dist <<= 5; + + local_coef[gl_LocalInvocationIndex] = value; + barrier(); + + /* Vertical haar wavelet */ + other_id = gl_LocalInvocationIndex ^ dist; + other = local_coef[other_id]; + a = gl_LocalInvocationIndex < other_id ? value : other; + b = gl_LocalInvocationIndex < other_id ? other : value; + dst_b = b - a; + dst_a = a + ((dst_b + 1) >> 1); + value = gl_LocalInvocationIndex < other_id ? 
dst_a : dst_b; + } + + /* Store value */ + imageStore(coef_buf[plane_idx], coord, ivec4(value)); +} diff --git a/libavcodec/vulkan/vc2_dwt_haar_subgroup.comp b/libavcodec/vulkan/vc2_dwt_haar_subgroup.comp new file mode 100644 index 0000000000..81b0964271 --- /dev/null +++ b/libavcodec/vulkan/vc2_dwt_haar_subgroup.comp @@ -0,0 +1,75 @@ +/* + * VC2 codec + * + * Copyright (c) 2025 raphaelthegreat <geoster3d@gmail.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#extension GL_EXT_scalar_block_layout : require +#extension GL_KHR_shader_subgroup_basic : require +#extension GL_KHR_shader_subgroup_shuffle : require + +#define TILE_DIM 8 + +layout(push_constant, scalar) uniform ComputeInfo { + int s; + int plane_idx; + int wavelet_depth; +}; + +void main() +{ + ivec2 tile_coord = ivec2(gl_WorkGroupID.xy); + ivec2 local_coord = ivec2(gl_LocalInvocationIndex & 7, gl_LocalInvocationIndex >> 3); + ivec2 coord = tile_coord * ivec2(TILE_DIM) + local_coord; + + int value = imageLoad(coef_buf[plane_idx], coord).x; + for (int i = 0; i < wavelet_depth; i++) + { + ivec2 mask = ivec2((1 << i) - 1); + if (any(notEqual(local_coord & mask, ivec2(0)))) + break; + + /* Offset between valid hor pixels for each level, +1, +2, +4 etc */ + int dist = (1 << i); + + /* 
Horizontal haar wavelet */ + uint other_sub_id = gl_SubgroupInvocationID ^ dist; + int other = subgroupShuffle(value, other_sub_id); + int a = gl_SubgroupInvocationID < other_sub_id ? value : other; + int b = gl_SubgroupInvocationID < other_sub_id ? other : value; + int dst_b = (b - a) * (1 << s); + int dst_a = a * (1 << s) + ((dst_b + 1) >> 1); + value = gl_SubgroupInvocationID < other_sub_id ? dst_a : dst_b; + + /* Offset between valid ver pixels for each level, +1, +2, +4 etc */ + dist <<= 3; + + /* Vertical haar wavelet */ + other_sub_id = gl_SubgroupInvocationID ^ dist; + other = subgroupShuffle(value, other_sub_id); + a = gl_SubgroupInvocationID < other_sub_id ? value : other; + b = gl_SubgroupInvocationID < other_sub_id ? other : value; + dst_b = b - a; + dst_a = a + ((dst_b + 1) >> 1); + value = gl_SubgroupInvocationID < other_sub_id ? dst_a : dst_b; + } + + /* Store value */ + imageStore(coef_buf[plane_idx], coord, ivec4(value)); +} diff --git a/libavcodec/vulkan/vc2_dwt_hor_legall.comp b/libavcodec/vulkan/vc2_dwt_hor_legall.comp new file mode 100644 index 0000000000..bada2ee1fd --- /dev/null +++ b/libavcodec/vulkan/vc2_dwt_hor_legall.comp @@ -0,0 +1,82 @@ +/* + * VC2 codec + * + * Copyright (c) 2025 raphaelthegreat <geoster3d@gmail.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#extension GL_EXT_scalar_block_layout : require +#extension GL_EXT_buffer_reference : require + +layout(push_constant, scalar) uniform ComputeInfo { + int s; + int diff_offset; + int level; +}; + +int image_load(int coord_x) +{ + int coord_y = int(gl_GlobalInvocationID.x); + return imageLoad(coef_buf[gl_GlobalInvocationID.z], ivec2(coord_x, coord_y)).x; +} + +void image_store(int coord_x, int value) +{ + int coord_y = int(gl_GlobalInvocationID.x); + imageStore(coef_buf[gl_GlobalInvocationID.z], ivec2(coord_x, coord_y), ivec4(value)); +} + +void main() +{ + int coord_y = int(gl_GlobalInvocationID.x); + uint plane_idx = gl_GlobalInvocationID.z; + ivec2 work_area = imageSize(coef_buf[plane_idx]); + int dist = 1 << level; + if (coord_y >= work_area.y || (coord_y & (dist - 1)) != 0) + return; + + // Shift in one bit that is used for additional precision + for (int x = 0; x < work_area.x; x += dist) + image_store(x, image_load(x) << 1); + + // Lifting stage 2 + for (int x = 0; x < work_area.x - 2 * dist; x += 2 * dist) { + int lhs = image_load(x); + int rhs = image_load(x + 2 * dist); + int value = image_load(x + dist); + value -= (lhs + rhs + 1) >> 1; + image_store(x + dist, value); + } + int lhs = image_load(work_area.x - 2 * dist); + int value = image_load(work_area.x - dist); + value -= (2 * lhs + 1) >> 1; + image_store(work_area.x - dist, value); + + // Lifting stage 1 + lhs = image_load(dist); + value = image_load(0); + value += (2 * lhs + 2) >> 2; + image_store(0, value); + for (int x = 2 * dist; x <= work_area.x - 2 * dist; x += 2 * dist) { + int lhs = image_load(x - dist); + int rhs = image_load(x + dist); + int value = image_load(x); + value += (lhs + rhs + 2) >> 2; + image_store(x, value); + } +} diff --git 
a/libavcodec/vulkan/vc2_dwt_upload.comp b/libavcodec/vulkan/vc2_dwt_upload.comp new file mode 100644 index 0000000000..c758fd867f --- /dev/null +++ b/libavcodec/vulkan/vc2_dwt_upload.comp @@ -0,0 +1,96 @@ +/* + * VC2 codec + * + * Copyright (c) 2025 raphaelthegreat <geoster3d@gmail.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#extension GL_EXT_scalar_block_layout : require +#extension GL_EXT_shader_explicit_arithmetic_types : require + +#define AV_PIX_FMT_XV30 214 +#define AV_PIX_FMT_XV36 216 +#define AV_PIX_FMT_XV48 242 +#define AV_PIX_FMT_P212 222 +#define AV_PIX_FMT_P012 209 +#define AV_PIX_FMT_P210 198 +#define AV_PIX_FMT_P016 169 +#define AV_PIX_FMT_P010 158 +#define AV_PIX_FMT_NV16 101 +#define AV_PIX_FMT_NV12 23 + +#define Y 0 +#define U 1 +#define V 2 + +layout(push_constant, scalar) uniform ComputeInfo { + int s; + int diff_offset; + int level; +}; + +uvec4 load_plane(uint plane_idx) +{ + ivec2 coord = ivec2(gl_GlobalInvocationID.xy); + return imageLoad(src_planes[plane_idx], coord); +} + +void store_plane(uint plane_idx, uint value) +{ + int result = int(value - diff_offset); + ivec2 coord = ivec2(gl_GlobalInvocationID.xy); + imageStore(coef_buf[plane_idx], coord, ivec4(result)); +} + +void main() +{ + uvec4 p0 = load_plane(0); 
+#if PLANE_FMT == AV_PIX_FMT_XV30 + store_plane(Y, (p0.x >> 10) & 0x3FF); + store_plane(U, p0.x & 0x3FF); + store_plane(V, (p0.x >> 20) & 0x3FF); +#elif PLANE_FMT == AV_PIX_FMT_XV36 + store_plane(Y, p0.y >> 4); + store_plane(U, p0.x >> 4); + store_plane(V, p0.z >> 4); +#elif PLANE_FMT == AV_PIX_FMT_NV12 + uvec4 p1 = load_plane(1); + store_plane(Y, p0.x | p0.y << 8); + store_plane(U, p1.x); + store_plane(V, p1.y); +#elif PLANE_FMT == AV_PIX_FMT_NV16 + uvec4 p1 = load_plane(1); + store_plane(Y, p0.x); + store_plane(U, p1.x); + store_plane(V, p1.y); +#elif PLANE_FMT == AV_PIX_FMT_P010 || PLANE_FMT == AV_PIX_FMT_P210 + uvec4 p1 = load_plane(1); + store_plane(Y, p0.x >> 6); + store_plane(U, p1.x >> 6); + store_plane(V, p1.y >> 6); +#elif PLANE_FMT == AV_PIX_FMT_P012 || PLANE_FMT == AV_PIX_FMT_P212 + uvec4 p1 = load_plane(1); + store_plane(Y, p0.x >> 4); + store_plane(U, p1.x >> 4); + store_plane(V, p1.y >> 4); +#else + store_plane(Y, p0.x); + store_plane(U, load_plane(1).x); + store_plane(V, load_plane(2).x); +#endif +} diff --git a/libavcodec/vulkan/vc2_dwt_ver_legall.comp b/libavcodec/vulkan/vc2_dwt_ver_legall.comp new file mode 100644 index 0000000000..ca391cc8d8 --- /dev/null +++ b/libavcodec/vulkan/vc2_dwt_ver_legall.comp @@ -0,0 +1,78 @@ +/* + * VC2 codec + * + * Copyright (c) 2025 raphaelthegreat <geoster3d@gmail.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#extension GL_EXT_scalar_block_layout : require +#extension GL_EXT_buffer_reference : require + +layout(push_constant, scalar) uniform ComputeInfo { + int s; + int diff_offset; + int level; +}; + +int image_load(int coord_y) +{ + int coord_x = int(gl_GlobalInvocationID.x); + return imageLoad(coef_buf[gl_GlobalInvocationID.z], ivec2(coord_x, coord_y)).x; +} + +void image_store(int coord_y, int value) +{ + int coord_x = int(gl_GlobalInvocationID.x); + imageStore(coef_buf[gl_GlobalInvocationID.z], ivec2(coord_x, coord_y), ivec4(value)); +} + +void main() +{ + int coord_x = int(gl_GlobalInvocationID.x); + uint plane_idx = gl_GlobalInvocationID.z; + ivec2 work_area = imageSize(coef_buf[plane_idx]); + int dist = 1 << level; + if (coord_x >= work_area.x || (coord_x & (dist - 1)) != 0) + return; + + // Lifting stage 2 + for (int y = dist; y < work_area.y - 2 * dist; y += 2 * dist) { + int lhs = image_load(y - dist); + int rhs = image_load(y + dist); + int value = image_load(y); + value -= (lhs + rhs + 1) >> 1; + image_store(y, value); + } + int lhs = image_load(work_area.y - 2 * dist); + int value = image_load(work_area.y - dist); + value -= (2 * lhs + 1) >> 1; + image_store(work_area.y - dist, value); + + // Lifting stage 1 + lhs = image_load(dist); + value = image_load(0); + value += (2 * lhs + 2) >> 2; + image_store(0, value); + for (int y = 2 * dist; y <= work_area.y - 2 * dist; y += 2 * dist) { + int lhs = image_load(y + dist); + int rhs = image_load(y - dist); + int value = image_load(y); + value += (lhs + rhs + 2) >> 2; + image_store(y, value); + } +} diff --git a/libavcodec/vulkan/vc2_encode.comp b/libavcodec/vulkan/vc2_encode.comp new file mode 100644 index 0000000000..4d8adcca61 --- /dev/null +++ b/libavcodec/vulkan/vc2_encode.comp @@ -0,0 
+1,159 @@ +/* + * VC2 codec + * + * Copyright (c) 2025 raphaelthegreat <geoster3d@gmail.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#extension GL_EXT_shader_explicit_arithmetic_types : require +#extension GL_EXT_scalar_block_layout : require +#extension GL_EXT_buffer_reference : require +#extension GL_EXT_debug_printf : require + +#define MAX_DWT_LEVELS (5) + +layout(push_constant, scalar) uniform ComputeInfo { + u8buf bytestream; + ivec2 num_slices; + int wavelet_depth; + int size_scaler; + int prefix_bytes; +}; + +void put_vc2_ue_uint(inout PutBitContext pb, uint val) +{ + uint32_t pbits = 1; + int bits = 1; + + ++val; + + while ((val >> 8) != 0) + { + pbits |= uint32_t(interleaved_ue_golomb_tab[val & 0xff]) << bits; + val >>= 8; + bits += 16; + } + + pbits |= uint32_t(top_interleaved_ue_golomb_tab[val]) << bits; + bits += golomb_len_tab[val]; + put_bits(pb, bits, pbits); +} + +int quants[MAX_DWT_LEVELS][4]; + +int subband_coord(int index, int h, int lvl) +{ + int coord = index; + coord <<= 1; + coord |= h; + coord <<= (wavelet_depth-lvl-1); + return coord; +} + +void main() +{ + int slice_index = int(gl_GlobalInvocationID.x); + int max_index = num_slices.x * num_slices.y; + if (slice_index >= max_index) + return; + + /* Step 2. 
Quantize and encode */ + int pb_start = slice_args[slice_index].pb_start; + int workgroup_x = int(gl_WorkGroupSize.x); + for (int i = 0, index = workgroup_x - 1; i < gl_WorkGroupID.x; i++) { + pb_start += slice_args[index].pb_start + slice_args[index].bytes; + index += workgroup_x; + } + ivec2 slice_coord = ivec2(slice_index % num_slices.x, slice_index / num_slices.x); + int slice_bytes_max = slice_args[slice_index].bytes; + int quant_index = slice_args[slice_index].quant_idx; + + PutBitContext pb; + init_put_bits(pb, OFFBUF(u8buf, bytestream, pb_start), slice_bytes_max); + + for (int level = 0; level < wavelet_depth; level++) + for (int orientation = int(level > 0); orientation < 4; orientation++) + quants[level][orientation] = max(quant_index - lut_quant[level][orientation], 0); + + /* Write quant index for this slice */ + put_bits(pb, 8, quant_index); + + /* Luma + 2 Chroma planes */ + for (int p = 0; p < 3; p++) + { + int pad_s, pad_c; + int bytes_start = int32_t(put_bytes_count(pb)); + + /* Save current location and write a zero value */ + uint64_t write_ptr_start = pb.buf; + int bit_left_start = pb.bit_left; + put_bits(pb, 8, 0); + + ivec2 dwt_dim = imageSize(coef_buf[p]); + for (int level = 0; level < wavelet_depth; level++) + { + ivec2 band_size = dwt_dim >> (wavelet_depth - level); + for (int o = int(level > 0); o < 4; o++) + { + /* Encode subband */ + int left = band_size.x * (slice_coord.x) / num_slices.x; + int right = band_size.x * (slice_coord.x+1) / num_slices.x; + int top = band_size.y * (slice_coord.y) / num_slices.y; + int bottom = band_size.y * (slice_coord.y+1) / num_slices.y; + + const int q_idx = quants[level][o]; + const int qfactor = ff_dirac_qscale_tab[q_idx]; + + const int yh = o >> 1; + const int xh = o & 1; + + for (int y = top; y < bottom; y++) + { + for (int x = left; x < right; x++) + { + int sx = subband_coord(x, xh, level); + int sy = subband_coord(y, yh, level); + int coef = imageLoad(coef_buf[p], ivec2(sx, sy)).x; + uint c_abs = 
uint(abs(coef)); + c_abs = (c_abs << 2) / qfactor; + put_vc2_ue_uint(pb, c_abs); + if (c_abs != 0) + put_bits(pb, 1, int(coef < 0)); + } + } + } + } + flush_put_bits(pb); + int bytes_len = int32_t(put_bytes_count(pb)) - bytes_start - 1; + if (p == 2) + { + int len_diff = slice_bytes_max - int32_t(put_bytes_count(pb)); + pad_s = align((bytes_len + len_diff), size_scaler)/size_scaler; + pad_c = (pad_s*size_scaler) - bytes_len; + } + else + { + pad_s = align(bytes_len, size_scaler)/size_scaler; + pad_c = (pad_s*size_scaler) - bytes_len; + } + uint64_t start_ptr = write_ptr_start + ((BUF_BITS - bit_left_start) >> 3); + u8buf(start_ptr).v = uint8_t(pad_s); + /* vc2-reference uses that padding that decodes to '0' coeffs */ + skip_put_bytes(pb, pad_c); + } +} diff --git a/libavcodec/vulkan/vc2_slice_sizes.comp b/libavcodec/vulkan/vc2_slice_sizes.comp new file mode 100644 index 0000000000..61070c1dc2 --- /dev/null +++ b/libavcodec/vulkan/vc2_slice_sizes.comp @@ -0,0 +1,170 @@ +/* + * VC2 codec + * + * Copyright (c) 2025 raphaelthegreat <geoster3d@gmail.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#extension GL_EXT_shader_explicit_arithmetic_types : require +#extension GL_EXT_scalar_block_layout : require +#extension GL_EXT_buffer_reference : require + +#define DIRAC_MAX_QUANT_INDEX 116 +#define MAX_DWT_LEVELS 5 + +layout(push_constant, scalar) uniform ComputeInfo { + ivec2 num_slices; + int wavelet_depth; + int size_scaler; + int prefix_bytes; + int bits_ceil; + int bits_floor; +}; + +int count_vc2_ue_uint(uint val) +{ + return 2 * findMSB(val + 1) + 1; +} + +int cache[DIRAC_MAX_QUANT_INDEX]; +int quants[MAX_DWT_LEVELS][4]; +shared int slice_sizes[gl_WorkGroupSize.x]; + +int subband_coord(int index, int h, int lvl) +{ + int coord = index; + coord <<= 1; + coord |= h; + coord <<= (wavelet_depth-lvl-1); + return coord; +} + +int count_hq_slice(int quant_index) +{ + int bits = 0; + if (cache[quant_index] != 0) + return cache[quant_index]; + + bits += 8*prefix_bytes; + bits += 8; /* quant_idx */ + + for (int level = 0; level < wavelet_depth; level++) + for (int orientation = int(level > 0); orientation < 4; orientation++) + quants[level][orientation] = max(quant_index - lut_quant[level][orientation], 0); + + int slice_index = int(gl_GlobalInvocationID.x); + ivec2 slice_coord = ivec2(slice_index % num_slices.x, slice_index / num_slices.x); + for (int p = 0; p < 3; p++) + { + int bytes_start = bits >> 3; + bits += 8; + + ivec2 dwt_dim = imageSize(coef_buf[p]); + for (int level = 0; level < wavelet_depth; level++) + { + ivec2 band_dim = dwt_dim >> (wavelet_depth - level); + for (int o = int(level > 0); o < 4; o++) + { + const int left = band_dim.x * slice_coord.x / num_slices.x; + const int right = band_dim.x * (slice_coord.x+1) / num_slices.x; + const int top = band_dim.y * slice_coord.y / num_slices.y; + const int bottom = band_dim.y * 
(slice_coord.y+1) / num_slices.y; + + const int q_idx = quants[level][o]; + const int qfactor = ff_dirac_qscale_tab[q_idx]; + + const int yh = o >> 1; + const int xh = o & 1; + + for (int y = top; y < bottom; y++) + { + for (int x = left; x < right; x++) + { + int sx = subband_coord(x, xh, level); + int sy = subband_coord(y, yh, level); + int coef = imageLoad(coef_buf[p], ivec2(sx, sy)).x; + uint c_abs = uint(abs(coef)); + c_abs = (c_abs << 2) / qfactor; + bits += count_vc2_ue_uint(c_abs); + bits += int(c_abs > 0); + } + } + } + } + bits += align(bits, 8) - bits; + int bytes_len = (bits >> 3) - bytes_start - 1; + int pad_s = align(bytes_len, size_scaler) / size_scaler; + int pad_c = (pad_s * size_scaler) - bytes_len; + bits += pad_c * 8; + } + + cache[quant_index] = bits; + return bits; +} + +int ssize_round(int b) +{ + return align(b, size_scaler) + 4 + prefix_bytes; +} + +void main() +{ + int slice_index = int(gl_GlobalInvocationID.x); + int max_index = num_slices.x * num_slices.y; + if (slice_index >= max_index) + return; + + for (int i = 0; i < DIRAC_MAX_QUANT_INDEX; i++) + cache[i] = 0; + + const int q_ceil = DIRAC_MAX_QUANT_INDEX; + const int top = bits_ceil; + const int bottom = bits_floor; + int quant_buf[2] = int[2](-1, -1); + int quant = slice_args[slice_index].quant_idx; + int step = 1; + int bits_last = 0; + int bits = count_hq_slice(quant); + while ((bits > top) || (bits < bottom)) + { + const int signed_step = bits > top ? +step : -step; + quant = clamp(quant + signed_step, 0, q_ceil-1); + bits = count_hq_slice(quant); + if (quant_buf[1] == quant) + { + quant = max(quant_buf[0], quant); + bits = quant == quant_buf[0] ? 
bits_last : bits; + break; + } + step = clamp(step / 2, 1, (q_ceil - 1) / 2); + quant_buf[1] = quant_buf[0]; + quant_buf[0] = quant; + bits_last = bits; + } + int bytes = ssize_round(bits >> 3); + slice_args[slice_index].quant_idx = clamp(quant, 0, q_ceil-1); + slice_args[slice_index].bytes = bytes; + slice_sizes[gl_LocalInvocationIndex] = bytes; + barrier(); + + /* Prefix sum for all slices in current workgroup */ + int total_bytes = 0; + for (int i = 0; i < gl_LocalInvocationIndex; i++) + total_bytes += slice_sizes[i]; + slice_args[slice_index].pb_start = total_bytes; +} -- 2.49.0 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [FFmpeg-devel] [PATCH v4 4/4] lavc: implement a Vulkan-based VC-2 encoder Implements a Vulkan based dirac encoder. Supports Haar and Legall wavelets and should work with all wavelet depths. 2025-05-17 20:48 ` [FFmpeg-devel] [PATCH v4 4/4] lavc: implement a Vulkan-based VC-2 encoder Implements a Vulkan based dirac encoder. Supports Haar and Legall wavelets and should work with all wavelet depths IndecisiveTurtle @ 2025-05-17 20:50 ` IndecisiveTurtle 2025-05-19 17:09 ` Andreas Rheinhardt 1 sibling, 0 replies; 10+ messages in thread From: IndecisiveTurtle @ 2025-05-17 20:50 UTC (permalink / raw) To: ffmpeg-devel I tried to solve all the review comments from the last patchset, please review in case I missed anything. Thanks Στις Σάβ 17 Μαΐ 2025 στις 11:49 μ.μ., ο/η IndecisiveTurtle <geoster3d@gmail.com> έγραψε: > > From: IndecisiveTurtle <geoster3d@gmail.com> > > Performance wise, encoding a 3440x1440 1-minute video is performed in about 2.4 minutes with the cpu encoder running on my Ryzen 5 4600H, while it takes about 1.3 minutes on my NVIDIA GTX 1650 > > Haar shader has a subgroup optimized variant that applies when configured wavelet depth allows it > --- > configure | 1 + > libavcodec/Makefile | 3 + > libavcodec/allcodecs.c | 1 + > libavcodec/vc2enc_vulkan.c | 775 +++++++++++++++++++ > libavcodec/vulkan/vc2_dwt_haar.comp | 82 ++ > libavcodec/vulkan/vc2_dwt_haar_subgroup.comp | 75 ++ > libavcodec/vulkan/vc2_dwt_hor_legall.comp | 82 ++ > libavcodec/vulkan/vc2_dwt_upload.comp | 96 +++ > libavcodec/vulkan/vc2_dwt_ver_legall.comp | 78 ++ > libavcodec/vulkan/vc2_encode.comp | 159 ++++ > libavcodec/vulkan/vc2_slice_sizes.comp | 170 ++++ > 11 files changed, 1522 insertions(+) > create mode 100644 libavcodec/vc2enc_vulkan.c > create mode 100644 libavcodec/vulkan/vc2_dwt_haar.comp > create mode 100644 libavcodec/vulkan/vc2_dwt_haar_subgroup.comp > create mode 100644 libavcodec/vulkan/vc2_dwt_hor_legall.comp > create mode 100644 libavcodec/vulkan/vc2_dwt_upload.comp > 
create mode 100644 libavcodec/vulkan/vc2_dwt_ver_legall.comp > create mode 100644 libavcodec/vulkan/vc2_encode.comp > create mode 100644 libavcodec/vulkan/vc2_slice_sizes.comp > > diff --git a/configure b/configure > index 2e69b3c56c..09f9dff258 100755 > --- a/configure > +++ b/configure > @@ -3132,6 +3132,7 @@ utvideo_encoder_select="bswapdsp huffman llvidencdsp" > vble_decoder_select="llviddsp" > vbn_decoder_select="texturedsp" > vbn_encoder_select="texturedspenc" > +vc2_vulkan_encoder_select="vulkan spirv_compiler" > vmix_decoder_select="idctdsp" > vc1_decoder_select="blockdsp h264qpel intrax8 mpegvideodec qpeldsp vc1dsp" > vc1image_decoder_select="vc1_decoder" > diff --git a/libavcodec/Makefile b/libavcodec/Makefile > index bdf0d6742e..20968520d7 100644 > --- a/libavcodec/Makefile > +++ b/libavcodec/Makefile > @@ -772,6 +772,9 @@ OBJS-$(CONFIG_VC1_MMAL_DECODER) += mmaldec.o > OBJS-$(CONFIG_VC1_QSV_DECODER) += qsvdec.o > OBJS-$(CONFIG_VC1_V4L2M2M_DECODER) += v4l2_m2m_dec.o > OBJS-$(CONFIG_VC2_ENCODER) += vc2enc.o vc2enc_dwt.o vc2enc_common.o diractab.o > +OBJS-$(CONFIG_VC2_VULKAN_ENCODER) += vc2enc_vulkan.o vulkan/vc2_encode.o vulkan/vc2_slice_sizes.o \ > + vulkan/vc2_dwt_hor_legall.o vulkan/vc2_dwt_ver_legall.o \ > + vulkan/vc2_dwt_upload.o vulkan/vc2_dwt_haar.o vulkan/vc2_dwt_haar_subgroup.o > OBJS-$(CONFIG_VCR1_DECODER) += vcr1.o > OBJS-$(CONFIG_VMDAUDIO_DECODER) += vmdaudio.o > OBJS-$(CONFIG_VMDVIDEO_DECODER) += vmdvideo.o > diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c > index cd4f6ecd59..cd23a9490c 100644 > --- a/libavcodec/allcodecs.c > +++ b/libavcodec/allcodecs.c > @@ -367,6 +367,7 @@ extern const FFCodec ff_vc1_mmal_decoder; > extern const FFCodec ff_vc1_qsv_decoder; > extern const FFCodec ff_vc1_v4l2m2m_decoder; > extern const FFCodec ff_vc2_encoder; > +extern const FFCodec ff_vc2_vulkan_encoder; > extern const FFCodec ff_vcr1_decoder; > extern const FFCodec ff_vmdvideo_decoder; > extern const FFCodec ff_vmix_decoder; > diff --git 
a/libavcodec/vc2enc_vulkan.c b/libavcodec/vc2enc_vulkan.c > new file mode 100644 > index 0000000000..23b204cf92 > --- /dev/null > +++ b/libavcodec/vc2enc_vulkan.c > @@ -0,0 +1,775 @@ > +/* > + * Copyright (C) 2025 raphaelthegreat <geoster3d@gmail.com> > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA > + */ > + > +#include "libavutil/avassert.h" > +#include "libavutil/mem.h" > +#include "libavutil/pixdesc.h" > +#include "libavutil/opt.h" > +#include "libavutil/thread.h" > +#include "libavutil/version.h" > +#include "libavutil/vulkan_spirv.h" > +#include "libavutil/hwcontext_vulkan.h" > +#include "libavutil/vulkan_loader.h" > +#include "libavutil/vulkan.h" > +#include "codec_internal.h" > +#include "internal.h" > +#include "encode.h" > +#include "version.h" > +#include "vc2enc_common.h" > +#include "hwconfig.h" > + > +#define LEGALL_TILE_DIM 16 > +#define LEGALL_WORKGROUP_X 64 > +#define SLICE_WORKGROUP_X 128 > +#define MAX_NUM_PLANES 3 > + > +extern const char *ff_source_common_comp; > +extern const char *ff_source_vc2_encode_comp; > +extern const char *ff_source_vc2_dwt_hor_legall_comp; > +extern const char *ff_source_vc2_dwt_ver_legall_comp; > +extern const char *ff_source_vc2_slice_sizes_comp; > +extern const char 
*ff_source_vc2_dwt_upload_comp; > +extern const char *ff_source_vc2_dwt_haar_comp; > +extern const char *ff_source_vc2_dwt_haar_subgroup_comp; > + > +typedef struct VC2DwtPushData { > + int s; > + union { > + int diff_offset; > + int plane_idx; > + }; > + int level; > +} VC2DwtPushData; > + > +typedef struct VC2EncAuxData { > + int quant[MAX_DWT_LEVELS][4]; > + int ff_dirac_qscale_tab[116]; > + uint16_t interleaved_ue_golomb_tab[256]; > + uint16_t top_interleaved_ue_golomb_tab[256]; > + uint8_t golomb_len_tab[256]; > +} VC2EncAuxData; > + > +typedef struct VC2EncPushData { > + VkDeviceAddress pb; > + int num_x; > + int num_y; > + int wavelet_depth; > + int size_scaler; > + int prefix_bytes; > +} VC2EncPushData; > + > +typedef struct VC2EncSliceArgs { > + int quant_idx; > + int bytes; > + int pb_start; > + int pad; > +} VC2EncSliceArgs; > + > +typedef struct VC2EncSliceCalcPushData { > + int num_x; > + int num_y; > + int wavelet_depth; > + int size_scaler; > + int prefix_bytes; > + int bits_ceil; > + int bits_floor; > +} VC2EncSliceCalcPushData; > + > +typedef struct VC2EncVulkanContext { > + VC2EncContext base; > + FFVkBuffer lut_buf; > + FFVkBuffer slice_buf; > + VC2EncSliceArgs *slice_args; > + > + /* Vulkan state */ > + FFVulkanContext vkctx; > + AVVulkanDeviceQueueFamily *qf; > + FFVkExecPool e; > + FFVkExecContext *exec; > + > + FFVulkanShader dwt_haar_shd; > + FFVulkanShader dwt_upload_shd; > + FFVulkanShader dwt_hor_shd, dwt_ver_shd; > + FFVulkanShader slice_shd; > + FFVulkanShader enc_shd; > + AVBufferPool* dwt_buf_pool; > + int haar_subgroup; > + > + VkBuffer plane_buf; > + VC2EncPushData enc_consts; > + VC2DwtPushData dwt_consts; > + VC2EncSliceCalcPushData calc_consts; > + > + /* Intermediate frame pool */ > + AVBufferRef *intermediate_frames_ref[3]; > + AVFrame *intermediate_frame[AV_NUM_DATA_POINTERS]; > + VkImageView intermediate_views[AV_NUM_DATA_POINTERS]; > +} VC2EncVulkanContext; > + > +static int init_vulkan_pipeline(VC2EncVulkanContext* s, 
FFVkSPIRVCompiler *spv, > + FFVulkanShader* shd, int push_size, > + int lg_x, int lg_y, int lg_z, > + const char* pl_name, const char* pl_source, > + int start_desc, int num_desc) > +{ > + int err = 0; > + uint8_t *spv_data; > + size_t spv_len; > + void *spv_opaque = NULL; > + FFVulkanContext *vkctx = &s->vkctx; > + FFVulkanDescriptorSetBinding *desc; > + > + ff_vk_shader_init(vkctx, shd, pl_name, VK_SHADER_STAGE_COMPUTE_BIT, > + NULL, 0, lg_x, lg_y, lg_z, 0); > + > + av_bprintf(&shd->src, "struct SliceArgs {int quant_idx;int bytes;int pb_start;int pad;};\n"); > + > + desc = (FFVulkanDescriptorSetBinding []) { > + { > + .name = "src_planes", > + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, > + .mem_layout = ff_vk_shader_rep_fmt(vkctx->frames->sw_format, FF_VK_REP_UINT), > + .dimensions = 2, > + .elems = av_pix_fmt_count_planes(vkctx->frames->sw_format), > + .stages = VK_SHADER_STAGE_COMPUTE_BIT, > + }, > + { > + .name = "coef_buf", > + .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, > + .mem_layout = "r32i", > + .dimensions = 2, > + .elems = 3, > + .stages = VK_SHADER_STAGE_COMPUTE_BIT, > + }, > + { > + .name = "AuxData", > + .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, > + .stages = VK_SHADER_STAGE_COMPUTE_BIT, > + .mem_layout = "scalar", > + .buf_content = "int lut_quant[5][4]; int ff_dirac_qscale_tab[116]; " > + "uint16_t interleaved_ue_golomb_tab[256]; " > + "uint16_t top_interleaved_ue_golomb_tab[256]; " > + "uint8_t golomb_len_tab[256];", > + }, > + { > + .name = "SliceBuffer", > + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, > + .stages = VK_SHADER_STAGE_COMPUTE_BIT, > + .mem_layout = "scalar", > + .buf_content = "SliceArgs slice_args[];", > + }, > + }; > + RET(ff_vk_shader_add_descriptor_set(vkctx, shd, desc + start_desc, num_desc, 0, 0)); > + > + ff_vk_shader_add_push_const(shd, 0, push_size, VK_SHADER_STAGE_COMPUTE_BIT); > + av_bprintf(&shd->src, "#define PB_UNALIGNED\n"); > + av_bprintf(&shd->src, "#define PLANE_FMT %d\n", vkctx->frames->sw_format); > + 
GLSLD(ff_source_common_comp); > + GLSLD(pl_source); > + > + /* Compile and link the shader */ > + RET(spv->compile_shader(vkctx, spv, shd, &spv_data, &spv_len, "main", &spv_opaque)); > + RET(ff_vk_shader_link(vkctx, shd, spv_data, spv_len, "main")); > + RET(ff_vk_shader_register_exec(vkctx, &s->e, shd)); > + > +fail: > + return err; > +} > + > +static int init_frame_pools(AVCodecContext *avctx) > +{ > + int i, err = 0; > + VC2EncVulkanContext *sv = avctx->priv_data; > + AVHWFramesContext *frames_ctx; > + AVVulkanFramesContext *vk_frames; > + enum AVPixelFormat sw_format = AV_PIX_FMT_GRAY32; > + > + for (i = 0; i < 3; i++) { > + sv->intermediate_frames_ref[i] = av_hwframe_ctx_alloc(sv->vkctx.device_ref); > + if (!sv->intermediate_frames_ref[i]) > + return AVERROR(ENOMEM); > + > + frames_ctx = (AVHWFramesContext *)sv->intermediate_frames_ref[i]->data; > + frames_ctx->format = AV_PIX_FMT_VULKAN; > + frames_ctx->sw_format = sw_format; > + frames_ctx->width = sv->base.plane[i].dwt_width; > + frames_ctx->height = sv->base.plane[i].dwt_height; > + > + vk_frames = frames_ctx->hwctx; > + vk_frames->tiling = VK_IMAGE_TILING_OPTIMAL; > + vk_frames->usage = VK_IMAGE_USAGE_STORAGE_BIT; > + vk_frames->img_flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT; > + > + err = av_hwframe_ctx_init(sv->intermediate_frames_ref[i]); > + if (err < 0) { > + av_log(avctx, AV_LOG_ERROR, "Unable to initialize frame pool with format %s: %s\n", > + av_get_pix_fmt_name(sw_format), av_err2str(err)); > + av_buffer_unref(&sv->intermediate_frames_ref[i]); > + return err; > + } > + } > + > + return err; > +} > + > +static void vulkan_bind_img_planes(FFVulkanContext *s, FFVkExecContext *e, > + FFVulkanShader *shd, VkImageView *views, > + int set, int binding) > +{ > + for (int i = 0; i < 3; i++) > + ff_vk_shader_update_img(s, e, shd, set, binding, i, > + views[i], VK_IMAGE_LAYOUT_GENERAL, > + VK_NULL_HANDLE); > +} > + > +static void dwt_plane_haar(VC2EncVulkanContext *s, FFVkExecContext *exec, > + 
VkImageMemoryBarrier2* img_bar, int nb_img_bar) > +{ > + int p, group_x, group_y; > + FFVulkanContext *vkctx = &s->vkctx; > + FFVulkanFunctions *vk = &vkctx->vkfn; > + Plane* plane; > + > + s->dwt_consts.level = s->base.wavelet_depth; > + vulkan_bind_img_planes(vkctx, exec, &s->dwt_haar_shd, s->intermediate_views, 0, 0); > + ff_vk_exec_bind_shader(vkctx, exec, &s->dwt_haar_shd); > + > + /* Haar pass */ > + for (p = 0; p < 3; p++) { > + plane = &s->base.plane[p]; > + s->dwt_consts.plane_idx = p; > + if (s->haar_subgroup) { > + group_x = FFALIGN(plane->dwt_width, 8) >> 3; > + group_y = FFALIGN(plane->dwt_height, 8) >> 3; > + } else { > + group_x = FFALIGN(plane->dwt_width, 32) >> 5; > + group_y = FFALIGN(plane->dwt_height, 32) >> 5; > + } > + > + ff_vk_shader_update_push_const(vkctx, exec, &s->dwt_haar_shd, VK_SHADER_STAGE_COMPUTE_BIT, > + 0, sizeof(VC2DwtPushData), &s->dwt_consts); > + vk->CmdDispatch(exec->buf, group_x, group_y, 1); > + } > + > + /* Wait for haar dispatches to complete */ > + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { > + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, > + .pImageMemoryBarriers = img_bar, > + .imageMemoryBarrierCount = nb_img_bar, > + }); > +} > + > +static void dwt_plane_legall(VC2EncVulkanContext *s, FFVkExecContext *exec, > + VkImageMemoryBarrier2* img_bar, int nb_img_bar) > +{ > + FFVulkanContext *vkctx = &s->vkctx; > + FFVulkanFunctions *vk = &vkctx->vkfn; > + int legall_group_x = (s->base.plane[0].dwt_height + LEGALL_WORKGROUP_X - 1) >> 6; > + int legall_group_y = (s->base.plane[0].dwt_width + LEGALL_WORKGROUP_X - 1) >> 6; > + int i; > + > + /* Perform legall wavelet transform */ > + for (i = 0; i < s->base.wavelet_depth; i++) { > + s->dwt_consts.level = i; > + > + /* Horizontal legall pass */ > + vulkan_bind_img_planes(vkctx, exec, &s->dwt_hor_shd, s->intermediate_views, 0, 0); > + ff_vk_exec_bind_shader(vkctx, exec, &s->dwt_hor_shd); > + ff_vk_shader_update_push_const(vkctx, exec, &s->dwt_hor_shd, 
VK_SHADER_STAGE_COMPUTE_BIT, > + 0, sizeof(VC2DwtPushData), &s->dwt_consts); > + vk->CmdDispatch(exec->buf, legall_group_x, 1, 3); > + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { > + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, > + .pImageMemoryBarriers = img_bar, > + .imageMemoryBarrierCount = nb_img_bar, > + }); > + > + /* Vertical legall pass */ > + vulkan_bind_img_planes(vkctx, exec, &s->dwt_ver_shd, s->intermediate_views, 0, 0); > + ff_vk_exec_bind_shader(vkctx, exec, &s->dwt_ver_shd); > + ff_vk_shader_update_push_const(vkctx, exec, &s->dwt_ver_shd, VK_SHADER_STAGE_COMPUTE_BIT, > + 0, sizeof(VC2DwtPushData), &s->dwt_consts); > + vk->CmdDispatch(exec->buf, legall_group_y, 1, 3); > + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { > + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, > + .pImageMemoryBarriers = img_bar, > + .imageMemoryBarrierCount = nb_img_bar, > + }); > + } > +} > + > +static int dwt_planes(VC2EncVulkanContext *s, AVFrame *frame) > +{ > + int i, err = 0, nb_img_bar = 0; > + int wavelet_idx = s->base.wavelet_idx; > + int group_x = s->base.plane[0].dwt_width >> 3; > + int group_y = s->base.plane[0].dwt_height >> 3; > + FFVulkanContext *vkctx = &s->vkctx; > + FFVulkanFunctions *vk = &vkctx->vkfn; > + FFVkExecContext *exec = s->exec; > + VkImageView views[AV_NUM_DATA_POINTERS]; > + VkImageMemoryBarrier2 img_bar[AV_NUM_DATA_POINTERS]; > + > + /* Generate barriers and image views for frame images. */ > + RET(ff_vk_exec_add_dep_frame(vkctx, exec, frame, > + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, > + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); > + RET(ff_vk_create_imageviews(vkctx, exec, views, frame, FF_VK_REP_UINT)); > + ff_vk_frame_barrier(vkctx, exec, frame, img_bar, &nb_img_bar, > + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, > + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, > + VK_ACCESS_SHADER_READ_BIT, > + VK_IMAGE_LAYOUT_GENERAL, > + VK_QUEUE_FAMILY_IGNORED); > + > + /* Submit the image barriers. 
*/ > + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { > + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, > + .pImageMemoryBarriers = img_bar, > + .imageMemoryBarrierCount = nb_img_bar, > + }); > + > + /* Create temporary frames */ > + nb_img_bar = 0; > + for (i = 0; i < 3; i++) { > + s->intermediate_frame[i] = av_frame_alloc(); > + if (!s->intermediate_frame[i]) > + return AVERROR(ENOMEM); > + > + RET(av_hwframe_get_buffer(s->intermediate_frames_ref[i], > + s->intermediate_frame[i], 0)); > + RET(ff_vk_exec_add_dep_frame(vkctx, exec, s->intermediate_frame[i], > + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, > + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); > + RET(ff_vk_create_imageviews(vkctx, exec, &s->intermediate_views[i], > + s->intermediate_frame[i], FF_VK_REP_INT)); > + ff_vk_frame_barrier(vkctx, exec, s->intermediate_frame[i], img_bar, &nb_img_bar, > + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, > + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, > + VK_ACCESS_SHADER_READ_BIT, > + VK_IMAGE_LAYOUT_GENERAL, > + VK_QUEUE_FAMILY_IGNORED); > + } > + > + /* Submit the image barriers. */ > + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { > + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, > + .pImageMemoryBarriers = img_bar, > + .imageMemoryBarrierCount = nb_img_bar, > + }); > + > + /* Bind input images to the shader. */ > + ff_vk_shader_update_img_array(vkctx, exec, &s->dwt_upload_shd, frame, views, 0, 0, > + VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE); > + vulkan_bind_img_planes(vkctx, exec, &s->dwt_upload_shd, s->intermediate_views, 0, 1); > + > + /* Upload coefficients from planes to the buffer. 
*/ > + s->dwt_consts.diff_offset = s->base.diff_offset; > + ff_vk_exec_bind_shader(vkctx, exec, &s->dwt_upload_shd); > + ff_vk_shader_update_push_const(vkctx, exec, &s->dwt_upload_shd, VK_SHADER_STAGE_COMPUTE_BIT, > + 0, sizeof(VC2DwtPushData), &s->dwt_consts); > + vk->CmdDispatch(exec->buf, group_x, group_y, 1); > + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { > + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, > + .pImageMemoryBarriers = img_bar, > + .imageMemoryBarrierCount = nb_img_bar, > + }); > + > + /* Perform wavelet transform. */ > + if (wavelet_idx == VC2_TRANSFORM_HAAR || wavelet_idx == VC2_TRANSFORM_HAAR_S) > + dwt_plane_haar(s, exec, img_bar, nb_img_bar); > + else if (wavelet_idx == VC2_TRANSFORM_5_3) > + dwt_plane_legall(s, exec, img_bar, nb_img_bar); > + > +fail: > + return err; > +} > + > +static void encode_slices(VC2EncContext *s) > +{ > + VC2EncVulkanContext *sv = (VC2EncVulkanContext*)s; > + FFVkExecContext *exec = sv->exec; > + int num_slices = s->num_x * s->num_y; > + int num_slice_groups = (num_slices + SLICE_WORKGROUP_X - 1) >> 7; > + int i, skip = 0; > + FFVulkanContext *vkctx = &sv->vkctx; > + FFVulkanFunctions *vk = &vkctx->vkfn; > + > + /* Calculate slice sizes. */ > + vulkan_bind_img_planes(vkctx, exec, &sv->slice_shd, sv->intermediate_views, 0, 0); > + ff_vk_shader_update_desc_buffer(vkctx, exec, &sv->slice_shd, > + 0, 1, 0, &sv->lut_buf, 0, > + sizeof(VC2EncAuxData), > + VK_FORMAT_UNDEFINED); > + ff_vk_shader_update_desc_buffer(vkctx, exec, &sv->slice_shd, > + 0, 2, 0, &sv->slice_buf, 0, > + sv->slice_buf.size, > + VK_FORMAT_UNDEFINED); > + ff_vk_exec_bind_shader(vkctx, exec, &sv->slice_shd); > + ff_vk_shader_update_push_const(vkctx, exec, &sv->slice_shd, VK_SHADER_STAGE_COMPUTE_BIT, > + 0, sizeof(VC2EncSliceCalcPushData), &sv->calc_consts); > + vk->CmdDispatch(exec->buf, num_slice_groups, 1, 1); > + > + flush_put_bits(&s->pb); > + sv->enc_consts.pb += put_bytes_output(&s->pb); > + > + /* Wait for slice sizes to be written. 
*/ > + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { > + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, > + .pBufferMemoryBarriers = &(VkBufferMemoryBarrier2) { > + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, > + .srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, > + .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, > + .srcAccessMask = VK_ACCESS_2_SHADER_WRITE_BIT, > + .dstAccessMask = VK_ACCESS_2_SHADER_READ_BIT, > + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, > + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, > + .buffer = sv->slice_buf.buf, > + .size = sizeof(VC2EncSliceArgs) * num_slices, > + .offset = 0, > + }, > + .bufferMemoryBarrierCount = 1U, > + }); > + > + /* Perform the encoding. */ > + vulkan_bind_img_planes(vkctx, exec, &sv->enc_shd, sv->intermediate_views, 0, 0); > + ff_vk_shader_update_desc_buffer(vkctx, exec, &sv->enc_shd, > + 0, 1, 0, &sv->lut_buf, 0, > + sizeof(VC2EncAuxData), > + VK_FORMAT_UNDEFINED); > + ff_vk_shader_update_desc_buffer(vkctx, exec, &sv->enc_shd, > + 0, 2, 0, &sv->slice_buf, 0, > + sv->slice_buf.size, > + VK_FORMAT_UNDEFINED); > + ff_vk_exec_bind_shader(vkctx, exec, &sv->enc_shd); > + ff_vk_shader_update_push_const(vkctx, exec, &sv->enc_shd, VK_SHADER_STAGE_COMPUTE_BIT, > + 0, sizeof(VC2EncPushData), &sv->enc_consts); > + > + vk->CmdDispatch(exec->buf, num_slice_groups, 1, 1); > + > + ff_vk_exec_submit(vkctx, exec); > + ff_vk_exec_wait(vkctx, exec); > + > + for (int slice_y = 0; slice_y < s->num_y; slice_y++) { > + for (int slice_x = 0; slice_x < s->num_x; slice_x++) { > + VC2EncSliceArgs *args = &sv->slice_args[s->num_x * slice_y + slice_x]; > + skip += args->bytes; > + } > + } > + > + /* Skip forward to write end header */ > + skip_put_bytes(&s->pb, skip); > + > + /* Free allocated intermediate frames */ > + for (i = 0; i < 3; i++) > + av_frame_free(&sv->intermediate_frame[i]); > +} > + > +static int encode_frame(VC2EncVulkanContext *sv, AVPacket *avpkt, > + const AVFrame *frame, const int 
header_size) > +{ > + int ret; > + int64_t max_frame_bytes; > + AVBufferRef *avpkt_buf = NULL; > + FFVkBuffer* buf_vk = NULL; > + VC2EncContext* s = &sv->base; > + FFVulkanContext *vkctx = &sv->vkctx; > + > + /* Perform wavelet pass on the input data. */ > + ret = dwt_planes(sv, (AVFrame*)frame); > + if (ret) > + return ret; > + > + /* Allocate a buffer that can fit all 3 planes of data */ > + max_frame_bytes = header_size + MAX_NUM_PLANES * s->avctx->width > + * s->avctx->height > + * sizeof(dwtcoef); > + > + /* Get a pooled device local host visible buffer for writing output data */ > + ret = ff_vk_get_pooled_buffer(vkctx, &sv->dwt_buf_pool, &avpkt_buf, > + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | > + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, NULL, > + max_frame_bytes, > + VK_MEMORY_PROPERTY_HOST_CACHED_BIT | > + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | > + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); > + if (ret < 0) > + return ret; > + > + ff_vk_exec_add_dep_buf(vkctx, sv->exec, &avpkt_buf, 1, 1); > + buf_vk = (FFVkBuffer *)avpkt_buf->data; > + sv->enc_consts.pb = buf_vk->address; > + > + /* Initialize packet. */ > + avpkt->buf = avpkt_buf; > + avpkt->data = buf_vk->mapped_mem; > + avpkt->size = max_frame_bytes; > + init_put_bits(&s->pb, avpkt->data, avpkt->size); > + > + /* Encode frame */ > + ff_vc2_encode_frame(s, encode_slices); > + > + return 0; > +} > + > +static av_cold int vc2_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, > + const AVFrame *frame, int *got_packet) > +{ > + int ret = 0; > + VC2EncVulkanContext *sv = avctx->priv_data; > + VC2EncContext *s = &sv->base; > + const int bitexact = avctx->flags & AV_CODEC_FLAG_BITEXACT; > + const int aux_data_size = bitexact ? 
sizeof("Lavc") : sizeof(LIBAVCODEC_IDENT); > + const int header_size = 100 + aux_data_size; > + > + ret = ff_vc2_frame_init_properties(avctx, s); > + if (ret) > + return ret; > + > + sv->calc_consts.size_scaler = s->size_scaler; > + sv->calc_consts.bits_ceil = s->slice_max_bytes << 3; > + sv->calc_consts.bits_floor = s->slice_min_bytes << 3; > + sv->enc_consts.prefix_bytes = 0; > + sv->enc_consts.size_scaler = s->size_scaler; > + > + sv->exec = ff_vk_exec_get(&sv->vkctx, &sv->e); > + ff_vk_exec_start(&sv->vkctx, sv->exec); > + > + ret = encode_frame(sv, avpkt, frame, header_size); > + if (ret) > + return ret; > + > + flush_put_bits(&s->pb); > + av_shrink_packet(avpkt, put_bytes_output(&s->pb)); > + avpkt->flags |= AV_PKT_FLAG_KEY; > + *got_packet = 1; > + > + return 0; > +} > + > +static av_cold int vc2_encode_end(AVCodecContext *avctx) > +{ > + VC2EncVulkanContext *sv = avctx->priv_data; > + FFVulkanContext *vkctx = &sv->vkctx; > + int i; > + > + ff_vk_exec_pool_free(vkctx, &sv->e); > + > + ff_vk_shader_free(vkctx, &sv->dwt_upload_shd); > + ff_vk_shader_free(vkctx, &sv->dwt_haar_shd); > + ff_vk_shader_free(vkctx, &sv->dwt_hor_shd); > + ff_vk_shader_free(vkctx, &sv->dwt_ver_shd); > + ff_vk_shader_free(vkctx, &sv->slice_shd); > + ff_vk_shader_free(vkctx, &sv->enc_shd); > + > + ff_vk_free_buf(vkctx, &sv->slice_buf); > + ff_vk_free_buf(vkctx, &sv->lut_buf); > + > + for (i = 0; i < 3; i++) { > + ff_vc2enc_free_transforms(&sv->base.transform_args[i].t); > + av_buffer_unref(&sv->intermediate_frames_ref[i]); > + } > + > + av_buffer_pool_uninit(&sv->dwt_buf_pool); > + ff_vk_uninit(vkctx); > + > + return 0; > +} > + > +static av_cold int vc2_encode_init(AVCodecContext *avctx) > +{ > + int err = 0, depth; > + const AVPixFmtDescriptor *fmt; > + VC2EncVulkanContext *sv = avctx->priv_data; > + VC2EncContext *s = &sv->base; > + FFVulkanContext *vkctx = &sv->vkctx; > + FFVkSPIRVCompiler *spv; > + VC2EncAuxData *ad = NULL; > + unsigned int subgroup_size = 
vkctx->subgroup_props.maxSubgroupSize; > + > + /* Init vulkan */ > + err = ff_vk_init(&sv->vkctx, avctx, NULL, avctx->hw_frames_ctx); > + if (err < 0) > + return err; > + > + sv->qf = ff_vk_qf_find(vkctx, VK_QUEUE_COMPUTE_BIT, 0); > + if (!sv->qf) { > + av_log(avctx, AV_LOG_ERROR, "Device has no compute queues!\n"); > + return AVERROR(ENOTSUP); > + } > + > + spv = ff_vk_spirv_init(); > + if (!spv) { > + av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n"); > + return AVERROR_EXTERNAL; > + } > + > + ff_vk_exec_pool_init(vkctx, sv->qf, &sv->e, 1, 0, 0, 0, NULL); > + > + /* Chroma subsampling */ > + err = av_pix_fmt_get_chroma_sub_sample(vkctx->frames->sw_format, &s->chroma_x_shift, > + &s->chroma_y_shift); > + if (err < 0) > + return err; > + > + /* Bit depth and color range index */ > + fmt = av_pix_fmt_desc_get(vkctx->frames->sw_format); > + depth = fmt->comp[0].depth; > + > + /* 16-bit depth is unsupported by this encoder */ > + if (depth == 16) { > + av_log(avctx, AV_LOG_ERROR, "16-bit pixel format depth is unsupported by this encoder\n"); > + return AVERROR(ENOTSUP); > + } > + > + /* Perform common initialization. */ > + err = ff_vc2_encode_init(avctx, depth); > + if (err < 0) > + return err; > + > + /* Initialize Haar push data */ > + sv->dwt_consts.diff_offset = s->diff_offset; > + sv->dwt_consts.s = s->wavelet_idx == VC2_TRANSFORM_HAAR_S ? 1 : 0; > + sv->dwt_consts.level = 0; > + > + /* Initializer slice calculation push data */ > + sv->calc_consts.num_x = s->num_x; > + sv->calc_consts.num_y = s->num_y; > + sv->calc_consts.wavelet_depth = s->wavelet_depth; > + sv->calc_consts.prefix_bytes = s->prefix_bytes; > + > + /* Initialize encoder push data */ > + sv->enc_consts.wavelet_depth = s->wavelet_depth; > + sv->enc_consts.num_x = s->num_x; > + sv->enc_consts.num_y = s->num_y; > + > + /* Create buffer for encoder auxilary data. 
*/ > + RET(ff_vk_create_buf(vkctx, &sv->lut_buf, sizeof(VC2EncAuxData), NULL, NULL, > + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | > + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, > + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | > + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); > + RET(ff_vk_map_buffer(vkctx, &sv->lut_buf, (void *)&ad, 0)); > + ff_vc2_init_quant_matrix(s, ad->quant); > + memcpy(ad->ff_dirac_qscale_tab, ff_dirac_qscale_tab, sizeof(ff_dirac_qscale_tab)); > + memcpy(ad->interleaved_ue_golomb_tab, interleaved_ue_golomb_tab, sizeof(interleaved_ue_golomb_tab)); > + memcpy(ad->top_interleaved_ue_golomb_tab, top_interleaved_ue_golomb_tab, sizeof(top_interleaved_ue_golomb_tab)); > + memcpy(ad->golomb_len_tab, golomb_len_tab, sizeof(golomb_len_tab)); > + RET(ff_vk_unmap_buffer(vkctx, &sv->lut_buf, 1)); > + > + /* Create buffer for encoder auxilary data. */ > + RET(ff_vk_create_buf(vkctx, &sv->slice_buf, > + sizeof(VC2EncSliceArgs) * s->num_x * s->num_y, > + NULL, NULL, > + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | > + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, > + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | > + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); > + RET(ff_vk_map_buffer(vkctx, &sv->slice_buf, (void *)&sv->slice_args, 0)); > + memset(sv->slice_args, 0, sv->slice_buf.size); > + > + /* Initialize intermediate frame pool. 
*/ > + RET(init_frame_pools(avctx)); > + > + /* Initialize encoding pipelines */ > + init_vulkan_pipeline(sv, spv, &sv->dwt_upload_shd, sizeof(VC2DwtPushData), > + 8, 8, 1, "dwt_upload_pl", ff_source_vc2_dwt_upload_comp, 0, 2); > + init_vulkan_pipeline(sv, spv, &sv->slice_shd, sizeof(VC2EncPushData), > + SLICE_WORKGROUP_X, 1, 1, "slice_pl", ff_source_vc2_slice_sizes_comp, 1, 3); > + init_vulkan_pipeline(sv, spv, &sv->enc_shd, sizeof(VC2EncPushData), > + SLICE_WORKGROUP_X, 1, 1, "enc_pl", ff_source_vc2_encode_comp, 1, 3); > + sv->haar_subgroup = 0; > + > + if (s->wavelet_idx == VC2_TRANSFORM_HAAR || s->wavelet_idx == VC2_TRANSFORM_HAAR_S) { > + if (subgroup_size == 32 && s->wavelet_depth < 3) { > + init_vulkan_pipeline(sv, spv, &sv->dwt_haar_shd, sizeof(VC2DwtPushData), > + 64, 1, 1, "dwt_haar_pl", ff_source_vc2_dwt_haar_subgroup_comp, 1, 1); > + sv->haar_subgroup = 1; > + } else if (subgroup_size == 64 && s->wavelet_depth < 4) { > + init_vulkan_pipeline(sv, spv, &sv->dwt_haar_shd, sizeof(VC2DwtPushData), > + 64, 1, 1, "dwt_haar_pl", ff_source_vc2_dwt_haar_subgroup_comp, 1, 1); > + sv->haar_subgroup = 1; > + } else { > + init_vulkan_pipeline(sv, spv, &sv->dwt_haar_shd, sizeof(VC2DwtPushData), > + 32, 32, 1, "dwt_haar_pl", ff_source_vc2_dwt_haar_comp, 1, 1); > + } > + } else if (s->wavelet_idx == VC2_TRANSFORM_5_3) { > + init_vulkan_pipeline(sv, spv, &sv->dwt_hor_shd, sizeof(VC2DwtPushData), > + LEGALL_WORKGROUP_X, 1, 1, "dwt_hor_pl", ff_source_vc2_dwt_hor_legall_comp, 1, 1); > + init_vulkan_pipeline(sv, spv, &sv->dwt_ver_shd, sizeof(VC2DwtPushData), > + LEGALL_WORKGROUP_X, 1, 1, "dwt_ver_pl", ff_source_vc2_dwt_ver_legall_comp, 1, 1); > + } > + > +fail: > + return err; > +} > + > +#define VC2ENC_FLAGS (AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) > +static const AVOption vc2enc_options[] = { > + {"tolerance", "Max undershoot in percent", offsetof(VC2EncContext, tolerance), AV_OPT_TYPE_DOUBLE, {.dbl = 5.0f}, 0.0f, 45.0f, VC2ENC_FLAGS, .unit = "tolerance"}, > 
+ {"slice_width", "Slice width", offsetof(VC2EncContext, slice_width), AV_OPT_TYPE_INT, {.i64 = 32}, 32, 1024, VC2ENC_FLAGS, .unit = "slice_width"}, > + {"slice_height", "Slice height", offsetof(VC2EncContext, slice_height), AV_OPT_TYPE_INT, {.i64 = 16}, 8, 1024, VC2ENC_FLAGS, .unit = "slice_height"}, > + {"wavelet_depth", "Transform depth", offsetof(VC2EncContext, wavelet_depth), AV_OPT_TYPE_INT, {.i64 = 4}, 1, 5, VC2ENC_FLAGS, .unit = "wavelet_depth"}, > + {"wavelet_type", "Transform type", offsetof(VC2EncContext, wavelet_idx), AV_OPT_TYPE_INT, {.i64 = VC2_TRANSFORM_5_3}, 0, VC2_TRANSFORMS_NB, VC2ENC_FLAGS, .unit = "wavelet_idx"}, > + {"5_3", "LeGall (5,3)", 0, AV_OPT_TYPE_CONST, {.i64 = VC2_TRANSFORM_5_3}, INT_MIN, INT_MAX, VC2ENC_FLAGS, .unit = "wavelet_idx"}, > + {"haar", "Haar (with shift)", 0, AV_OPT_TYPE_CONST, {.i64 = VC2_TRANSFORM_HAAR_S}, INT_MIN, INT_MAX, VC2ENC_FLAGS, .unit = "wavelet_idx"}, > + {"haar_noshift", "Haar (without shift)", 0, AV_OPT_TYPE_CONST, {.i64 = VC2_TRANSFORM_HAAR}, INT_MIN, INT_MAX, VC2ENC_FLAGS, .unit = "wavelet_idx"}, > + {"qm", "Custom quantization matrix", offsetof(VC2EncContext, quant_matrix), AV_OPT_TYPE_INT, {.i64 = VC2_QM_DEF}, 0, VC2_QM_NB, VC2ENC_FLAGS, .unit = "quant_matrix"}, > + {"default", "Default from the specifications", 0, AV_OPT_TYPE_CONST, {.i64 = VC2_QM_DEF}, INT_MIN, INT_MAX, VC2ENC_FLAGS, .unit = "quant_matrix"}, > + {"color", "Prevents low bitrate discoloration", 0, AV_OPT_TYPE_CONST, {.i64 = VC2_QM_COL}, INT_MIN, INT_MAX, VC2ENC_FLAGS, .unit = "quant_matrix"}, > + {"flat", "Optimize for PSNR", 0, AV_OPT_TYPE_CONST, {.i64 = VC2_QM_FLAT}, INT_MIN, INT_MAX, VC2ENC_FLAGS, .unit = "quant_matrix"}, > + {NULL} > +}; > + > +static const AVClass vc2enc_class = { > + .class_name = "vc2_vulkan_encoder", > + .category = AV_CLASS_CATEGORY_ENCODER, > + .option = vc2enc_options, > + .item_name = av_default_item_name, > + .version = LIBAVUTIL_VERSION_INT > +}; > + > +static const FFCodecDefault vc2enc_defaults[] = { > + { 
"b", "600000000" }, > + { NULL }, > +}; > + > +static const AVCodecHWConfigInternal *const ff_vc2_hw_configs[] = { > + HW_CONFIG_ENCODER_FRAMES(VULKAN, VULKAN), > + HW_CONFIG_ENCODER_DEVICE(NONE, VULKAN), > + NULL, > +}; > + > +const FFCodec ff_vc2_vulkan_encoder = { > + .p.name = "vc2_vulkan", > + CODEC_LONG_NAME("SMPTE VC-2"), > + .p.type = AVMEDIA_TYPE_VIDEO, > + .p.id = AV_CODEC_ID_DIRAC, > + .p.capabilities = AV_CODEC_CAP_HARDWARE, > + .caps_internal = FF_CODEC_CAP_INIT_CLEANUP, > + .priv_data_size = sizeof(VC2EncVulkanContext), > + .init = vc2_encode_init, > + .close = vc2_encode_end, > + FF_CODEC_ENCODE_CB(vc2_encode_frame), > + .p.priv_class = &vc2enc_class, > + .defaults = vc2enc_defaults, > + CODEC_PIXFMTS(AV_PIX_FMT_VULKAN), > + .hw_configs = ff_vc2_hw_configs, > +}; > diff --git a/libavcodec/vulkan/vc2_dwt_haar.comp b/libavcodec/vulkan/vc2_dwt_haar.comp > new file mode 100644 > index 0000000000..4806cca729 > --- /dev/null > +++ b/libavcodec/vulkan/vc2_dwt_haar.comp > @@ -0,0 +1,82 @@ > +/* > + * VC2 codec > + * > + * Copyright (c) 2025 raphaelthegreat <geoster3d@gmail.com> > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. 
> + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA > + */ > + > +#extension GL_EXT_scalar_block_layout : require > +#extension GL_EXT_buffer_reference : require > + > +#define LOCAL_X 1024 > + > +layout(push_constant, scalar) uniform ComputeInfo { > + int s; > + int plane_idx; > + int wavelet_depth; > +}; > + > +shared int local_coef[LOCAL_X]; > + > +void main() > +{ > + ivec2 coord = ivec2(gl_GlobalInvocationID.xy); > + ivec2 dwt_dim = imageSize(coef_buf[plane_idx]); > + int value = imageLoad(coef_buf[plane_idx], coord).x; > + > + /* Perform Haar wavelet on the 32x32 local workgroup with shared memory */ > + for (int i = 0; i < wavelet_depth; i++) > + { > + ivec2 mask = ivec2((1 << i) - 1); > + if (any(notEqual(coord & mask, ivec2(0)))) > + break; > + > + /* Offset between valid hor pixels for each level, +1, +2, +4 etc */ > + int dist = (1 << i); > + > + local_coef[gl_LocalInvocationIndex] = value; > + barrier(); > + > + /* Horizontal haar wavelet */ > + uint other_id = gl_LocalInvocationIndex ^ dist; > + int other = local_coef[other_id]; > + int a = gl_LocalInvocationIndex < other_id ? value : other; > + int b = gl_LocalInvocationIndex < other_id ? other : value; > + int dst_b = (b - a) * (1 << s); > + int dst_a = a * (1 << s) + ((dst_b + 1) >> 1); > + value = gl_LocalInvocationIndex < other_id ? dst_a : dst_b; > + > + /* Offset between valid ver pixels for each level, +1, +2, +4 etc */ > + dist <<= 5; > + > + local_coef[gl_LocalInvocationIndex] = value; > + barrier(); > + > + /* Vertical haar wavelet */ > + other_id = gl_LocalInvocationIndex ^ dist; > + other = local_coef[other_id]; > + a = gl_LocalInvocationIndex < other_id ? value : other; > + b = gl_LocalInvocationIndex < other_id ? 
other : value; > + dst_b = b - a; > + dst_a = a + ((dst_b + 1) >> 1); > + value = gl_LocalInvocationIndex < other_id ? dst_a : dst_b; > + } > + > + /* Store value */ > + imageStore(coef_buf[plane_idx], coord, ivec4(value)); > +} > diff --git a/libavcodec/vulkan/vc2_dwt_haar_subgroup.comp b/libavcodec/vulkan/vc2_dwt_haar_subgroup.comp > new file mode 100644 > index 0000000000..81b0964271 > --- /dev/null > +++ b/libavcodec/vulkan/vc2_dwt_haar_subgroup.comp > @@ -0,0 +1,75 @@ > +/* > + * VC2 codec > + * > + * Copyright (c) 2025 raphaelthegreat <geoster3d@gmail.com> > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. 
> + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA > + */ > + > +#extension GL_EXT_scalar_block_layout : require > +#extension GL_KHR_shader_subgroup_basic : require > +#extension GL_KHR_shader_subgroup_shuffle : require > + > +#define TILE_DIM 8 > + > +layout(push_constant, scalar) uniform ComputeInfo { > + int s; > + int plane_idx; > + int wavelet_depth; > +}; > + > +void main() > +{ > + ivec2 tile_coord = ivec2(gl_WorkGroupID.xy); > + ivec2 local_coord = ivec2(gl_LocalInvocationIndex & 7, gl_LocalInvocationIndex >> 3); > + ivec2 coord = tile_coord * ivec2(TILE_DIM) + local_coord; > + > + int value = imageLoad(coef_buf[plane_idx], coord).x; > + for (int i = 0; i < wavelet_depth; i++) > + { > + ivec2 mask = ivec2((1 << i) - 1); > + if (any(notEqual(local_coord & mask, ivec2(0)))) > + break; > + > + /* Offset between valid hor pixels for each level, +1, +2, +4 etc */ > + int dist = (1 << i); > + > + /* Horizontal haar wavelet */ > + uint other_sub_id = gl_SubgroupInvocationID ^ dist; > + int other = subgroupShuffle(value, other_sub_id); > + int a = gl_SubgroupInvocationID < other_sub_id ? value : other; > + int b = gl_SubgroupInvocationID < other_sub_id ? other : value; > + int dst_b = (b - a) * (1 << s); > + int dst_a = a * (1 << s) + ((dst_b + 1) >> 1); > + value = gl_SubgroupInvocationID < other_sub_id ? dst_a : dst_b; > + > + /* Offset between valid ver pixels for each level, +1, +2, +4 etc */ > + dist <<= 3; > + > + /* Vertical haar wavelet */ > + other_sub_id = gl_SubgroupInvocationID ^ dist; > + other = subgroupShuffle(value, other_sub_id); > + a = gl_SubgroupInvocationID < other_sub_id ? value : other; > + b = gl_SubgroupInvocationID < other_sub_id ? other : value; > + dst_b = b - a; > + dst_a = a + ((dst_b + 1) >> 1); > + value = gl_SubgroupInvocationID < other_sub_id ? 
dst_a : dst_b; > + } > + > + /* Store value */ > + imageStore(coef_buf[plane_idx], coord, ivec4(value)); > +} > diff --git a/libavcodec/vulkan/vc2_dwt_hor_legall.comp b/libavcodec/vulkan/vc2_dwt_hor_legall.comp > new file mode 100644 > index 0000000000..bada2ee1fd > --- /dev/null > +++ b/libavcodec/vulkan/vc2_dwt_hor_legall.comp > @@ -0,0 +1,82 @@ > +/* > + * VC2 codec > + * > + * Copyright (c) 2025 raphaelthegreat <geoster3d@gmail.com> > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. 
> + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA > + */ > + > +#extension GL_EXT_scalar_block_layout : require > +#extension GL_EXT_buffer_reference : require > + > +layout(push_constant, scalar) uniform ComputeInfo { > + int s; > + int diff_offset; > + int level; > +}; > + > +int image_load(int coord_x) > +{ > + int coord_y = int(gl_GlobalInvocationID.x); > + return imageLoad(coef_buf[gl_GlobalInvocationID.z], ivec2(coord_x, coord_y)).x; > +} > + > +void image_store(int coord_x, int value) > +{ > + int coord_y = int(gl_GlobalInvocationID.x); > + imageStore(coef_buf[gl_GlobalInvocationID.z], ivec2(coord_x, coord_y), ivec4(value)); > +} > + > +void main() > +{ > + int coord_y = int(gl_GlobalInvocationID.x); > + uint plane_idx = gl_GlobalInvocationID.z; > + ivec2 work_area = imageSize(coef_buf[plane_idx]); > + int dist = 1 << level; > + if (coord_y >= work_area.y || (coord_y & (dist - 1)) != 0) > + return; > + > + // Shift in one bit that is used for additional precision > + for (int x = 0; x < work_area.x; x += dist) > + image_store(x, image_load(x) << 1); > + > + // Lifting stage 2 > + for (int x = 0; x < work_area.x - 2 * dist; x += 2 * dist) { > + int lhs = image_load(x); > + int rhs = image_load(x + 2 * dist); > + int value = image_load(x + dist); > + value -= (lhs + rhs + 1) >> 1; > + image_store(x + dist, value); > + } > + int lhs = image_load(work_area.x - 2 * dist); > + int value = image_load(work_area.x - dist); > + value -= (2 * lhs + 1) >> 1; > + image_store(work_area.x - dist, value); > + > + // Lifting stage 1 > + lhs = image_load(dist); > + value = image_load(0); > + value += (2 * lhs + 2) >> 2; > + image_store(0, value); > + for (int x = 2 * dist; x <= work_area.x - 2 * dist; x += 2 * dist) { > + int lhs = image_load(x - dist); > + int rhs = image_load(x + dist); > + int 
value = image_load(x); > + value += (lhs + rhs + 2) >> 2; > + image_store(x, value); > + } > +} > diff --git a/libavcodec/vulkan/vc2_dwt_upload.comp b/libavcodec/vulkan/vc2_dwt_upload.comp > new file mode 100644 > index 0000000000..c758fd867f > --- /dev/null > +++ b/libavcodec/vulkan/vc2_dwt_upload.comp > @@ -0,0 +1,96 @@ > +/* > + * VC2 codec > + * > + * Copyright (c) 2025 raphaelthegreat <geoster3d@gmail.com> > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA > + */ > + > +#extension GL_EXT_scalar_block_layout : require > +#extension GL_EXT_shader_explicit_arithmetic_types : require > + > +#define AV_PIX_FMT_XV30 214 > +#define AV_PIX_FMT_XV36 216 > +#define AV_PIX_FMT_XV48 242 > +#define AV_PIX_FMT_P212 222 > +#define AV_PIX_FMT_P012 209 > +#define AV_PIX_FMT_P210 198 > +#define AV_PIX_FMT_P016 169 > +#define AV_PIX_FMT_P010 158 > +#define AV_PIX_FMT_NV16 101 > +#define AV_PIX_FMT_NV12 23 > + > +#define Y 0 > +#define U 1 > +#define V 2 > + > +layout(push_constant, scalar) uniform ComputeInfo { > + int s; > + int diff_offset; > + int level; > +}; > + > +uvec4 load_plane(uint plane_idx) > +{ > + ivec2 coord = ivec2(gl_GlobalInvocationID.xy); > + return imageLoad(src_planes[plane_idx], coord); > +} > + > +void 
store_plane(uint plane_idx, uint value) > +{ > + int result = int(value - diff_offset); > + ivec2 coord = ivec2(gl_GlobalInvocationID.xy); > + imageStore(coef_buf[plane_idx], coord, ivec4(result)); > +} > + > +void main() > +{ > + uvec4 p0 = load_plane(0); > +#if PLANE_FMT == AV_PIX_FMT_XV30 > + store_plane(Y, (p0.x >> 10) & 0x3FF); > + store_plane(U, p0.x & 0x3FF); > + store_plane(V, (p0.x >> 20) & 0x3FF); > +#elif PLANE_FMT == AV_PIX_FMT_XV36 > + store_plane(Y, p0.y >> 4); > + store_plane(U, p0.x >> 4); > + store_plane(V, p0.z >> 4); > +#elif PLANE_FMT == AV_PIX_FMT_NV12 > + uvec4 p1 = load_plane(1); > + store_plane(Y, p0.x | p0.y << 8); > + store_plane(U, p1.x); > + store_plane(V, p1.y); > +#elif PLANE_FMT == AV_PIX_FMT_NV16 > + uvec4 p1 = load_plane(1); > + store_plane(Y, p0.x); > + store_plane(U, p1.x); > + store_plane(V, p1.y); > +#elif PLANE_FMT == AV_PIX_FMT_P010 || PLANE_FMT == AV_PIX_FMT_P210 > + uvec4 p1 = load_plane(1); > + store_plane(Y, p0.x >> 6); > + store_plane(U, p1.x >> 6); > + store_plane(V, p1.y >> 6); > +#elif PLANE_FMT == AV_PIX_FMT_P012 || PLANE_FMT == AV_PIX_FMT_P212 > + uvec4 p1 = load_plane(1); > + store_plane(Y, p0.x >> 4); > + store_plane(U, p1.x >> 4); > + store_plane(V, p1.y >> 4); > +#else > + store_plane(Y, p0.x); > + store_plane(U, load_plane(1).x); > + store_plane(V, load_plane(2).x); > +#endif > +} > diff --git a/libavcodec/vulkan/vc2_dwt_ver_legall.comp b/libavcodec/vulkan/vc2_dwt_ver_legall.comp > new file mode 100644 > index 0000000000..ca391cc8d8 > --- /dev/null > +++ b/libavcodec/vulkan/vc2_dwt_ver_legall.comp > @@ -0,0 +1,78 @@ > +/* > + * VC2 codec > + * > + * Copyright (c) 2025 raphaelthegreat <geoster3d@gmail.com> > + * > + * This file is part of FFmpeg. 
> + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA > + */ > + > +#extension GL_EXT_scalar_block_layout : require > +#extension GL_EXT_buffer_reference : require > + > +layout(push_constant, scalar) uniform ComputeInfo { > + int s; > + int diff_offset; > + int level; > +}; > + > +int image_load(int coord_y) > +{ > + int coord_x = int(gl_GlobalInvocationID.x); > + return imageLoad(coef_buf[gl_GlobalInvocationID.z], ivec2(coord_x, coord_y)).x; > +} > + > +void image_store(int coord_y, int value) > +{ > + int coord_x = int(gl_GlobalInvocationID.x); > + imageStore(coef_buf[gl_GlobalInvocationID.z], ivec2(coord_x, coord_y), ivec4(value)); > +} > + > +void main() > +{ > + int coord_x = int(gl_GlobalInvocationID.x); > + uint plane_idx = gl_GlobalInvocationID.z; > + ivec2 work_area = imageSize(coef_buf[plane_idx]); > + int dist = 1 << level; > + if (coord_x >= work_area.x || (coord_x & (dist - 1)) != 0) > + return; > + > + // Lifting stage 2 > + for (int y = dist; y < work_area.y - 2 * dist; y += 2 * dist) { > + int lhs = image_load(y - dist); > + int rhs = image_load(y + dist); > + int value = image_load(y); > + value -= (lhs + rhs + 1) >> 1; > + image_store(y, value); > + } > + int lhs = image_load(work_area.y - 2 * dist); > + int value = image_load(work_area.y - dist); > 
+ value -= (2 * lhs + 1) >> 1; > + image_store(work_area.y - dist, value); > + > + // Lifting stage 1 > + lhs = image_load(dist); > + value = image_load(0); > + value += (2 * lhs + 2) >> 2; > + image_store(0, value); > + for (int y = 2 * dist; y <= work_area.y - 2 * dist; y += 2 * dist) { > + int lhs = image_load(y + dist); > + int rhs = image_load(y - dist); > + int value = image_load(y); > + value += (lhs + rhs + 2) >> 2; > + image_store(y, value); > + } > +} > diff --git a/libavcodec/vulkan/vc2_encode.comp b/libavcodec/vulkan/vc2_encode.comp > new file mode 100644 > index 0000000000..4d8adcca61 > --- /dev/null > +++ b/libavcodec/vulkan/vc2_encode.comp > @@ -0,0 +1,159 @@ > +/* > + * VC2 codec > + * > + * Copyright (c) 2025 raphaelthegreat <geoster3d@gmail.com> > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. 
> + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA > + */ > + > +#extension GL_EXT_shader_explicit_arithmetic_types : require > +#extension GL_EXT_scalar_block_layout : require > +#extension GL_EXT_buffer_reference : require > +#extension GL_EXT_debug_printf : require > + > +#define MAX_DWT_LEVELS (5) > + > +layout(push_constant, scalar) uniform ComputeInfo { > + u8buf bytestream; > + ivec2 num_slices; > + int wavelet_depth; > + int size_scaler; > + int prefix_bytes; > +}; > + > +void put_vc2_ue_uint(inout PutBitContext pb, uint val) > +{ > + uint32_t pbits = 1; > + int bits = 1; > + > + ++val; > + > + while ((val >> 8) != 0) > + { > + pbits |= uint32_t(interleaved_ue_golomb_tab[val & 0xff]) << bits; > + val >>= 8; > + bits += 16; > + } > + > + pbits |= uint32_t(top_interleaved_ue_golomb_tab[val]) << bits; > + bits += golomb_len_tab[val]; > + put_bits(pb, bits, pbits); > +} > + > +int quants[MAX_DWT_LEVELS][4]; > + > +int subband_coord(int index, int h, int lvl) > +{ > + int coord = index; > + coord <<= 1; > + coord |= h; > + coord <<= (wavelet_depth-lvl-1); > + return coord; > +} > + > +void main() > +{ > + int slice_index = int(gl_GlobalInvocationID.x); > + int max_index = num_slices.x * num_slices.y; > + if (slice_index >= max_index) > + return; > + > + /* Step 2. 
Quantize and encode */ > + int pb_start = slice_args[slice_index].pb_start; > + int workgroup_x = int(gl_WorkGroupSize.x); > + for (int i = 0, index = workgroup_x - 1; i < gl_WorkGroupID.x; i++) { > + pb_start += slice_args[index].pb_start + slice_args[index].bytes; > + index += workgroup_x; > + } > + ivec2 slice_coord = ivec2(slice_index % num_slices.x, slice_index / num_slices.x); > + int slice_bytes_max = slice_args[slice_index].bytes; > + int quant_index = slice_args[slice_index].quant_idx; > + > + PutBitContext pb; > + init_put_bits(pb, OFFBUF(u8buf, bytestream, pb_start), slice_bytes_max); > + > + for (int level = 0; level < wavelet_depth; level++) > + for (int orientation = int(level > 0); orientation < 4; orientation++) > + quants[level][orientation] = max(quant_index - lut_quant[level][orientation], 0); > + > + /* Write quant index for this slice */ > + put_bits(pb, 8, quant_index); > + > + /* Luma + 2 Chroma planes */ > + for (int p = 0; p < 3; p++) > + { > + int pad_s, pad_c; > + int bytes_start = int32_t(put_bytes_count(pb)); > + > + /* Save current location and write a zero value */ > + uint64_t write_ptr_start = pb.buf; > + int bit_left_start = pb.bit_left; > + put_bits(pb, 8, 0); > + > + ivec2 dwt_dim = imageSize(coef_buf[p]); > + for (int level = 0; level < wavelet_depth; level++) > + { > + ivec2 band_size = dwt_dim >> (wavelet_depth - level); > + for (int o = int(level > 0); o < 4; o++) > + { > + /* Encode subband */ > + int left = band_size.x * (slice_coord.x) / num_slices.x; > + int right = band_size.x * (slice_coord.x+1) / num_slices.x; > + int top = band_size.y * (slice_coord.y) / num_slices.y; > + int bottom = band_size.y * (slice_coord.y+1) / num_slices.y; > + > + const int q_idx = quants[level][o]; > + const int qfactor = ff_dirac_qscale_tab[q_idx]; > + > + const int yh = o >> 1; > + const int xh = o & 1; > + > + for (int y = top; y < bottom; y++) > + { > + for (int x = left; x < right; x++) > + { > + int sx = subband_coord(x, xh, level); > 
+ int sy = subband_coord(y, yh, level); > + int coef = imageLoad(coef_buf[p], ivec2(sx, sy)).x; > + uint c_abs = uint(abs(coef)); > + c_abs = (c_abs << 2) / qfactor; > + put_vc2_ue_uint(pb, c_abs); > + if (c_abs != 0) > + put_bits(pb, 1, int(coef < 0)); > + } > + } > + } > + } > + flush_put_bits(pb); > + int bytes_len = int32_t(put_bytes_count(pb)) - bytes_start - 1; > + if (p == 2) > + { > + int len_diff = slice_bytes_max - int32_t(put_bytes_count(pb)); > + pad_s = align((bytes_len + len_diff), size_scaler)/size_scaler; > + pad_c = (pad_s*size_scaler) - bytes_len; > + } > + else > + { > + pad_s = align(bytes_len, size_scaler)/size_scaler; > + pad_c = (pad_s*size_scaler) - bytes_len; > + } > + uint64_t start_ptr = write_ptr_start + ((BUF_BITS - bit_left_start) >> 3); > + u8buf(start_ptr).v = uint8_t(pad_s); > + /* vc2-reference uses that padding that decodes to '0' coeffs */ > + skip_put_bytes(pb, pad_c); > + } > +} > diff --git a/libavcodec/vulkan/vc2_slice_sizes.comp b/libavcodec/vulkan/vc2_slice_sizes.comp > new file mode 100644 > index 0000000000..61070c1dc2 > --- /dev/null > +++ b/libavcodec/vulkan/vc2_slice_sizes.comp > @@ -0,0 +1,170 @@ > +/* > + * VC2 codec > + * > + * Copyright (c) 2025 raphaelthegreat <geoster3d@gmail.com> > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. 
> + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA > + */ > + > +#extension GL_EXT_shader_explicit_arithmetic_types : require > +#extension GL_EXT_scalar_block_layout : require > +#extension GL_EXT_buffer_reference : require > + > +#define DIRAC_MAX_QUANT_INDEX 116 > +#define MAX_DWT_LEVELS 5 > + > +layout(push_constant, scalar) uniform ComputeInfo { > + ivec2 num_slices; > + int wavelet_depth; > + int size_scaler; > + int prefix_bytes; > + int bits_ceil; > + int bits_floor; > +}; > + > +int count_vc2_ue_uint(uint val) > +{ > + return 2 * findMSB(val + 1) + 1; > +} > + > +int cache[DIRAC_MAX_QUANT_INDEX]; > +int quants[MAX_DWT_LEVELS][4]; > +shared int slice_sizes[gl_WorkGroupSize.x]; > + > +int subband_coord(int index, int h, int lvl) > +{ > + int coord = index; > + coord <<= 1; > + coord |= h; > + coord <<= (wavelet_depth-lvl-1); > + return coord; > +} > + > +int count_hq_slice(int quant_index) > +{ > + int bits = 0; > + if (cache[quant_index] != 0) > + return cache[quant_index]; > + > + bits += 8*prefix_bytes; > + bits += 8; /* quant_idx */ > + > + for (int level = 0; level < wavelet_depth; level++) > + for (int orientation = int(level > 0); orientation < 4; orientation++) > + quants[level][orientation] = max(quant_index - lut_quant[level][orientation], 0); > + > + int slice_index = int(gl_GlobalInvocationID.x); > + ivec2 slice_coord = ivec2(slice_index % num_slices.x, slice_index / num_slices.x); > + for (int p = 0; p < 3; p++) > + { > + int bytes_start = bits >> 3; > + bits += 8; > + > + ivec2 dwt_dim = imageSize(coef_buf[p]); > + for (int level = 0; level < wavelet_depth; level++) > + { > + ivec2 band_dim = dwt_dim >> (wavelet_depth - level); > + for (int o = int(level > 0); o < 4; o++) > + { > + const int left = band_dim.x * slice_coord.x / num_slices.x; > + const int right = 
band_dim.x * (slice_coord.x+1) / num_slices.x; > + const int top = band_dim.y * slice_coord.y / num_slices.y; > + const int bottom = band_dim.y * (slice_coord.y+1) / num_slices.y; > + > + const int q_idx = quants[level][o]; > + const int qfactor = ff_dirac_qscale_tab[q_idx]; > + > + const int yh = o >> 1; > + const int xh = o & 1; > + > + for (int y = top; y < bottom; y++) > + { > + for (int x = left; x < right; x++) > + { > + int sx = subband_coord(x, xh, level); > + int sy = subband_coord(y, yh, level); > + int coef = imageLoad(coef_buf[p], ivec2(sx, sy)).x; > + uint c_abs = uint(abs(coef)); > + c_abs = (c_abs << 2) / qfactor; > + bits += count_vc2_ue_uint(c_abs); > + bits += int(c_abs > 0); > + } > + } > + } > + } > + bits += align(bits, 8) - bits; > + int bytes_len = (bits >> 3) - bytes_start - 1; > + int pad_s = align(bytes_len, size_scaler) / size_scaler; > + int pad_c = (pad_s * size_scaler) - bytes_len; > + bits += pad_c * 8; > + } > + > + cache[quant_index] = bits; > + return bits; > +} > + > +int ssize_round(int b) > +{ > + return align(b, size_scaler) + 4 + prefix_bytes; > +} > + > +void main() > +{ > + int slice_index = int(gl_GlobalInvocationID.x); > + int max_index = num_slices.x * num_slices.y; > + if (slice_index >= max_index) > + return; > + > + for (int i = 0; i < DIRAC_MAX_QUANT_INDEX; i++) > + cache[i] = 0; > + > + const int q_ceil = DIRAC_MAX_QUANT_INDEX; > + const int top = bits_ceil; > + const int bottom = bits_floor; > + int quant_buf[2] = int[2](-1, -1); > + int quant = slice_args[slice_index].quant_idx; > + int step = 1; > + int bits_last = 0; > + int bits = count_hq_slice(quant); > + while ((bits > top) || (bits < bottom)) > + { > + const int signed_step = bits > top ? +step : -step; > + quant = clamp(quant + signed_step, 0, q_ceil-1); > + bits = count_hq_slice(quant); > + if (quant_buf[1] == quant) > + { > + quant = max(quant_buf[0], quant); > + bits = quant == quant_buf[0] ? 
bits_last : bits; > + break; > + } > + step = clamp(step / 2, 1, (q_ceil - 1) / 2); > + quant_buf[1] = quant_buf[0]; > + quant_buf[0] = quant; > + bits_last = bits; > + } > + int bytes = ssize_round(bits >> 3); > + slice_args[slice_index].quant_idx = clamp(quant, 0, q_ceil-1); > + slice_args[slice_index].bytes = bytes; > + slice_sizes[gl_LocalInvocationIndex] = bytes; > + barrier(); > + > + /* Prefix sum for all slices in current workgroup */ > + int total_bytes = 0; > + for (int i = 0; i < gl_LocalInvocationIndex; i++) > + total_bytes += slice_sizes[i]; > + slice_args[slice_index].pb_start = total_bytes; > +} > -- > 2.49.0 > _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [FFmpeg-devel] [PATCH v4 4/4] lavc: implement a Vulkan-based VC-2 encoder Implements a Vulkan based dirac encoder. Supports Haar and Legall wavelets and should work with all wavelet depths. 2025-05-17 20:48 ` [FFmpeg-devel] [PATCH v4 4/4] lavc: implement a Vulkan-based VC-2 encoder Implements a Vulkan based dirac encoder. Supports Haar and Legall wavelets and should work with all wavelet depths IndecisiveTurtle 2025-05-17 20:50 ` IndecisiveTurtle @ 2025-05-19 17:09 ` Andreas Rheinhardt 2025-05-19 17:40 ` IndecisiveTurtle 1 sibling, 1 reply; 10+ messages in thread From: Andreas Rheinhardt @ 2025-05-19 17:09 UTC (permalink / raw) To: ffmpeg-devel IndecisiveTurtle: > From: IndecisiveTurtle <geoster3d@gmail.com> > > Performance wise, encoding a 3440x1440 1-minute video is performed in about 2.4 minutes with the cpu encoder running on my Ryzen 5 4600H, while it takes about 1.3 minutes on my NVIDIA GTX 1650 The last iteration of this patchset claimed 2.5m for the software encoder vs 30s hardware. The software performance improvement seems small compared to what I expected, yet I am surprised about the hardware slowdown (presuming it was the same file). Was the switch to the lut based writing of codes not beneficial? 
> > Haar shader has a subgroup optimized variant that applies when configured wavelet depth allows it > --- > configure | 1 + > libavcodec/Makefile | 3 + > libavcodec/allcodecs.c | 1 + > libavcodec/vc2enc_vulkan.c | 775 +++++++++++++++++++ > libavcodec/vulkan/vc2_dwt_haar.comp | 82 ++ > libavcodec/vulkan/vc2_dwt_haar_subgroup.comp | 75 ++ > libavcodec/vulkan/vc2_dwt_hor_legall.comp | 82 ++ > libavcodec/vulkan/vc2_dwt_upload.comp | 96 +++ > libavcodec/vulkan/vc2_dwt_ver_legall.comp | 78 ++ > libavcodec/vulkan/vc2_encode.comp | 159 ++++ > libavcodec/vulkan/vc2_slice_sizes.comp | 170 ++++ > 11 files changed, 1522 insertions(+) > create mode 100644 libavcodec/vc2enc_vulkan.c > create mode 100644 libavcodec/vulkan/vc2_dwt_haar.comp > create mode 100644 libavcodec/vulkan/vc2_dwt_haar_subgroup.comp > create mode 100644 libavcodec/vulkan/vc2_dwt_hor_legall.comp > create mode 100644 libavcodec/vulkan/vc2_dwt_upload.comp > create mode 100644 libavcodec/vulkan/vc2_dwt_ver_legall.comp > create mode 100644 libavcodec/vulkan/vc2_encode.comp > create mode 100644 libavcodec/vulkan/vc2_slice_sizes.comp > > +#define VC2ENC_FLAGS (AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) > +static const AVOption vc2enc_options[] = { > + {"tolerance", "Max undershoot in percent", offsetof(VC2EncContext, tolerance), AV_OPT_TYPE_DOUBLE, {.dbl = 5.0f}, 0.0f, 45.0f, VC2ENC_FLAGS, .unit = "tolerance"}, > + {"slice_width", "Slice width", offsetof(VC2EncContext, slice_width), AV_OPT_TYPE_INT, {.i64 = 32}, 32, 1024, VC2ENC_FLAGS, .unit = "slice_width"}, > + {"slice_height", "Slice height", offsetof(VC2EncContext, slice_height), AV_OPT_TYPE_INT, {.i64 = 16}, 8, 1024, VC2ENC_FLAGS, .unit = "slice_height"}, > + {"wavelet_depth", "Transform depth", offsetof(VC2EncContext, wavelet_depth), AV_OPT_TYPE_INT, {.i64 = 4}, 1, 5, VC2ENC_FLAGS, .unit = "wavelet_depth"}, > + {"wavelet_type", "Transform type", offsetof(VC2EncContext, wavelet_idx), AV_OPT_TYPE_INT, {.i64 = VC2_TRANSFORM_5_3}, 0, 
VC2_TRANSFORMS_NB, VC2ENC_FLAGS, .unit = "wavelet_idx"}, You don't allow the 9_7 wavelet here (intentionally?), but then you should restrict the range to disallow the value 0 (== VC2_TRANSFORM_9_7). > + {"5_3", "LeGall (5,3)", 0, AV_OPT_TYPE_CONST, {.i64 = VC2_TRANSFORM_5_3}, INT_MIN, INT_MAX, VC2ENC_FLAGS, .unit = "wavelet_idx"}, > + {"haar", "Haar (with shift)", 0, AV_OPT_TYPE_CONST, {.i64 = VC2_TRANSFORM_HAAR_S}, INT_MIN, INT_MAX, VC2ENC_FLAGS, .unit = "wavelet_idx"}, > + {"haar_noshift", "Haar (without shift)", 0, AV_OPT_TYPE_CONST, {.i64 = VC2_TRANSFORM_HAAR}, INT_MIN, INT_MAX, VC2ENC_FLAGS, .unit = "wavelet_idx"}, > + {"qm", "Custom quantization matrix", offsetof(VC2EncContext, quant_matrix), AV_OPT_TYPE_INT, {.i64 = VC2_QM_DEF}, 0, VC2_QM_NB, VC2ENC_FLAGS, .unit = "quant_matrix"}, > + {"default", "Default from the specifications", 0, AV_OPT_TYPE_CONST, {.i64 = VC2_QM_DEF}, INT_MIN, INT_MAX, VC2ENC_FLAGS, .unit = "quant_matrix"}, > + {"color", "Prevents low bitrate discoloration", 0, AV_OPT_TYPE_CONST, {.i64 = VC2_QM_COL}, INT_MIN, INT_MAX, VC2ENC_FLAGS, .unit = "quant_matrix"}, > + {"flat", "Optimize for PSNR", 0, AV_OPT_TYPE_CONST, {.i64 = VC2_QM_FLAT}, INT_MIN, INT_MAX, VC2ENC_FLAGS, .unit = "quant_matrix"}, > + {NULL} > +}; > + > +static const AVClass vc2enc_class = { > + .class_name = "vc2_vulkan_encoder", > + .category = AV_CLASS_CATEGORY_ENCODER, > + .option = vc2enc_options, > + .item_name = av_default_item_name, > + .version = LIBAVUTIL_VERSION_INT > +}; > + > +static const FFCodecDefault vc2enc_defaults[] = { > + { "b", "600000000" }, > + { NULL }, > +}; > + > +static const AVCodecHWConfigInternal *const ff_vc2_hw_configs[] = { Should not use ff_ prefix. 
> + HW_CONFIG_ENCODER_FRAMES(VULKAN, VULKAN), > + HW_CONFIG_ENCODER_DEVICE(NONE, VULKAN), > + NULL, > +}; > + > +const FFCodec ff_vc2_vulkan_encoder = { > + .p.name = "vc2_vulkan", > + CODEC_LONG_NAME("SMPTE VC-2"), > + .p.type = AVMEDIA_TYPE_VIDEO, > + .p.id = AV_CODEC_ID_DIRAC, > + .p.capabilities = AV_CODEC_CAP_HARDWARE, > + .caps_internal = FF_CODEC_CAP_INIT_CLEANUP, > + .priv_data_size = sizeof(VC2EncVulkanContext), > + .init = vc2_encode_init, > + .close = vc2_encode_end, > + FF_CODEC_ENCODE_CB(vc2_encode_frame), > + .p.priv_class = &vc2enc_class, > + .defaults = vc2enc_defaults, > + CODEC_PIXFMTS(AV_PIX_FMT_VULKAN), > + .hw_configs = ff_vc2_hw_configs, > +}; > diff --git a/libavcodec/vulkan/vc2_encode.comp b/libavcodec/vulkan/vc2_encode.comp > new file mode 100644 > index 0000000000..4d8adcca61 > --- /dev/null > +++ b/libavcodec/vulkan/vc2_encode.comp > @@ -0,0 +1,159 @@ > +/* > + * VC2 codec > + * > + * Copyright (c) 2025 raphaelthegreat <geoster3d@gmail.com> > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. 
> + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA > + */ > + > +#extension GL_EXT_shader_explicit_arithmetic_types : require > +#extension GL_EXT_scalar_block_layout : require > +#extension GL_EXT_buffer_reference : require > +#extension GL_EXT_debug_printf : require > + > +#define MAX_DWT_LEVELS (5) > + > +layout(push_constant, scalar) uniform ComputeInfo { > + u8buf bytestream; > + ivec2 num_slices; > + int wavelet_depth; > + int size_scaler; > + int prefix_bytes; > +}; > + > +void put_vc2_ue_uint(inout PutBitContext pb, uint val) > +{ > + uint32_t pbits = 1; > + int bits = 1; > + > + ++val; > + > + while ((val >> 8) != 0) > + { > + pbits |= uint32_t(interleaved_ue_golomb_tab[val & 0xff]) << bits; > + val >>= 8; > + bits += 16; > + } > + > + pbits |= uint32_t(top_interleaved_ue_golomb_tab[val]) << bits; > + bits += golomb_len_tab[val]; > + put_bits(pb, bits, pbits); I see you switched to a lut based approach; yet you use 32 bits, similarly to what the software decoder did before af9935835335cae1ae5a4ec7fc14c1b5e25c1f2d. Can you guarantee that the encoded coefficients fit into 32bits? Is this a requirement/consequence of the spec? > +} > + > +int quants[MAX_DWT_LEVELS][4]; > + > +int subband_coord(int index, int h, int lvl) > +{ > + int coord = index; > + coord <<= 1; > + coord |= h; > + coord <<= (wavelet_depth-lvl-1); > + return coord; > +} > + _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [FFmpeg-devel] [PATCH v4 4/4] lavc: implement a Vulkan-based VC-2 encoder Implements a Vulkan based dirac encoder. Supports Haar and Legall wavelets and should work with all wavelet depths. 2025-05-19 17:09 ` Andreas Rheinhardt @ 2025-05-19 17:40 ` IndecisiveTurtle 0 siblings, 0 replies; 10+ messages in thread From: IndecisiveTurtle @ 2025-05-19 17:40 UTC (permalink / raw) To: FFmpeg development discussions and patches > The last iteration of this patchset claimed 2.5m for the software > encoder vs 30s hardware. The software performance improvement seems > small compared to what I expected, yet I am surprised about the hardware > slowdown (presuming it was the same file). Was the switch to the lut > based writing of codes not beneficial? It is not the same video file. The last description was for a 1080p video; this one is between 1440p and 4K. I wanted to put more stress on the encoder to test the new performance gains. > You don't allow the 9_7 wavelet here (intentionally?) Yes, this is intentional: the 9_7 wavelet is not implemented in the Vulkan encoder. This is also why I couldn't unify this array as you mentioned before. Στις Δευ 19 Μαΐ 2025 στις 8:09 μ.μ., ο/η Andreas Rheinhardt <andreas.rheinhardt@outlook.com> έγραψε: > > IndecisiveTurtle: > > From: IndecisiveTurtle <geoster3d@gmail.com> > > > > Performance wise, encoding a 3440x1440 1-minute video is performed in about 2.4 minutes with the cpu encoder running on my Ryzen 5 4600H, while it takes about 1.3 minutes on my NVIDIA GTX 1650 > > The last iteration of this patchset claimed 2.5m for the software > encoder vs 30s hardware. The software performance improvement seems > small compared to what I expected, yet I am surprised about the hardware > slowdown (presuming it was the same file). Was the switch to the lut > based writing of codes not beneficial? 
> > > > > Haar shader has a subgroup optimized variant that applies when configured wavelet depth allows it > > --- > > configure | 1 + > > libavcodec/Makefile | 3 + > > libavcodec/allcodecs.c | 1 + > > libavcodec/vc2enc_vulkan.c | 775 +++++++++++++++++++ > > libavcodec/vulkan/vc2_dwt_haar.comp | 82 ++ > > libavcodec/vulkan/vc2_dwt_haar_subgroup.comp | 75 ++ > > libavcodec/vulkan/vc2_dwt_hor_legall.comp | 82 ++ > > libavcodec/vulkan/vc2_dwt_upload.comp | 96 +++ > > libavcodec/vulkan/vc2_dwt_ver_legall.comp | 78 ++ > > libavcodec/vulkan/vc2_encode.comp | 159 ++++ > > libavcodec/vulkan/vc2_slice_sizes.comp | 170 ++++ > > 11 files changed, 1522 insertions(+) > > create mode 100644 libavcodec/vc2enc_vulkan.c > > create mode 100644 libavcodec/vulkan/vc2_dwt_haar.comp > > create mode 100644 libavcodec/vulkan/vc2_dwt_haar_subgroup.comp > > create mode 100644 libavcodec/vulkan/vc2_dwt_hor_legall.comp > > create mode 100644 libavcodec/vulkan/vc2_dwt_upload.comp > > create mode 100644 libavcodec/vulkan/vc2_dwt_ver_legall.comp > > create mode 100644 libavcodec/vulkan/vc2_encode.comp > > create mode 100644 libavcodec/vulkan/vc2_slice_sizes.comp > > > > > > +#define VC2ENC_FLAGS (AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) > > +static const AVOption vc2enc_options[] = { > > + {"tolerance", "Max undershoot in percent", offsetof(VC2EncContext, tolerance), AV_OPT_TYPE_DOUBLE, {.dbl = 5.0f}, 0.0f, 45.0f, VC2ENC_FLAGS, .unit = "tolerance"}, > > + {"slice_width", "Slice width", offsetof(VC2EncContext, slice_width), AV_OPT_TYPE_INT, {.i64 = 32}, 32, 1024, VC2ENC_FLAGS, .unit = "slice_width"}, > > + {"slice_height", "Slice height", offsetof(VC2EncContext, slice_height), AV_OPT_TYPE_INT, {.i64 = 16}, 8, 1024, VC2ENC_FLAGS, .unit = "slice_height"}, > > + {"wavelet_depth", "Transform depth", offsetof(VC2EncContext, wavelet_depth), AV_OPT_TYPE_INT, {.i64 = 4}, 1, 5, VC2ENC_FLAGS, .unit = "wavelet_depth"}, > > + {"wavelet_type", "Transform type", offsetof(VC2EncContext, 
wavelet_idx), AV_OPT_TYPE_INT, {.i64 = VC2_TRANSFORM_5_3}, 0, VC2_TRANSFORMS_NB, VC2ENC_FLAGS, .unit = "wavelet_idx"}, > > You don't allow the 9_7 wavelet here (intentionally?), but then you > should restrict the range to disallow the value 0 (== VC2_TRANSFORM_9_7). > > > + {"5_3", "LeGall (5,3)", 0, AV_OPT_TYPE_CONST, {.i64 = VC2_TRANSFORM_5_3}, INT_MIN, INT_MAX, VC2ENC_FLAGS, .unit = "wavelet_idx"}, > > + {"haar", "Haar (with shift)", 0, AV_OPT_TYPE_CONST, {.i64 = VC2_TRANSFORM_HAAR_S}, INT_MIN, INT_MAX, VC2ENC_FLAGS, .unit = "wavelet_idx"}, > > + {"haar_noshift", "Haar (without shift)", 0, AV_OPT_TYPE_CONST, {.i64 = VC2_TRANSFORM_HAAR}, INT_MIN, INT_MAX, VC2ENC_FLAGS, .unit = "wavelet_idx"}, > > + {"qm", "Custom quantization matrix", offsetof(VC2EncContext, quant_matrix), AV_OPT_TYPE_INT, {.i64 = VC2_QM_DEF}, 0, VC2_QM_NB, VC2ENC_FLAGS, .unit = "quant_matrix"}, > > + {"default", "Default from the specifications", 0, AV_OPT_TYPE_CONST, {.i64 = VC2_QM_DEF}, INT_MIN, INT_MAX, VC2ENC_FLAGS, .unit = "quant_matrix"}, > > + {"color", "Prevents low bitrate discoloration", 0, AV_OPT_TYPE_CONST, {.i64 = VC2_QM_COL}, INT_MIN, INT_MAX, VC2ENC_FLAGS, .unit = "quant_matrix"}, > > + {"flat", "Optimize for PSNR", 0, AV_OPT_TYPE_CONST, {.i64 = VC2_QM_FLAT}, INT_MIN, INT_MAX, VC2ENC_FLAGS, .unit = "quant_matrix"}, > > + {NULL} > > +}; > > + > > +static const AVClass vc2enc_class = { > > + .class_name = "vc2_vulkan_encoder", > > + .category = AV_CLASS_CATEGORY_ENCODER, > > + .option = vc2enc_options, > > + .item_name = av_default_item_name, > > + .version = LIBAVUTIL_VERSION_INT > > +}; > > + > > +static const FFCodecDefault vc2enc_defaults[] = { > > + { "b", "600000000" }, > > + { NULL }, > > +}; > > + > > +static const AVCodecHWConfigInternal *const ff_vc2_hw_configs[] = { > > Should not use ff_ prefix. 
> > > > + HW_CONFIG_ENCODER_FRAMES(VULKAN, VULKAN), > > + HW_CONFIG_ENCODER_DEVICE(NONE, VULKAN), > > + NULL, > > +}; > > + > > +const FFCodec ff_vc2_vulkan_encoder = { > > + .p.name = "vc2_vulkan", > > + CODEC_LONG_NAME("SMPTE VC-2"), > > + .p.type = AVMEDIA_TYPE_VIDEO, > > + .p.id = AV_CODEC_ID_DIRAC, > > + .p.capabilities = AV_CODEC_CAP_HARDWARE, > > + .caps_internal = FF_CODEC_CAP_INIT_CLEANUP, > > + .priv_data_size = sizeof(VC2EncVulkanContext), > > + .init = vc2_encode_init, > > + .close = vc2_encode_end, > > + FF_CODEC_ENCODE_CB(vc2_encode_frame), > > + .p.priv_class = &vc2enc_class, > > + .defaults = vc2enc_defaults, > > + CODEC_PIXFMTS(AV_PIX_FMT_VULKAN), > > + .hw_configs = ff_vc2_hw_configs, > > +}; > > diff --git a/libavcodec/vulkan/vc2_encode.comp b/libavcodec/vulkan/vc2_encode.comp > > new file mode 100644 > > index 0000000000..4d8adcca61 > > --- /dev/null > > +++ b/libavcodec/vulkan/vc2_encode.comp > > @@ -0,0 +1,159 @@ > > +/* > > + * VC2 codec > > + * > > + * Copyright (c) 2025 raphaelthegreat <geoster3d@gmail.com> > > + * > > + * This file is part of FFmpeg. > > + * > > + * FFmpeg is free software; you can redistribute it and/or > > + * modify it under the terms of the GNU Lesser General Public > > + * License as published by the Free Software Foundation; either > > + * version 2.1 of the License, or (at your option) any later version. > > + * > > + * FFmpeg is distributed in the hope that it will be useful, > > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + * Lesser General Public License for more details. 
> > + * > > + * You should have received a copy of the GNU Lesser General Public > > + * License along with FFmpeg; if not, write to the Free Software > > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA > > + */ > > + > > +#extension GL_EXT_shader_explicit_arithmetic_types : require > > +#extension GL_EXT_scalar_block_layout : require > > +#extension GL_EXT_buffer_reference : require > > +#extension GL_EXT_debug_printf : require > > + > > +#define MAX_DWT_LEVELS (5) > > + > > +layout(push_constant, scalar) uniform ComputeInfo { > > + u8buf bytestream; > > + ivec2 num_slices; > > + int wavelet_depth; > > + int size_scaler; > > + int prefix_bytes; > > +}; > > + > > +void put_vc2_ue_uint(inout PutBitContext pb, uint val) > > +{ > > + uint32_t pbits = 1; > > + int bits = 1; > > + > > + ++val; > > + > > + while ((val >> 8) != 0) > > + { > > + pbits |= uint32_t(interleaved_ue_golomb_tab[val & 0xff]) << bits; > > + val >>= 8; > > + bits += 16; > > + } > > + > > + pbits |= uint32_t(top_interleaved_ue_golomb_tab[val]) << bits; > > + bits += golomb_len_tab[val]; > > + put_bits(pb, bits, pbits); > > I see you switched to a lut based approach; yet you use 32 bits, > similarly to what the software decoder did before > af9935835335cae1ae5a4ec7fc14c1b5e25c1f2d. Can you guarantee that the > encoded coefficients fit into 32bits? Is this a requirement/consequence > of the spec? > > > +} > > + > > +int quants[MAX_DWT_LEVELS][4]; > > + > > +int subband_coord(int index, int h, int lvl) > > +{ > > + int coord = index; > > + coord <<= 1; > > + coord |= h; > > + coord <<= (wavelet_depth-lvl-1); > > + return coord; > > +} > > + > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". 
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [FFmpeg-devel] [PATCH v4 1/4] libavcodec/vc2enc: Split out common functions between software and hardware encoders 2025-05-17 20:48 [FFmpeg-devel] [PATCH v4 1/4] libavcodec/vc2enc: Split out common functions between software and hardware encoders IndecisiveTurtle ` (2 preceding siblings ...) 2025-05-17 20:48 ` [FFmpeg-devel] [PATCH v4 4/4] lavc: implement a Vulkan-based VC-2 encoder Implements a Vulkan based dirac encoder. Supports Haar and Legall wavelets and should work with all wavelet depths IndecisiveTurtle @ 2025-05-19 15:56 ` Andreas Rheinhardt 3 siblings, 0 replies; 10+ messages in thread From: Andreas Rheinhardt @ 2025-05-19 15:56 UTC (permalink / raw) To: ffmpeg-devel IndecisiveTurtle: > From: IndecisiveTurtle <geoster3d@gmail.com> > > --- > libavcodec/Makefile | 2 +- > libavcodec/vc2enc.c | 679 ++----------------------------------- > libavcodec/vc2enc_common.c | 571 +++++++++++++++++++++++++++++++ > libavcodec/vc2enc_common.h | 178 ++++++++++ > 4 files changed, 772 insertions(+), 658 deletions(-) > create mode 100644 libavcodec/vc2enc_common.c > create mode 100644 libavcodec/vc2enc_common.h > > diff --git a/libavcodec/Makefile b/libavcodec/Makefile > index 77734dff24..bdf0d6742e 100644 > --- a/libavcodec/Makefile > +++ b/libavcodec/Makefile > @@ -771,7 +771,7 @@ OBJS-$(CONFIG_VC1_CUVID_DECODER) += cuviddec.o > OBJS-$(CONFIG_VC1_MMAL_DECODER) += mmaldec.o > OBJS-$(CONFIG_VC1_QSV_DECODER) += qsvdec.o > OBJS-$(CONFIG_VC1_V4L2M2M_DECODER) += v4l2_m2m_dec.o > -OBJS-$(CONFIG_VC2_ENCODER) += vc2enc.o vc2enc_dwt.o diractab.o > +OBJS-$(CONFIG_VC2_ENCODER) += vc2enc.o vc2enc_dwt.o vc2enc_common.o diractab.o Seems like this should be split into two lines > OBJS-$(CONFIG_VCR1_DECODER) += vcr1.o > OBJS-$(CONFIG_VMDAUDIO_DECODER) += vmdaudio.o > OBJS-$(CONFIG_VMDVIDEO_DECODER) += vmdvideo.o > diff --git a/libavcodec/vc2enc.c b/libavcodec/vc2enc.c > index 99ca95c40a..939bafa195 100644 > --- a/libavcodec/vc2enc.c > +++ b/libavcodec/vc2enc.c > @@ -30,505 +30,11 @@ 
> #include "put_bits.h" > #include "version.h" > > -#include "vc2enc_dwt.h" > -#include "diractab.h" > - > -/* The limited size resolution of each slice forces us to do this */ > -#define SSIZE_ROUND(b) (FFALIGN((b), s->size_scaler) + 4 + s->prefix_bytes) > +#include "vc2enc_common.h" > > /* Decides the cutoff point in # of slices to distribute the leftover bytes */ > #define SLICE_REDIST_TOTAL 150 > > -typedef struct VC2BaseVideoFormat { > - enum AVPixelFormat pix_fmt; > - AVRational time_base; > - int width, height; > - uint8_t interlaced, level; > - char name[13]; > -} VC2BaseVideoFormat; > - > -static const VC2BaseVideoFormat base_video_fmts[] = { > - { 0 }, /* Custom format, here just to make indexing equal to base_vf */ > - { AV_PIX_FMT_YUV420P, { 1001, 15000 }, 176, 120, 0, 1, "QSIF525" }, > - { AV_PIX_FMT_YUV420P, { 2, 25 }, 176, 144, 0, 1, "QCIF" }, > - { AV_PIX_FMT_YUV420P, { 1001, 15000 }, 352, 240, 0, 1, "SIF525" }, > - { AV_PIX_FMT_YUV420P, { 2, 25 }, 352, 288, 0, 1, "CIF" }, > - { AV_PIX_FMT_YUV420P, { 1001, 15000 }, 704, 480, 0, 1, "4SIF525" }, > - { AV_PIX_FMT_YUV420P, { 2, 25 }, 704, 576, 0, 1, "4CIF" }, > - > - { AV_PIX_FMT_YUV422P10, { 1001, 30000 }, 720, 480, 1, 2, "SD480I-60" }, > - { AV_PIX_FMT_YUV422P10, { 1, 25 }, 720, 576, 1, 2, "SD576I-50" }, > - > - { AV_PIX_FMT_YUV422P10, { 1001, 60000 }, 1280, 720, 0, 3, "HD720P-60" }, > - { AV_PIX_FMT_YUV422P10, { 1, 50 }, 1280, 720, 0, 3, "HD720P-50" }, > - { AV_PIX_FMT_YUV422P10, { 1001, 30000 }, 1920, 1080, 1, 3, "HD1080I-60" }, > - { AV_PIX_FMT_YUV422P10, { 1, 25 }, 1920, 1080, 1, 3, "HD1080I-50" }, > - { AV_PIX_FMT_YUV422P10, { 1001, 60000 }, 1920, 1080, 0, 3, "HD1080P-60" }, > - { AV_PIX_FMT_YUV422P10, { 1, 50 }, 1920, 1080, 0, 3, "HD1080P-50" }, > - > - { AV_PIX_FMT_YUV444P12, { 1, 24 }, 2048, 1080, 0, 4, "DC2K" }, > - { AV_PIX_FMT_YUV444P12, { 1, 24 }, 4096, 2160, 0, 5, "DC4K" }, > - > - { AV_PIX_FMT_YUV422P10, { 1001, 60000 }, 3840, 2160, 0, 6, "UHDTV 4K-60" }, > - { AV_PIX_FMT_YUV422P10, { 1, 
50 }, 3840, 2160, 0, 6, "UHDTV 4K-50" }, > - > - { AV_PIX_FMT_YUV422P10, { 1001, 60000 }, 7680, 4320, 0, 7, "UHDTV 8K-60" }, > - { AV_PIX_FMT_YUV422P10, { 1, 50 }, 7680, 4320, 0, 7, "UHDTV 8K-50" }, > - > - { AV_PIX_FMT_YUV422P10, { 1001, 24000 }, 1920, 1080, 0, 3, "HD1080P-24" }, > - { AV_PIX_FMT_YUV422P10, { 1001, 30000 }, 720, 486, 1, 2, "SD Pro486" }, > -}; > -static const int base_video_fmts_len = FF_ARRAY_ELEMS(base_video_fmts); > - > -enum VC2_QM { > - VC2_QM_DEF = 0, > - VC2_QM_COL, > - VC2_QM_FLAT, > - > - VC2_QM_NB > -}; > - > -typedef struct SubBand { > - dwtcoef *buf; > - ptrdiff_t stride; > - int width; > - int height; > -} SubBand; > - > -typedef struct Plane { > - SubBand band[MAX_DWT_LEVELS][4]; > - dwtcoef *coef_buf; > - int width; > - int height; > - int dwt_width; > - int dwt_height; > - ptrdiff_t coef_stride; > -} Plane; > - > -typedef struct SliceArgs { > - const struct VC2EncContext *ctx; > - union { > - int cache[DIRAC_MAX_QUANT_INDEX]; > - uint8_t *buf; > - }; > - int x; > - int y; > - int quant_idx; > - int bits_ceil; > - int bits_floor; > - int bytes; > -} SliceArgs; > - > -typedef struct TransformArgs { > - const struct VC2EncContext *ctx; > - Plane *plane; > - const void *idata; > - ptrdiff_t istride; > - int field; > - VC2TransformContext t; > -} TransformArgs; > - > -typedef struct VC2EncContext { > - AVClass *av_class; > - PutBitContext pb; > - Plane plane[3]; > - AVCodecContext *avctx; > - DiracVersionInfo ver; > - > - SliceArgs *slice_args; > - TransformArgs transform_args[3]; > - > - /* For conversion from unsigned pixel values to signed */ > - int diff_offset; > - int bpp; > - int bpp_idx; > - > - /* Picture number */ > - uint32_t picture_number; > - > - /* Base video format */ > - int base_vf; > - int level; > - int profile; > - > - /* Quantization matrix */ > - uint8_t quant[MAX_DWT_LEVELS][4]; > - int custom_quant_matrix; > - > - /* Division LUT */ > - uint32_t qmagic_lut[116][2]; > - > - int num_x; /* #slices horizontally */ > 
- int num_y; /* #slices vertically */ > - int prefix_bytes; > - int size_scaler; > - int chroma_x_shift; > - int chroma_y_shift; > - > - /* Rate control stuff */ > - int frame_max_bytes; > - int slice_max_bytes; > - int slice_min_bytes; > - int q_ceil; > - int q_avg; > - > - /* Options */ > - double tolerance; > - int wavelet_idx; > - int wavelet_depth; > - int strict_compliance; > - int slice_height; > - int slice_width; > - int interlaced; > - enum VC2_QM quant_matrix; > - > - /* Parse code state */ > - uint32_t next_parse_offset; > - enum DiracParseCodes last_parse_code; > -} VC2EncContext; > - > -/// x_k x_{k-1} ... x_0 -> 0 x_k 0 x_{k - 1} ... 0 x_0 > -static uint16_t interleaved_ue_golomb_tab[256]; > -/// 1 x_{k-1} ... x_0 -> 0 0 0 x_{k - 1} ... 0 x_0 > -static uint16_t top_interleaved_ue_golomb_tab[256]; > -/// 1 x_{k-1} ... x_0 -> 2 * k > -static uint8_t golomb_len_tab[256]; > - > -static av_cold void vc2_init_static_data(void) > -{ > - interleaved_ue_golomb_tab[1] = 1; > - for (unsigned i = 2; i < 256; ++i) { > - golomb_len_tab[i] = golomb_len_tab[i >> 1] + 2; > - interleaved_ue_golomb_tab[i] = (interleaved_ue_golomb_tab[i >> 1] << 2) | (i & 1); > - top_interleaved_ue_golomb_tab[i] = interleaved_ue_golomb_tab[i] ^ (1 << golomb_len_tab[i]); > - } > -} > - > -static av_always_inline void put_vc2_ue_uint_inline(PutBitContext *pb, uint32_t val) > -{ > - uint64_t pbits = 1; > - int bits = 1; > - > - ++val; > - > - while (val >> 8) { > - pbits |= (uint64_t)interleaved_ue_golomb_tab[val & 0xff] << bits; > - val >>= 8; > - bits += 16; > - } > - pbits |= (uint64_t)top_interleaved_ue_golomb_tab[val] << bits; > - bits += golomb_len_tab[val]; > - > - put_bits63(pb, bits, pbits); > -} > - > -static av_noinline void put_vc2_ue_uint(PutBitContext *pb, uint32_t val) > -{ > - put_vc2_ue_uint_inline(pb, val); > -} > - > -static av_always_inline int count_vc2_ue_uint(uint32_t val) > -{ > - return 2 * av_log2(val + 1) + 1; > -} > - > -/* VC-2 10.4 - parse_info() */ > -static 
void encode_parse_info(VC2EncContext *s, enum DiracParseCodes pcode) > -{ > - uint32_t cur_pos, dist; > - > - align_put_bits(&s->pb); > - > - cur_pos = put_bytes_count(&s->pb, 0); > - > - /* Magic string */ > - ff_put_string(&s->pb, "BBCD", 0); > - > - /* Parse code */ > - put_bits(&s->pb, 8, pcode); > - > - /* Next parse offset */ > - dist = cur_pos - s->next_parse_offset; > - AV_WB32(s->pb.buf + s->next_parse_offset + 5, dist); > - s->next_parse_offset = cur_pos; > - put_bits32(&s->pb, pcode == DIRAC_PCODE_END_SEQ ? 13 : 0); > - > - /* Last parse offset */ > - put_bits32(&s->pb, s->last_parse_code == DIRAC_PCODE_END_SEQ ? 13 : dist); > - > - s->last_parse_code = pcode; > -} > - > -/* VC-2 11.1 - parse_parameters() > - * The level dictates what the decoder should expect in terms of resolution > - * and allows it to quickly reject whatever it can't support. Remember, > - * this codec kinda targets cheapo FPGAs without much memory. Unfortunately > - * it also limits us greatly in our choice of formats, hence the flag to disable > - * strict_compliance */ > -static void encode_parse_params(VC2EncContext *s) > -{ > - put_vc2_ue_uint(&s->pb, s->ver.major); /* VC-2 demands this to be 2 */ > - put_vc2_ue_uint(&s->pb, s->ver.minor); /* ^^ and this to be 0 */ > - put_vc2_ue_uint(&s->pb, s->profile); /* 3 to signal HQ profile */ > - put_vc2_ue_uint(&s->pb, s->level); /* 3 - 1080/720, 6 - 4K */ > -} > - > -/* VC-2 11.3 - frame_size() */ > -static void encode_frame_size(VC2EncContext *s) > -{ > - put_bits(&s->pb, 1, !s->strict_compliance); > - if (!s->strict_compliance) { > - AVCodecContext *avctx = s->avctx; > - put_vc2_ue_uint(&s->pb, avctx->width); > - put_vc2_ue_uint(&s->pb, avctx->height); > - } > -} > - > -/* VC-2 11.3.3 - color_diff_sampling_format() */ > -static void encode_sample_fmt(VC2EncContext *s) > -{ > - put_bits(&s->pb, 1, !s->strict_compliance); > - if (!s->strict_compliance) { > - int idx; > - if (s->chroma_x_shift == 1 && s->chroma_y_shift == 0) > - idx = 
1; /* 422 */ > - else if (s->chroma_x_shift == 1 && s->chroma_y_shift == 1) > - idx = 2; /* 420 */ > - else > - idx = 0; /* 444 */ > - put_vc2_ue_uint(&s->pb, idx); > - } > -} > - > -/* VC-2 11.3.4 - scan_format() */ > -static void encode_scan_format(VC2EncContext *s) > -{ > - put_bits(&s->pb, 1, !s->strict_compliance); > - if (!s->strict_compliance) > - put_vc2_ue_uint(&s->pb, s->interlaced); > -} > - > -/* VC-2 11.3.5 - frame_rate() */ > -static void encode_frame_rate(VC2EncContext *s) > -{ > - put_bits(&s->pb, 1, !s->strict_compliance); > - if (!s->strict_compliance) { > - AVCodecContext *avctx = s->avctx; > - put_vc2_ue_uint(&s->pb, 0); > - put_vc2_ue_uint(&s->pb, avctx->time_base.den); > - put_vc2_ue_uint(&s->pb, avctx->time_base.num); > - } > -} > - > -/* VC-2 11.3.6 - aspect_ratio() */ > -static void encode_aspect_ratio(VC2EncContext *s) > -{ > - put_bits(&s->pb, 1, !s->strict_compliance); > - if (!s->strict_compliance) { > - AVCodecContext *avctx = s->avctx; > - put_vc2_ue_uint(&s->pb, 0); > - put_vc2_ue_uint(&s->pb, avctx->sample_aspect_ratio.num); > - put_vc2_ue_uint(&s->pb, avctx->sample_aspect_ratio.den); > - } > -} > - > -/* VC-2 11.3.7 - clean_area() */ > -static void encode_clean_area(VC2EncContext *s) > -{ > - put_bits(&s->pb, 1, 0); > -} > - > -/* VC-2 11.3.8 - signal_range() */ > -static void encode_signal_range(VC2EncContext *s) > -{ > - put_bits(&s->pb, 1, !s->strict_compliance); > - if (!s->strict_compliance) > - put_vc2_ue_uint(&s->pb, s->bpp_idx); > -} > - > -/* VC-2 11.3.9 - color_spec() */ > -static void encode_color_spec(VC2EncContext *s) > -{ > - AVCodecContext *avctx = s->avctx; > - put_bits(&s->pb, 1, !s->strict_compliance); > - if (!s->strict_compliance) { > - int val; > - put_vc2_ue_uint(&s->pb, 0); > - > - /* primaries */ > - put_bits(&s->pb, 1, 1); > - if (avctx->color_primaries == AVCOL_PRI_BT470BG) > - val = 2; > - else if (avctx->color_primaries == AVCOL_PRI_SMPTE170M) > - val = 1; > - else if (avctx->color_primaries == 
AVCOL_PRI_SMPTE240M) > - val = 1; > - else > - val = 0; > - put_vc2_ue_uint(&s->pb, val); > - > - /* color matrix */ > - put_bits(&s->pb, 1, 1); > - if (avctx->colorspace == AVCOL_SPC_RGB) > - val = 3; > - else if (avctx->colorspace == AVCOL_SPC_YCOCG) > - val = 2; > - else if (avctx->colorspace == AVCOL_SPC_BT470BG) > - val = 1; > - else > - val = 0; > - put_vc2_ue_uint(&s->pb, val); > - > - /* transfer function */ > - put_bits(&s->pb, 1, 1); > - if (avctx->color_trc == AVCOL_TRC_LINEAR) > - val = 2; > - else if (avctx->color_trc == AVCOL_TRC_BT1361_ECG) > - val = 1; > - else > - val = 0; > - put_vc2_ue_uint(&s->pb, val); > - } > -} > - > -/* VC-2 11.3 - source_parameters() */ > -static void encode_source_params(VC2EncContext *s) > -{ > - encode_frame_size(s); > - encode_sample_fmt(s); > - encode_scan_format(s); > - encode_frame_rate(s); > - encode_aspect_ratio(s); > - encode_clean_area(s); > - encode_signal_range(s); > - encode_color_spec(s); > -} > - > -/* VC-2 11 - sequence_header() */ > -static void encode_seq_header(VC2EncContext *s) > -{ > - align_put_bits(&s->pb); > - encode_parse_params(s); > - put_vc2_ue_uint(&s->pb, s->base_vf); > - encode_source_params(s); > - put_vc2_ue_uint(&s->pb, s->interlaced); /* Frames or fields coding */ > -} > - > -/* VC-2 12.1 - picture_header() */ > -static void encode_picture_header(VC2EncContext *s) > -{ > - align_put_bits(&s->pb); > - put_bits32(&s->pb, s->picture_number++); > -} > - > -/* VC-2 12.3.4.1 - slice_parameters() */ > -static void encode_slice_params(VC2EncContext *s) > -{ > - put_vc2_ue_uint(&s->pb, s->num_x); > - put_vc2_ue_uint(&s->pb, s->num_y); > - put_vc2_ue_uint(&s->pb, s->prefix_bytes); > - put_vc2_ue_uint(&s->pb, s->size_scaler); > -} > - > -/* 1st idx = LL, second - vertical, third - horizontal, fourth - total */ > -static const uint8_t vc2_qm_col_tab[][4] = { > - {20, 9, 15, 4}, > - { 0, 6, 6, 4}, > - { 0, 3, 3, 5}, > - { 0, 3, 5, 1}, > - { 0, 11, 10, 11} > -}; > - > -static const uint8_t 
vc2_qm_flat_tab[][4] = { > - { 0, 0, 0, 0}, > - { 0, 0, 0, 0}, > - { 0, 0, 0, 0}, > - { 0, 0, 0, 0}, > - { 0, 0, 0, 0} > -}; > - > -static void init_quant_matrix(VC2EncContext *s) > -{ > - int level, orientation; > - > - if (s->wavelet_depth <= 4 && s->quant_matrix == VC2_QM_DEF) { > - s->custom_quant_matrix = 0; > - for (level = 0; level < s->wavelet_depth; level++) { > - s->quant[level][0] = ff_dirac_default_qmat[s->wavelet_idx][level][0]; > - s->quant[level][1] = ff_dirac_default_qmat[s->wavelet_idx][level][1]; > - s->quant[level][2] = ff_dirac_default_qmat[s->wavelet_idx][level][2]; > - s->quant[level][3] = ff_dirac_default_qmat[s->wavelet_idx][level][3]; > - } > - return; > - } > - > - s->custom_quant_matrix = 1; > - > - if (s->quant_matrix == VC2_QM_DEF) { > - for (level = 0; level < s->wavelet_depth; level++) { > - for (orientation = 0; orientation < 4; orientation++) { > - if (level <= 3) > - s->quant[level][orientation] = ff_dirac_default_qmat[s->wavelet_idx][level][orientation]; > - else > - s->quant[level][orientation] = vc2_qm_col_tab[level][orientation]; > - } > - } > - } else if (s->quant_matrix == VC2_QM_COL) { > - for (level = 0; level < s->wavelet_depth; level++) { > - for (orientation = 0; orientation < 4; orientation++) { > - s->quant[level][orientation] = vc2_qm_col_tab[level][orientation]; > - } > - } > - } else { > - for (level = 0; level < s->wavelet_depth; level++) { > - for (orientation = 0; orientation < 4; orientation++) { > - s->quant[level][orientation] = vc2_qm_flat_tab[level][orientation]; > - } > - } > - } > -} > - > -/* VC-2 12.3.4.2 - quant_matrix() */ > -static void encode_quant_matrix(VC2EncContext *s) > -{ > - int level; > - put_bits(&s->pb, 1, s->custom_quant_matrix); > - if (s->custom_quant_matrix) { > - put_vc2_ue_uint(&s->pb, s->quant[0][0]); > - for (level = 0; level < s->wavelet_depth; level++) { > - put_vc2_ue_uint(&s->pb, s->quant[level][1]); > - put_vc2_ue_uint(&s->pb, s->quant[level][2]); > - put_vc2_ue_uint(&s->pb, 
s->quant[level][3]); > - } > - } > -} > - > -/* VC-2 12.3 - transform_parameters() */ > -static void encode_transform_params(VC2EncContext *s) > -{ > - put_vc2_ue_uint(&s->pb, s->wavelet_idx); > - put_vc2_ue_uint(&s->pb, s->wavelet_depth); > - > - encode_slice_params(s); > - encode_quant_matrix(s); > -} > - > -/* VC-2 12.2 - wavelet_transform() */ > -static void encode_wavelet_transform(VC2EncContext *s) > -{ > - encode_transform_params(s); > - align_put_bits(&s->pb); > -} > - > -/* VC-2 12 - picture_parse() */ > -static void encode_picture_start(VC2EncContext *s) > -{ > - align_put_bits(&s->pb); > - encode_picture_header(s); > - align_put_bits(&s->pb); > - encode_wavelet_transform(s); > -} > - > #define QUANT(c, mul, add, shift) (((mul) * (c) + (add)) >> (shift)) > > /* VC-2 13.5.5.2 - slice_band() */ > @@ -558,6 +64,11 @@ static void encode_subband(const VC2EncContext *s, PutBitContext *pb, > } > } > > +static inline int count_vc2_ue_uint(uint32_t val) > +{ > + return 2 * av_log2(val + 1) + 1; > +} > + > static int count_hq_slice(SliceArgs *slice, int quant_idx) > { > int x, y; > @@ -657,7 +168,7 @@ static int calc_slice_sizes(VC2EncContext *s) > SliceArgs *enc_args = s->slice_args; > SliceArgs *top_loc[SLICE_REDIST_TOTAL] = {NULL}; > > - init_quant_matrix(s); > + ff_vc2_init_quant_matrix(s, s->quant); > > for (slice_y = 0; slice_y < s->num_y; slice_y++) { > for (slice_x = 0; slice_x < s->num_x; slice_x++) { > @@ -782,7 +293,7 @@ static int encode_hq_slice(AVCodecContext *avctx, void *arg) > } > > /* VC-2 13.5.1 - low_delay_transform_data() */ > -static int encode_slices(VC2EncContext *s) > +static void encode_slices(VC2EncContext *s) > { > uint8_t *buf; > int slice_x, slice_y, skip = 0; > @@ -803,8 +314,6 @@ static int encode_slices(VC2EncContext *s) > sizeof(SliceArgs)); > > skip_put_bytes(&s->pb, skip); > - > - return 0; > } > > /* > @@ -902,7 +411,7 @@ static int dwt_plane(AVCodecContext *avctx, void *arg) > } > > static int encode_frame(VC2EncContext *s, 
AVPacket *avpkt, const AVFrame *frame, > - const char *aux_data, const int header_size, int field) > + const int header_size, int field) > { > int i, ret; > int64_t max_frame_bytes; > @@ -929,25 +438,8 @@ static int encode_frame(VC2EncContext *s, AVPacket *avpkt, const AVFrame *frame, > init_put_bits(&s->pb, avpkt->data, avpkt->size); > } > > - /* Sequence header */ > - encode_parse_info(s, DIRAC_PCODE_SEQ_HEADER); > - encode_seq_header(s); > - > - /* Encoder version */ > - if (aux_data) { > - encode_parse_info(s, DIRAC_PCODE_AUX); > - ff_put_string(&s->pb, aux_data, 1); > - } > - > - /* Picture header */ > - encode_parse_info(s, DIRAC_PCODE_PICTURE_HQ); > - encode_picture_start(s); > - > - /* Encode slices */ > - encode_slices(s); > - > - /* End sequence */ > - encode_parse_info(s, DIRAC_PCODE_END_SEQ); > + /* Encode frame */ > + ff_vc2_encode_frame(s, encode_slices); > > return 0; > } > @@ -956,45 +448,20 @@ static av_cold int vc2_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, > const AVFrame *frame, int *got_packet) > { > int ret = 0; > - int slice_ceil, sig_size = 256; > VC2EncContext *s = avctx->priv_data; > const int bitexact = avctx->flags & AV_CODEC_FLAG_BITEXACT; > - const char *aux_data = bitexact ? "Lavc" : LIBAVCODEC_IDENT; > const int aux_data_size = bitexact ? 
sizeof("Lavc") : sizeof(LIBAVCODEC_IDENT); > const int header_size = 100 + aux_data_size; > - int64_t r_bitrate = avctx->bit_rate >> (s->interlaced); > - > - s->avctx = avctx; > - s->size_scaler = 2; > - s->prefix_bytes = 0; > - s->last_parse_code = 0; > - s->next_parse_offset = 0; > - > - /* Rate control */ > - s->frame_max_bytes = (av_rescale(r_bitrate, s->avctx->time_base.num, > - s->avctx->time_base.den) >> 3) - header_size; > - s->slice_max_bytes = slice_ceil = av_rescale(s->frame_max_bytes, 1, s->num_x*s->num_y); > - > - /* Find an appropriate size scaler */ > - while (sig_size > 255) { > - int r_size = SSIZE_ROUND(s->slice_max_bytes); > - if (r_size > slice_ceil) { > - s->slice_max_bytes -= r_size - slice_ceil; > - r_size = SSIZE_ROUND(s->slice_max_bytes); > - } > - sig_size = r_size/s->size_scaler; /* Signalled slize size */ > - s->size_scaler <<= 1; > - } > > - s->slice_min_bytes = s->slice_max_bytes - s->slice_max_bytes*(s->tolerance/100.0f); > - if (s->slice_min_bytes < 0 || s->slice_max_bytes > INT_MAX >> 3) > - return AVERROR(EINVAL); > + ret = ff_vc2_frame_init_properties(avctx, s); > + if (ret) > + return ret; > > - ret = encode_frame(s, avpkt, frame, aux_data, header_size, s->interlaced); > + ret = encode_frame(s, avpkt, frame, header_size, s->interlaced); > if (ret) > return ret; > if (s->interlaced) { > - ret = encode_frame(s, avpkt, frame, aux_data, header_size, 2); > + ret = encode_frame(s, avpkt, frame, header_size, 2); > if (ret) > return ret; > } > @@ -1026,83 +493,13 @@ static av_cold int vc2_encode_end(AVCodecContext *avctx) > > static av_cold int vc2_encode_init(AVCodecContext *avctx) > { > - static AVOnce init_static_once = AV_ONCE_INIT; > Plane *p; > SubBand *b; > - int i, level, o, shift; > + int ret, i, level, o, shift; > const AVPixFmtDescriptor *pixdesc; > int depth; > VC2EncContext *s = avctx->priv_data; > > - s->picture_number = 0; > - > - /* Total allowed quantization range */ > - s->q_ceil = DIRAC_MAX_QUANT_INDEX; > - > - 
s->ver.major = 2; > - s->ver.minor = 0; > - s->profile = 3; > - s->level = 3; > - > - s->base_vf = -1; > - s->strict_compliance = 1; > - > - s->q_avg = 0; > - s->slice_max_bytes = 0; > - s->slice_min_bytes = 0; > - > - /* Mark unknown as progressive */ > - s->interlaced = !((avctx->field_order == AV_FIELD_UNKNOWN) || > - (avctx->field_order == AV_FIELD_PROGRESSIVE)); > - > - for (i = 0; i < base_video_fmts_len; i++) { > - const VC2BaseVideoFormat *fmt = &base_video_fmts[i]; > - if (avctx->pix_fmt != fmt->pix_fmt) > - continue; > - if (avctx->time_base.num != fmt->time_base.num) > - continue; > - if (avctx->time_base.den != fmt->time_base.den) > - continue; > - if (avctx->width != fmt->width) > - continue; > - if (avctx->height != fmt->height) > - continue; > - if (s->interlaced != fmt->interlaced) > - continue; > - s->base_vf = i; > - s->level = base_video_fmts[i].level; > - break; > - } > - > - if (s->interlaced) > - av_log(avctx, AV_LOG_WARNING, "Interlacing enabled!\n"); > - > - if ((s->slice_width & (s->slice_width - 1)) || > - (s->slice_height & (s->slice_height - 1))) { > - av_log(avctx, AV_LOG_ERROR, "Slice size is not a power of two!\n"); > - return AVERROR(EINVAL); > - } > - > - if ((s->slice_width > avctx->width) || > - (s->slice_height > avctx->height)) { > - av_log(avctx, AV_LOG_ERROR, "Slice size is bigger than the image!\n"); > - return AVERROR(EINVAL); > - } > - > - if (s->base_vf <= 0) { > - if (avctx->strict_std_compliance < FF_COMPLIANCE_STRICT) { > - s->strict_compliance = s->base_vf = 0; > - av_log(avctx, AV_LOG_WARNING, "Format does not strictly comply with VC2 specs\n"); > - } else { > - av_log(avctx, AV_LOG_WARNING, "Given format does not strictly comply with " > - "the specifications, decrease strictness to use it.\n"); > - return AVERROR(EINVAL); > - } > - } else { > - av_log(avctx, AV_LOG_INFO, "Selected base video format = %i (%s)\n", > - s->base_vf, base_video_fmts[s->base_vf].name); > - } > - > pixdesc = 
av_pix_fmt_desc_get(avctx->pix_fmt); > /* Chroma subsampling */ > s->chroma_x_shift = pixdesc->log2_chroma_w; > @@ -1110,47 +507,21 @@ static av_cold int vc2_encode_init(AVCodecContext *avctx) > > /* Bit depth and color range index */ > depth = pixdesc->comp[0].depth; > - if (depth == 8 && avctx->color_range == AVCOL_RANGE_JPEG) { > - s->bpp = 1; > - s->bpp_idx = 1; > - s->diff_offset = 128; > - } else if (depth == 8 && (avctx->color_range == AVCOL_RANGE_MPEG || > - avctx->color_range == AVCOL_RANGE_UNSPECIFIED)) { > - s->bpp = 1; > - s->bpp_idx = 2; > - s->diff_offset = 128; > - } else if (depth == 10) { > - s->bpp = 2; > - s->bpp_idx = 3; > - s->diff_offset = 512; > - } else { > - s->bpp = 2; > - s->bpp_idx = 4; > - s->diff_offset = 2048; > - } > + > + /* Context initialization */ > + ret = ff_vc2_encode_init(avctx, depth); > + if (ret < 0) > + return ret; > > /* Planes initialization */ > for (i = 0; i < 3; i++) { > - int w, h; > p = &s->plane[i]; > - p->width = avctx->width >> (i ? s->chroma_x_shift : 0); > - p->height = avctx->height >> (i ? 
s->chroma_y_shift : 0); > - if (s->interlaced) > - p->height >>= 1; > - p->dwt_width = w = FFALIGN(p->width, (1 << s->wavelet_depth)); > - p->dwt_height = h = FFALIGN(p->height, (1 << s->wavelet_depth)); > - p->coef_stride = FFALIGN(p->dwt_width, 32); > p->coef_buf = av_mallocz(p->coef_stride*p->dwt_height*sizeof(dwtcoef)); > if (!p->coef_buf) > return AVERROR(ENOMEM); > for (level = s->wavelet_depth-1; level >= 0; level--) { > - w = w >> 1; > - h = h >> 1; > for (o = 0; o < 4; o++) { > b = &p->band[level][o]; > - b->width = w; > - b->height = h; > - b->stride = p->coef_stride; > shift = (o > 1)*b->height*b->stride + (o & 1)*b->width; > b->buf = p->coef_buf + shift; > } > @@ -1164,10 +535,6 @@ static av_cold int vc2_encode_init(AVCodecContext *avctx) > return AVERROR(ENOMEM); > } > > - /* Slices */ > - s->num_x = s->plane[0].dwt_width/s->slice_width; > - s->num_y = s->plane[0].dwt_height/s->slice_height; > - > s->slice_args = av_calloc(s->num_x*s->num_y, sizeof(SliceArgs)); > if (!s->slice_args) > return AVERROR(ENOMEM); > @@ -1189,8 +556,6 @@ static av_cold int vc2_encode_init(AVCodecContext *avctx) > } > } > > - ff_thread_once(&init_static_once, vc2_init_static_data); > - > return 0; > } > > diff --git a/libavcodec/vc2enc_common.c b/libavcodec/vc2enc_common.c > new file mode 100644 > index 0000000000..bd27fd3c40 > --- /dev/null > +++ b/libavcodec/vc2enc_common.c > @@ -0,0 +1,571 @@ > +/* > +* Copyright (C) 2016 Open Broadcast Systems Ltd. > +* Author 2016 Rostislav Pehlivanov <atomnuker@gmail.com> > +* > +* This file is part of FFmpeg. > +* > +* FFmpeg is free software; you can redistribute it and/or > +* modify it under the terms of the GNU Lesser General Public > +* License as published by the Free Software Foundation; either > +* version 2.1 of the License, or (at your option) any later version. 
> +* > +* FFmpeg is distributed in the hope that it will be useful, > +* but WITHOUT ANY WARRANTY; without even the implied warranty of > +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > +* Lesser General Public License for more details. > +* > +* You should have received a copy of the GNU Lesser General Public > +* License along with FFmpeg; if not, write to the Free Software > +* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA > +*/ > + > +#include "libavutil/pixdesc.h" > +#include "libavutil/thread.h" > +#include "vc2enc_common.h" > +#include "version.h" > + > +typedef struct VC2BaseVideoFormat { > + enum AVPixelFormat pix_fmt; > + AVRational time_base; > + int width, height; > + uint8_t interlaced, level; > + char name[13]; > +} VC2BaseVideoFormat; > + > +static const VC2BaseVideoFormat base_video_fmts[] = { > + { 0 }, /* Custom format, here just to make indexing equal to base_vf */ > + { AV_PIX_FMT_YUV420P, { 1001, 15000 }, 176, 120, 0, 1, "QSIF525" }, > + { AV_PIX_FMT_YUV420P, { 2, 25 }, 176, 144, 0, 1, "QCIF" }, > + { AV_PIX_FMT_YUV420P, { 1001, 15000 }, 352, 240, 0, 1, "SIF525" }, > + { AV_PIX_FMT_YUV420P, { 2, 25 }, 352, 288, 0, 1, "CIF" }, > + { AV_PIX_FMT_YUV420P, { 1001, 15000 }, 704, 480, 0, 1, "4SIF525" }, > + { AV_PIX_FMT_YUV420P, { 2, 25 }, 704, 576, 0, 1, "4CIF" }, > + > + { AV_PIX_FMT_YUV422P10, { 1001, 30000 }, 720, 480, 1, 2, "SD480I-60" }, > + { AV_PIX_FMT_YUV422P10, { 1, 25 }, 720, 576, 1, 2, "SD576I-50" }, > + > + { AV_PIX_FMT_YUV422P10, { 1001, 60000 }, 1280, 720, 0, 3, "HD720P-60" }, > + { AV_PIX_FMT_YUV422P10, { 1, 50 }, 1280, 720, 0, 3, "HD720P-50" }, > + { AV_PIX_FMT_YUV422P10, { 1001, 30000 }, 1920, 1080, 1, 3, "HD1080I-60" }, > + { AV_PIX_FMT_YUV422P10, { 1, 25 }, 1920, 1080, 1, 3, "HD1080I-50" }, > + { AV_PIX_FMT_YUV422P10, { 1001, 60000 }, 1920, 1080, 0, 3, "HD1080P-60" }, > + { AV_PIX_FMT_YUV422P10, { 1, 50 }, 1920, 1080, 0, 3, "HD1080P-50" }, > + > + { AV_PIX_FMT_YUV444P12, { 1, 24 
}, 2048, 1080, 0, 4, "DC2K" }, > + { AV_PIX_FMT_YUV444P12, { 1, 24 }, 4096, 2160, 0, 5, "DC4K" }, > + > + { AV_PIX_FMT_YUV422P10, { 1001, 60000 }, 3840, 2160, 0, 6, "UHDTV 4K-60" }, > + { AV_PIX_FMT_YUV422P10, { 1, 50 }, 3840, 2160, 0, 6, "UHDTV 4K-50" }, > + > + { AV_PIX_FMT_YUV422P10, { 1001, 60000 }, 7680, 4320, 0, 7, "UHDTV 8K-60" }, > + { AV_PIX_FMT_YUV422P10, { 1, 50 }, 7680, 4320, 0, 7, "UHDTV 8K-50" }, > + > + { AV_PIX_FMT_YUV422P10, { 1001, 24000 }, 1920, 1080, 0, 3, "HD1080P-24" }, > + { AV_PIX_FMT_YUV422P10, { 1001, 30000 }, 720, 486, 1, 2, "SD Pro486" }, > +}; > +static const int base_video_fmts_len = FF_ARRAY_ELEMS(base_video_fmts); > + > +/// x_k x_{k-1} ... x_0 -> 0 x_k 0 x_{k - 1} ... 0 x_0 > +uint16_t interleaved_ue_golomb_tab[256]; > +/// 1 x_{k-1} ... x_0 -> 0 0 0 x_{k - 1} ... 0 x_0 > +uint16_t top_interleaved_ue_golomb_tab[256]; > +/// 1 x_{k-1} ... x_0 -> 2 * k > +uint8_t golomb_len_tab[256]; > + > +static av_cold void vc2_init_static_data(void) > +{ > + interleaved_ue_golomb_tab[1] = 1; > + for (unsigned i = 2; i < 256; ++i) { > + golomb_len_tab[i] = golomb_len_tab[i >> 1] + 2; > + interleaved_ue_golomb_tab[i] = (interleaved_ue_golomb_tab[i >> 1] << 2) | (i & 1); > + top_interleaved_ue_golomb_tab[i] = interleaved_ue_golomb_tab[i] ^ (1 << golomb_len_tab[i]); > + } > +} > + > +static void put_vc2_ue_uint(PutBitContext *pb, uint32_t val) > +{ > + put_vc2_ue_uint_inline(pb, val); > +} > + > +/* VC-2 10.4 - parse_info() */ > +static void encode_parse_info(VC2EncContext *s, enum DiracParseCodes pcode) > +{ > + uint32_t cur_pos, dist; > + > + align_put_bits(&s->pb); > + > + cur_pos = put_bytes_count(&s->pb, 0); > + > + /* Magic string */ > + ff_put_string(&s->pb, "BBCD", 0); > + > + /* Parse code */ > + put_bits(&s->pb, 8, pcode); > + > + /* Next parse offset */ > + dist = cur_pos - s->next_parse_offset; > + AV_WB32(s->pb.buf + s->next_parse_offset + 5, dist); > + s->next_parse_offset = cur_pos; > + put_bits32(&s->pb, pcode == DIRAC_PCODE_END_SEQ ? 
13 : 0); > + > + cur_pos = put_bytes_count(&s->pb, 0); > + > + /* Last parse offset */ > + put_bits32(&s->pb, s->last_parse_code == DIRAC_PCODE_END_SEQ ? 13 : dist); > + > + s->last_parse_code = pcode; > +} > + > +/* VC-2 11.1 - parse_parameters() > +* The level dictates what the decoder should expect in terms of resolution > +* and allows it to quickly reject whatever it can't support. Remember, > +* this codec kinda targets cheapo FPGAs without much memory. Unfortunately > +* it also limits us greatly in our choice of formats, hence the flag to disable > +* strict_compliance */ > +static void encode_parse_params(VC2EncContext *s) > +{ > + put_vc2_ue_uint(&s->pb, s->ver.major); /* VC-2 demands this to be 2 */ > + put_vc2_ue_uint(&s->pb, s->ver.minor); /* ^^ and this to be 0 */ > + put_vc2_ue_uint(&s->pb, s->profile); /* 3 to signal HQ profile */ > + put_vc2_ue_uint(&s->pb, s->level); /* 3 - 1080/720, 6 - 4K */ > +} > + > +/* VC-2 11.3 - frame_size() */ > +static void encode_frame_size(VC2EncContext *s) > +{ > + put_bits(&s->pb, 1, !s->strict_compliance); > + if (!s->strict_compliance) { > + AVCodecContext *avctx = s->avctx; > + put_vc2_ue_uint(&s->pb, avctx->width); > + put_vc2_ue_uint(&s->pb, avctx->height); > + } > +} > + > +/* VC-2 11.3.3 - color_diff_sampling_format() */ > +static void encode_sample_fmt(VC2EncContext *s) > +{ > + put_bits(&s->pb, 1, !s->strict_compliance); > + if (!s->strict_compliance) { > + int idx; > + if (s->chroma_x_shift == 1 && s->chroma_y_shift == 0) > + idx = 1; /* 422 */ > + else if (s->chroma_x_shift == 1 && s->chroma_y_shift == 1) > + idx = 2; /* 420 */ > + else > + idx = 0; /* 444 */ > + put_vc2_ue_uint(&s->pb, idx); > + } > +} > + > +/* VC-2 11.3.4 - scan_format() */ > +static void encode_scan_format(VC2EncContext *s) > +{ > + put_bits(&s->pb, 1, !s->strict_compliance); > + if (!s->strict_compliance) > + put_vc2_ue_uint(&s->pb, s->interlaced); > +} > + > +/* VC-2 11.3.5 - frame_rate() */ > +static void 
encode_frame_rate(VC2EncContext *s) > +{ > + put_bits(&s->pb, 1, !s->strict_compliance); > + if (!s->strict_compliance) { > + AVCodecContext *avctx = s->avctx; > + put_vc2_ue_uint(&s->pb, 0); > + put_vc2_ue_uint(&s->pb, avctx->time_base.den); > + put_vc2_ue_uint(&s->pb, avctx->time_base.num); > + } > +} > + > +/* VC-2 11.3.6 - aspect_ratio() */ > +static void encode_aspect_ratio(VC2EncContext *s) > +{ > + put_bits(&s->pb, 1, !s->strict_compliance); > + if (!s->strict_compliance) { > + AVCodecContext *avctx = s->avctx; > + put_vc2_ue_uint(&s->pb, 0); > + put_vc2_ue_uint(&s->pb, avctx->sample_aspect_ratio.num); > + put_vc2_ue_uint(&s->pb, avctx->sample_aspect_ratio.den); > + } > +} > + > +/* VC-2 11.3.7 - clean_area() */ > +static void encode_clean_area(VC2EncContext *s) > +{ > + put_bits(&s->pb, 1, 0); > +} > + > +/* VC-2 11.3.8 - signal_range() */ > +static void encode_signal_range(VC2EncContext *s) > +{ > + put_bits(&s->pb, 1, !s->strict_compliance); > + if (!s->strict_compliance) > + put_vc2_ue_uint(&s->pb, s->bpp_idx); > +} > + > +/* VC-2 11.3.9 - color_spec() */ > +static void encode_color_spec(VC2EncContext *s) > +{ > + AVCodecContext *avctx = s->avctx; > + put_bits(&s->pb, 1, !s->strict_compliance); > + if (!s->strict_compliance) { > + int val; > + put_vc2_ue_uint(&s->pb, 0); > + > + /* primaries */ > + put_bits(&s->pb, 1, 1); > + if (avctx->color_primaries == AVCOL_PRI_BT470BG) > + val = 2; > + else if (avctx->color_primaries == AVCOL_PRI_SMPTE170M) > + val = 1; > + else if (avctx->color_primaries == AVCOL_PRI_SMPTE240M) > + val = 1; > + else > + val = 0; > + put_vc2_ue_uint(&s->pb, val); > + > + /* color matrix */ > + put_bits(&s->pb, 1, 1); > + if (avctx->colorspace == AVCOL_SPC_RGB) > + val = 3; > + else if (avctx->colorspace == AVCOL_SPC_YCOCG) > + val = 2; > + else if (avctx->colorspace == AVCOL_SPC_BT470BG) > + val = 1; > + else > + val = 0; > + put_vc2_ue_uint(&s->pb, val); > + > + /* transfer function */ > + put_bits(&s->pb, 1, 1); > + if 
(avctx->color_trc == AVCOL_TRC_LINEAR) > + val = 2; > + else if (avctx->color_trc == AVCOL_TRC_BT1361_ECG) > + val = 1; > + else > + val = 0; > + put_vc2_ue_uint(&s->pb, val); > + } > +} > + > +/* VC-2 11.3 - source_parameters() */ > +static void encode_source_params(VC2EncContext *s) > +{ > + encode_frame_size(s); > + encode_sample_fmt(s); > + encode_scan_format(s); > + encode_frame_rate(s); > + encode_aspect_ratio(s); > + encode_clean_area(s); > + encode_signal_range(s); > + encode_color_spec(s); > +} > + > +/* VC-2 11 - sequence_header() */ > +static void encode_seq_header(VC2EncContext *s) > +{ > + align_put_bits(&s->pb); > + encode_parse_params(s); > + put_vc2_ue_uint(&s->pb, s->base_vf); > + encode_source_params(s); > + put_vc2_ue_uint(&s->pb, s->interlaced); /* Frames or fields coding */ > +} > + > +/* VC-2 12.1 - picture_header() */ > +static void encode_picture_header(VC2EncContext *s) > +{ > + align_put_bits(&s->pb); > + put_bits32(&s->pb, s->picture_number++); > +} > + > +/* VC-2 12.3.4.1 - slice_parameters() */ > +static void encode_slice_params(VC2EncContext *s) > +{ > + put_vc2_ue_uint(&s->pb, s->num_x); > + put_vc2_ue_uint(&s->pb, s->num_y); > + put_vc2_ue_uint(&s->pb, s->prefix_bytes); > + put_vc2_ue_uint(&s->pb, s->size_scaler); > +} > + > +/* 1st idx = LL, second - vertical, third - horizontal, fourth - total */ > +static const uint8_t vc2_qm_col_tab[][4] = { > + {20, 9, 15, 4}, > + { 0, 6, 6, 4}, > + { 0, 3, 3, 5}, > + { 0, 3, 5, 1}, > + { 0, 11, 10, 11} > +}; > + > +static const uint8_t vc2_qm_flat_tab[][4] = { > + { 0, 0, 0, 0}, > + { 0, 0, 0, 0}, > + { 0, 0, 0, 0}, > + { 0, 0, 0, 0}, > + { 0, 0, 0, 0} > +}; > + > +void ff_vc2_init_quant_matrix(VC2EncContext *s, uint8_t quant[MAX_DWT_LEVELS][4]) > +{ > + int level, orientation; > + > + if (s->wavelet_depth <= 4 && s->quant_matrix == VC2_QM_DEF) { > + s->custom_quant_matrix = 0; > + for (level = 0; level < s->wavelet_depth; level++) { > + quant[level][0] = 
ff_dirac_default_qmat[s->wavelet_idx][level][0]; > + quant[level][1] = ff_dirac_default_qmat[s->wavelet_idx][level][1]; > + quant[level][2] = ff_dirac_default_qmat[s->wavelet_idx][level][2]; > + quant[level][3] = ff_dirac_default_qmat[s->wavelet_idx][level][3]; > + } > + return; > + } > + > + s->custom_quant_matrix = 1; > + > + if (s->quant_matrix == VC2_QM_DEF) { > + for (level = 0; level < s->wavelet_depth; level++) { > + for (orientation = 0; orientation < 4; orientation++) { > + if (level <= 3) > + quant[level][orientation] = ff_dirac_default_qmat[s->wavelet_idx][level][orientation]; > + else > + quant[level][orientation] = vc2_qm_col_tab[level][orientation]; > + } > + } > + } else if (s->quant_matrix == VC2_QM_COL) { > + for (level = 0; level < s->wavelet_depth; level++) { > + for (orientation = 0; orientation < 4; orientation++) { > + quant[level][orientation] = vc2_qm_col_tab[level][orientation]; > + } > + } > + } else { > + for (level = 0; level < s->wavelet_depth; level++) { > + for (orientation = 0; orientation < 4; orientation++) { > + quant[level][orientation] = vc2_qm_flat_tab[level][orientation]; > + } > + } > + } > +} > + > +/* VC-2 12.3.4.2 - quant_matrix() */ > +static void encode_quant_matrix(VC2EncContext *s) > +{ > + int level; > + put_bits(&s->pb, 1, s->custom_quant_matrix); > + if (s->custom_quant_matrix) { > + put_vc2_ue_uint(&s->pb, s->quant[0][0]); > + for (level = 0; level < s->wavelet_depth; level++) { > + put_vc2_ue_uint(&s->pb, s->quant[level][1]); > + put_vc2_ue_uint(&s->pb, s->quant[level][2]); > + put_vc2_ue_uint(&s->pb, s->quant[level][3]); > + } > + } > +} > + > +/* VC-2 12.3 - transform_parameters() */ > +static void encode_transform_params(VC2EncContext *s) > +{ > + put_vc2_ue_uint(&s->pb, s->wavelet_idx); > + put_vc2_ue_uint(&s->pb, s->wavelet_depth); > + > + encode_slice_params(s); > + encode_quant_matrix(s); > +} > + > +/* VC-2 12.2 - wavelet_transform() */ > +static void encode_wavelet_transform(VC2EncContext *s) > +{ > + 
encode_transform_params(s); > + align_put_bits(&s->pb); > +} > + > +/* VC-2 12 - picture_parse() */ > +static void encode_picture_start(VC2EncContext *s) > +{ > + align_put_bits(&s->pb); > + encode_picture_header(s); > + align_put_bits(&s->pb); > + encode_wavelet_transform(s); > +} > + > +int ff_vc2_encode_init(AVCodecContext *avctx, int depth) > +{ > + static AVOnce init_static_once = AV_ONCE_INIT; > + int i, level, o; > + SubBand *b; > + Plane *p; > + VC2EncContext *s = avctx->priv_data; > + > + s->picture_number = 0; > + > + /* Total allowed quantization range */ > + s->q_ceil = DIRAC_MAX_QUANT_INDEX; > + > + s->ver.major = 2; > + s->ver.minor = 0; > + s->profile = 3; > + s->level = 3; > + > + s->base_vf = -1; > + s->strict_compliance = 1; > + > + s->q_avg = 0; > + s->slice_max_bytes = 0; > + s->slice_min_bytes = 0; > + > + /* Mark unknown as progressive */ > + s->interlaced = !((avctx->field_order == AV_FIELD_UNKNOWN) || > + (avctx->field_order == AV_FIELD_PROGRESSIVE)); > + > + for (i = 0; i < base_video_fmts_len; i++) { > + const VC2BaseVideoFormat *fmt = &base_video_fmts[i]; > + if (avctx->pix_fmt != fmt->pix_fmt || avctx->time_base.num != fmt->time_base.num || > + avctx->time_base.den != fmt->time_base.den || avctx->width != fmt->width || > + avctx->height != fmt->height || s->interlaced != fmt->interlaced) > + continue; > + s->base_vf = i; > + s->level = base_video_fmts[i].level; > + break; > + } > + > + if (s->interlaced) > + av_log(avctx, AV_LOG_WARNING, "Interlacing enabled!\n"); > + > + if ((s->slice_width & (s->slice_width - 1)) || > + (s->slice_height & (s->slice_height - 1))) { > + av_log(avctx, AV_LOG_ERROR, "Slice size is not a power of two!\n"); > + return AVERROR(EINVAL); > + } > + > + if ((s->slice_width > avctx->width) || > + (s->slice_height > avctx->height)) { > + av_log(avctx, AV_LOG_ERROR, "Slice size is bigger than the image!\n"); > + return AVERROR(EINVAL); > + } > + > + if (s->base_vf <= 0) { > + if (avctx->strict_std_compliance < 
FF_COMPLIANCE_STRICT) { > + s->strict_compliance = s->base_vf = 0; > + av_log(avctx, AV_LOG_WARNING, "Format does not strictly comply with VC2 specs\n"); > + } else { > + av_log(avctx, AV_LOG_WARNING, "Given format does not strictly comply with " > + "the specifications, decrease strictness to use it.\n"); > + return AVERROR(EINVAL); > + } > + } else { > + av_log(avctx, AV_LOG_INFO, "Selected base video format = %i (%s)\n", > + s->base_vf, base_video_fmts[s->base_vf].name); > + } > + > + /* Bit depth and color range index */ > + if (depth == 8 && avctx->color_range == AVCOL_RANGE_JPEG) { > + s->bpp = 1; > + s->bpp_idx = 1; > + s->diff_offset = 128; > + } else if (depth == 8 && (avctx->color_range == AVCOL_RANGE_MPEG || > + avctx->color_range == AVCOL_RANGE_UNSPECIFIED)) { > + s->bpp = 1; > + s->bpp_idx = 2; > + s->diff_offset = 128; > + } else if (depth == 10) { > + s->bpp = 2; > + s->bpp_idx = 3; > + s->diff_offset = 512; > + } else { > + s->bpp = 2; > + s->bpp_idx = 4; > + s->diff_offset = 2048; > + } > + > + /* Planes initialization */ > + for (i = 0; i < 3; i++) { > + int w, h; > + p = &s->plane[i]; > + p->width = avctx->width >> (i ? s->chroma_x_shift : 0); > + p->height = avctx->height >> (i ? 
s->chroma_y_shift : 0); > + if (s->interlaced) > + p->height >>= 1; > + p->dwt_width = w = FFALIGN(p->width, (1 << s->wavelet_depth)); > + p->dwt_height = h = FFALIGN(p->height, (1 << s->wavelet_depth)); > + p->coef_stride = FFALIGN(p->dwt_width, 32); > + for (level = s->wavelet_depth-1; level >= 0; level--) { > + w = w >> 1; > + h = h >> 1; > + for (o = 0; o < 4; o++) { > + b = &p->band[level][o]; > + b->width = w; > + b->height = h; > + b->stride = p->coef_stride; > + } > + } > + } > + > + /* Slices */ > + s->num_x = s->plane[0].dwt_width/s->slice_width; > + s->num_y = s->plane[0].dwt_height/s->slice_height; > + > + ff_thread_once(&init_static_once, vc2_init_static_data); > + > + return 0; > +} > + > +int ff_vc2_frame_init_properties(AVCodecContext *avctx, VC2EncContext *s) > +{ > + int slice_ceil, sig_size = 256; > + const int bitexact = avctx->flags & AV_CODEC_FLAG_BITEXACT; > + const int aux_data_size = bitexact ? sizeof("Lavc") : sizeof(LIBAVCODEC_IDENT); > + const int header_size = 100 + aux_data_size; > + int64_t r_bitrate = avctx->bit_rate >> (s->interlaced); > + > + s->avctx = avctx; > + s->size_scaler = 2; > + s->prefix_bytes = 0; > + s->last_parse_code = 0; > + s->next_parse_offset = 0; > + > + /* Rate control */ > + s->frame_max_bytes = (av_rescale(r_bitrate, s->avctx->time_base.num, > + s->avctx->time_base.den) >> 3) - header_size; > + s->slice_max_bytes = slice_ceil = av_rescale(s->frame_max_bytes, 1, s->num_x * s->num_y); > + > + /* Find an appropriate size scaler */ > + while (sig_size > 255) { > + int r_size = SSIZE_ROUND(s->slice_max_bytes); > + if (r_size > slice_ceil) { > + s->slice_max_bytes -= r_size - slice_ceil; > + r_size = SSIZE_ROUND(s->slice_max_bytes); > + } > + sig_size = r_size/s->size_scaler; /* Signalled slice size */ > + s->size_scaler <<= 1; > + } > + > + s->slice_min_bytes = s->slice_max_bytes - s->slice_max_bytes*(s->tolerance/100.0f); > + if (s->slice_min_bytes < 0 || s->slice_max_bytes > INT_MAX >> 3) > + return
AVERROR(EINVAL); > + > + return 0; > +} > + > +void ff_vc2_encode_frame(VC2EncContext *s, void(*encode_slices)(VC2EncContext*)) > +{ > + const int bitexact = s->avctx->flags & AV_CODEC_FLAG_BITEXACT; > + const char *aux_data = bitexact ? "Lavc" : LIBAVCODEC_IDENT; > + > + /* Sequence header */ > + encode_parse_info(s, DIRAC_PCODE_SEQ_HEADER); > + encode_seq_header(s); > + > + /* Encoder version */ > + if (aux_data) { > + encode_parse_info(s, DIRAC_PCODE_AUX); > + ff_put_string(&s->pb, aux_data, 1); > + } > + > + /* Picture header */ > + encode_parse_info(s, DIRAC_PCODE_PICTURE_HQ); > + encode_picture_start(s); > + > + /* Encode slices */ > + encode_slices(s); I know that I told you to factor out writing the actual frame, but I was not aware at the time that encode_slices() will differ between the encoders. Better add two functions: ff_vc2_write_frame_header(), doing everything before encode_slices() here and one ff_vc2_write_sequence_end() for the end-of-sequence below. > + > + /* End sequence */ > + encode_parse_info(s, DIRAC_PCODE_END_SEQ); > +} > \ No newline at end of file > diff --git a/libavcodec/vc2enc_common.h b/libavcodec/vc2enc_common.h > new file mode 100644 > index 0000000000..0466869943 > --- /dev/null > +++ b/libavcodec/vc2enc_common.h > @@ -0,0 +1,178 @@ > +/* > +* Copyright (C) 2016 Open Broadcast Systems Ltd. > +* Author 2016 Rostislav Pehlivanov <atomnuker@gmail.com> > +* > +* This file is part of FFmpeg. > +* > +* FFmpeg is free software; you can redistribute it and/or > +* modify it under the terms of the GNU Lesser General Public > +* License as published by the Free Software Foundation; either > +* version 2.1 of the License, or (at your option) any later version. > +* > +* FFmpeg is distributed in the hope that it will be useful, > +* but WITHOUT ANY WARRANTY; without even the implied warranty of > +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > +* Lesser General Public License for more details. 
> +* > +* You should have received a copy of the GNU Lesser General Public > +* License along with FFmpeg; if not, write to the Free Software > +* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA > +*/ > + > +#ifndef AVCODEC_VC2ENC_COMMON_H > +#define AVCODEC_VC2ENC_COMMON_H > + > +#include "avcodec.h" > +#include "dirac.h" > +#include "put_bits.h" > +#include "libavutil/attributes_internal.h" > + > +#include "vc2enc_dwt.h" > +#include "diractab.h" > + > +/* The limited size resolution of each slice forces us to do this */ > +#define SSIZE_ROUND(b) (FFALIGN((b), s->size_scaler) + 4 + s->prefix_bytes) > + > +FF_VISIBILITY_PUSH_HIDDEN > + > +enum VC2_QM { > + VC2_QM_DEF = 0, > + VC2_QM_COL, > + VC2_QM_FLAT, > + > + VC2_QM_NB > +}; > + > +typedef struct SubBand { > + dwtcoef *buf; > + ptrdiff_t stride; > + int width; > + int height; > + int shift; > +} SubBand; > + > +typedef struct Plane { > + SubBand band[MAX_DWT_LEVELS][4]; > + dwtcoef *coef_buf; > + int width; > + int height; > + int dwt_width; > + int dwt_height; > + ptrdiff_t coef_stride; > +} Plane; > + > +typedef struct SliceArgs { > + const struct VC2EncContext *ctx; > + union { > + int cache[DIRAC_MAX_QUANT_INDEX]; > + uint8_t *buf; > + }; > + int x; > + int y; > + int quant_idx; > + int bits_ceil; > + int bits_floor; > + int bytes; > +} SliceArgs; This structure is only used by the software encoder; use a "struct SliceArgs *" below and keep this structure in vc2enc.c. This will make it possible to avoid the diractab inclusion (if I am not mistaken).
> + > +typedef struct TransformArgs { > + const struct VC2EncContext *ctx; > + Plane *plane; > + const void *idata; > + ptrdiff_t istride; > + int field; > + VC2TransformContext t; > +} TransformArgs; > + > +typedef struct VC2EncContext { > + AVClass *av_class; > + PutBitContext pb; > + Plane plane[3]; > + AVCodecContext *avctx; > + DiracVersionInfo ver; > + > + SliceArgs *slice_args; > + TransformArgs transform_args[3]; > + > + /* For conversion from unsigned pixel values to signed */ > + int diff_offset; > + int bpp; > + int bpp_idx; > + > + /* Picture number */ > + uint32_t picture_number; > + > + /* Base video format */ > + int base_vf; > + int level; > + int profile; > + > + /* Quantization matrix */ > + uint8_t quant[MAX_DWT_LEVELS][4]; > + int custom_quant_matrix; > + > + /* Division LUT */ > + uint32_t qmagic_lut[116][2]; > + > + int num_x; /* #slices horizontally */ > + int num_y; /* #slices vertically */ > + int prefix_bytes; > + int size_scaler; > + int chroma_x_shift; > + int chroma_y_shift; > + > + /* Rate control stuff */ > + int frame_max_bytes; > + int slice_max_bytes; > + int slice_min_bytes; > + int q_ceil; > + int q_avg; > + > + /* Options */ > + double tolerance; > + int wavelet_idx; > + int wavelet_depth; > + int strict_compliance; > + int slice_height; > + int slice_width; > + int interlaced; > + enum VC2_QM quant_matrix; > + > + /* Parse code state */ > + uint32_t next_parse_offset; > + enum DiracParseCodes last_parse_code; > +} VC2EncContext; > + > +extern uint16_t interleaved_ue_golomb_tab[256]; > +extern uint16_t top_interleaved_ue_golomb_tab[256]; > +extern uint8_t golomb_len_tab[256]; Missing ff_ prefix. 
> + > +static inline void put_vc2_ue_uint_inline(PutBitContext *pb, uint32_t val) > +{ > + uint64_t pbits = 1; > + int bits = 1; > + > + ++val; > + > + while (val >> 8) { > + pbits |= (uint64_t)interleaved_ue_golomb_tab[val & 0xff] << bits; > + val >>= 8; > + bits += 16; > + } > + pbits |= (uint64_t)top_interleaved_ue_golomb_tab[val] << bits; > + bits += golomb_len_tab[val]; > + > + put_bits63(pb, bits, pbits); > +} > + > +int ff_vc2_encode_init(AVCodecContext *avctx, int depth); > + > +int ff_vc2_frame_init_properties(AVCodecContext *avctx, VC2EncContext *s); > + > +void ff_vc2_init_quant_matrix(VC2EncContext *s, uint8_t quant[MAX_DWT_LEVELS][4]); > + > +void ff_vc2_encode_frame(VC2EncContext *s, void(*encode_slices)(VC2EncContext*)); > + > +FF_VISIBILITY_POP_HIDDEN > + > +#endif _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2025-05-19 17:40 UTC | newest] Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2025-05-17 20:48 [FFmpeg-devel] [PATCH v4 1/4] libavcodec/vc2enc: Split out common functions between software and hardware encoders IndecisiveTurtle 2025-05-17 20:48 ` [FFmpeg-devel] [PATCH v4 2/4] libavcodec/vc2enc: Switch quant to int IndecisiveTurtle 2025-05-17 20:48 ` [FFmpeg-devel] [PATCH v4 3/4] libavcodec/vulkan: Add modifications to common shader for VC2 vulkan encoder IndecisiveTurtle 2025-05-19 16:46 ` Andreas Rheinhardt 2025-05-19 17:02 ` IndecisiveTurtle 2025-05-17 20:48 ` [FFmpeg-devel] [PATCH v4 4/4] lavc: implement a Vulkan-based VC-2 encoder Implements a Vulkan based dirac encoder. Supports Haar and Legall wavelets and should work with all wavelet depths IndecisiveTurtle 2025-05-17 20:50 ` IndecisiveTurtle 2025-05-19 17:09 ` Andreas Rheinhardt 2025-05-19 17:40 ` IndecisiveTurtle 2025-05-19 15:56 ` [FFmpeg-devel] [PATCH v4 1/4] libavcodec/vc2enc: Split out common functions between software and hardware encoders Andreas Rheinhardt
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel This inbox may be cloned and mirrored by anyone: git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git # If you have public-inbox 1.1+ installed, you may # initialize and index your mirror using the following commands: public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \ ffmpegdev@gitmailbox.com public-inbox-index ffmpegdev Example config snippet for mirrors. AGPL code for this site: git clone https://public-inbox.org/public-inbox.git