* [FFmpeg-devel] [PATCH v1 1/2] vaapi: add vaapi_cavs support
@ 2024-01-19 15:49 jianfeng.zheng
2024-01-20 4:20 ` Zhao Zhili
From: jianfeng.zheng @ 2024-01-19 15:49 UTC
To: ffmpeg-devel; +Cc: jianfeng.zheng
see https://github.com/intel/libva/pull/738
[Moore Threads](https://www.mthreads.com) (Mthreads for short) is a
Chinese GPU manufacturer. All our products, such as MTTS70/MTTS80,
support AVS/AVS+ HW decoding at up to 2K resolution.
Signed-off-by: jianfeng.zheng <jianfeng.zheng@mthreads.com>
---
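For local testing, a minimal VA-API decode run could look like the sketch below
(assuming a driver that already exposes the new AVS profiles; the device path and
input file name are placeholders):

    ffmpeg -hwaccel vaapi -hwaccel_device /dev/dri/renderD128 -i input.avs -f null -
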
configure | 14 ++
libavcodec/Makefile | 1 +
libavcodec/cavs.c | 12 +
libavcodec/cavs.h | 36 ++-
libavcodec/cavs_parser.c | 16 ++
libavcodec/cavsdec.c | 473 +++++++++++++++++++++++++++++++++-----
libavcodec/defs.h | 3 +
libavcodec/hwaccels.h | 1 +
libavcodec/profiles.c | 6 +
libavcodec/profiles.h | 1 +
libavcodec/vaapi_cavs.c | 164 +++++++++++++
libavcodec/vaapi_decode.c | 4 +
12 files changed, 669 insertions(+), 62 deletions(-)
create mode 100644 libavcodec/vaapi_cavs.c
diff --git a/configure b/configure
index c8ae0a061d..89759eda5d 100755
--- a/configure
+++ b/configure
@@ -2463,6 +2463,7 @@ HAVE_LIST="
xmllint
zlib_gzip
openvino2
+ va_profile_avs
"
# options emitted with CONFIG_ prefix but not available on the command line
@@ -3202,6 +3203,7 @@ wmv3_dxva2_hwaccel_select="vc1_dxva2_hwaccel"
wmv3_nvdec_hwaccel_select="vc1_nvdec_hwaccel"
wmv3_vaapi_hwaccel_select="vc1_vaapi_hwaccel"
wmv3_vdpau_hwaccel_select="vc1_vdpau_hwaccel"
+cavs_vaapi_hwaccel_deps="vaapi va_profile_avs VAPictureParameterBufferAVS"
# hardware-accelerated codecs
mediafoundation_deps="mftransform_h MFCreateAlignedMemoryBuffer"
@@ -7175,6 +7177,18 @@ if enabled vaapi; then
check_type "va/va.h va/va_enc_vp8.h" "VAEncPictureParameterBufferVP8"
check_type "va/va.h va/va_enc_vp9.h" "VAEncPictureParameterBufferVP9"
check_type "va/va.h va/va_enc_av1.h" "VAEncPictureParameterBufferAV1"
+
+ #
+ # Using 'VA_CHECK_VERSION' in the source code would make this easy, but we would
+ # have to wait until the newly added VAProfiles ship in a released VA-API version.
+ #
+ # Auto-detection keeps things compatible both before and after that release.
+ # It always works.
+ #
+ disable va_profile_avs &&
+ test_code cc va/va.h "VAProfile p1 = VAProfileAVSJizhun, p2 = VAProfileAVSGuangdian;" &&
+ enable va_profile_avs
+ enabled va_profile_avs && check_type "va/va.h va/va_dec_avs.h" "VAPictureParameterBufferAVS"
fi
if enabled_all opencl libdrm ; then
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index bb42095165..7d92375fed 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -1055,6 +1055,7 @@ OBJS-$(CONFIG_VP9_VAAPI_HWACCEL) += vaapi_vp9.o
OBJS-$(CONFIG_VP9_VDPAU_HWACCEL) += vdpau_vp9.o
OBJS-$(CONFIG_VP9_VIDEOTOOLBOX_HWACCEL) += videotoolbox_vp9.o
OBJS-$(CONFIG_VP8_QSV_HWACCEL) += qsvdec.o
+OBJS-$(CONFIG_CAVS_VAAPI_HWACCEL) += vaapi_cavs.o
# Objects duplicated from other libraries for shared builds
SHLIBOBJS += log2_tab.o reverse.o
diff --git a/libavcodec/cavs.c b/libavcodec/cavs.c
index fdd577f7fb..ed7b278336 100644
--- a/libavcodec/cavs.c
+++ b/libavcodec/cavs.c
@@ -810,6 +810,14 @@ av_cold int ff_cavs_init(AVCodecContext *avctx)
if (!h->cur.f || !h->DPB[0].f || !h->DPB[1].f)
return AVERROR(ENOMEM);
+ h->out[0].f = av_frame_alloc();
+ h->out[1].f = av_frame_alloc();
+ h->out[2].f = av_frame_alloc();
+ if (!h->out[0].f || !h->out[1].f || !h->out[2].f) {
+ ff_cavs_end(avctx);
+ return AVERROR(ENOMEM);
+ }
+
h->luma_scan[0] = 0;
h->luma_scan[1] = 8;
h->intra_pred_l[INTRA_L_VERT] = intra_pred_vert;
@@ -840,6 +848,10 @@ av_cold int ff_cavs_end(AVCodecContext *avctx)
av_frame_free(&h->DPB[0].f);
av_frame_free(&h->DPB[1].f);
+ av_frame_free(&h->out[0].f);
+ av_frame_free(&h->out[1].f);
+ av_frame_free(&h->out[2].f);
+
av_freep(&h->top_qp);
av_freep(&h->top_mv[0]);
av_freep(&h->top_mv[1]);
diff --git a/libavcodec/cavs.h b/libavcodec/cavs.h
index 244c322b35..ef03c1a974 100644
--- a/libavcodec/cavs.h
+++ b/libavcodec/cavs.h
@@ -39,8 +39,10 @@
#define EXT_START_CODE 0x000001b5
#define USER_START_CODE 0x000001b2
#define CAVS_START_CODE 0x000001b0
+#define VIDEO_SEQ_END_CODE 0x000001b1
#define PIC_I_START_CODE 0x000001b3
#define PIC_PB_START_CODE 0x000001b6
+#define VIDEO_EDIT_CODE 0x000001b7
#define A_AVAIL 1
#define B_AVAIL 2
@@ -164,10 +166,15 @@ struct dec_2dvlc {
typedef struct AVSFrame {
AVFrame *f;
int poc;
+ int outputed;
+
+ AVBufferRef *hwaccel_priv_buf;
+ void *hwaccel_picture_private;
} AVSFrame;
typedef struct AVSContext {
AVCodecContext *avctx;
+ int got_pix_fmt;
BlockDSPContext bdsp;
H264ChromaContext h264chroma;
VideoDSPContext vdsp;
@@ -175,6 +182,7 @@ typedef struct AVSContext {
GetBitContext gb;
AVSFrame cur; ///< currently decoded frame
AVSFrame DPB[2]; ///< reference frames
+ AVSFrame out[3]; ///< output queue, size 2 maybe enough
int dist[2]; ///< temporal distances from current frame to ref frames
int low_delay;
int profile, level;
@@ -182,12 +190,38 @@ typedef struct AVSContext {
int mb_width, mb_height;
int width, height;
int stream_revision; ///<0 for samples from 2006, 1 for rm52j encoder
- int progressive;
+ int progressive_seq;
+ int progressive_frame;
int pic_structure;
+ int no_forward_ref_flag;
+ int pb_field_enhanced_flag; ///< only used in GUANGDIAN
int skip_mode_flag; ///< select between skip_count or one skip_flag per MB
int loop_filter_disable;
int alpha_offset, beta_offset;
int ref_flag;
+
+ /** \defgroup guangdian profile
+ * @{
+ */
+ int aec_flag;
+ int weight_quant_flag;
+ int chroma_quant_param_delta_cb;
+ int chroma_quant_param_delta_cr;
+ uint8_t wqm_8x8[64];
+ /**@}*/
+
+ /** \defgroup slice weighting
+ * FFmpeg doesn't support slice weighting natively, but it may be needed for hwaccels.
+ * @{
+ */
+ uint32_t slice_weight_pred_flag : 1;
+ uint32_t mb_weight_pred_flag : 1;
+ uint8_t luma_scale[4];
+ int8_t luma_shift[4];
+ uint8_t chroma_scale[4];
+ int8_t chroma_shift[4];
+ /**@}*/
+
int mbx, mby, mbidx; ///< macroblock coordinates
int flags; ///< availability flags of neighbouring macroblocks
int stc; ///< last start code
diff --git a/libavcodec/cavs_parser.c b/libavcodec/cavs_parser.c
index 4a03effd0f..a41a82d8d1 100644
--- a/libavcodec/cavs_parser.c
+++ b/libavcodec/cavs_parser.c
@@ -65,6 +65,22 @@ static int cavs_find_frame_end(ParseContext *pc, const uint8_t *buf,
pc->state=-1;
return i-3;
}
+ if((state&0xFFFFFF00) == 0x100){
+ if(state != EXT_START_CODE && state != USER_START_CODE){
+ state = state >> 8;
+ break;
+ }
+ }
+ }
+ for(; i<buf_size; i++){
+ state= (state<<8) | buf[i];
+ if((state&0xFFFFFF00) == 0x100){
+ if(state > SLICE_MAX_START_CODE){
+ pc->frame_start_found=0;
+ pc->state=-1;
+ return i-3;
+ }
+ }
}
}
pc->frame_start_found= pic_found;
diff --git a/libavcodec/cavsdec.c b/libavcodec/cavsdec.c
index b356da0b04..18c38cd3ff 100644
--- a/libavcodec/cavsdec.c
+++ b/libavcodec/cavsdec.c
@@ -25,11 +25,15 @@
* @author Stefan Gehrer <stefan.gehrer@gmx.de>
*/
+#include "config_components.h"
#include "libavutil/avassert.h"
#include "libavutil/emms.h"
#include "avcodec.h"
#include "get_bits.h"
#include "golomb.h"
+#include "hwaccel_internal.h"
+#include "hwconfig.h"
+#include "profiles.h"
#include "cavs.h"
#include "codec_internal.h"
#include "decode.h"
@@ -37,6 +41,43 @@
#include "mpeg12data.h"
#include "startcode.h"
+static const uint8_t default_wq_param[4][6] = {
+ {128, 98, 106, 116, 116, 128},
+ {135, 143, 143, 160, 160, 213},
+ {128, 98, 106, 116, 116, 128},
+ {128, 128, 128, 128, 128, 128},
+};
+static const uint8_t wq_model_2_param[4][64] = {
+ {
+ 0, 0, 0, 4, 4, 4, 5, 5,
+ 0, 0, 3, 3, 3, 3, 5, 5,
+ 0, 3, 2, 2, 1, 1, 5, 5,
+ 4, 3, 2, 2, 1, 5, 5, 5,
+ 4, 3, 1, 1, 5, 5, 5, 5,
+ 4, 3, 1, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5,
+ }, {
+ 0, 0, 0, 4, 4, 4, 5, 5,
+ 0, 0, 4, 4, 4, 4, 5, 5,
+ 0, 3, 2, 2, 2, 1, 5, 5,
+ 3, 3, 2, 2, 1, 5, 5, 5,
+ 3, 3, 2, 1, 5, 5, 5, 5,
+ 3, 3, 1, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5,
+ }, {
+ 0, 0, 0, 4, 4, 3, 5, 5,
+ 0, 0, 4, 4, 3, 2, 5, 5,
+ 0, 4, 4, 3, 2, 1, 5, 5,
+ 4, 4, 3, 2, 1, 5, 5, 5,
+ 4, 3, 2, 1, 5, 5, 5, 5,
+ 3, 2, 1, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5,
+ 5, 5, 5, 5, 5, 5, 5, 5,
+ }
+};
+
static const uint8_t mv_scan[4] = {
MV_FWD_X0, MV_FWD_X1,
MV_FWD_X2, MV_FWD_X3
@@ -927,7 +968,11 @@ static int decode_mb_b(AVSContext *h, enum cavs_mb mb_type)
static inline int decode_slice_header(AVSContext *h, GetBitContext *gb)
{
- if (h->stc > 0xAF)
+ int i, nref;
+
+ av_log(h->avctx, AV_LOG_TRACE, "slice start code 0x%02x\n", h->stc);
+
+ if (h->stc > SLICE_MAX_START_CODE)
av_log(h->avctx, AV_LOG_ERROR, "unexpected start code 0x%02x\n", h->stc);
if (h->stc >= h->mb_height) {
@@ -946,14 +991,119 @@ static inline int decode_slice_header(AVSContext *h, GetBitContext *gb)
}
/* inter frame or second slice can have weighting params */
if ((h->cur.f->pict_type != AV_PICTURE_TYPE_I) ||
- (!h->pic_structure && h->mby >= h->mb_width / 2))
- if (get_bits1(gb)) { //slice_weighting_flag
- av_log(h->avctx, AV_LOG_ERROR,
- "weighted prediction not yet supported\n");
+ (!h->pic_structure && h->mby >= h->mb_height / 2)) {
+ h->slice_weight_pred_flag = get_bits1(gb);
+ if (h->slice_weight_pred_flag) {
+ nref = h->cur.f->pict_type == AV_PICTURE_TYPE_I ? 1 : (h->pic_structure ? 2 : 4);
+ for (i = 0; i < nref; i++) {
+ h->luma_scale[i] = get_bits(gb, 8);
+ h->luma_shift[i] = get_sbits(gb, 8);
+ skip_bits1(gb);
+ h->chroma_scale[i] = get_bits(gb, 8);
+ h->chroma_shift[i] = get_sbits(gb, 8);
+ skip_bits1(gb);
+ }
+ h->mb_weight_pred_flag = get_bits1(gb);
+ if (!h->avctx->hwaccel) {
+ av_log(h->avctx, AV_LOG_ERROR,
+ "weighted prediction not yet supported\n");
+ }
}
+ }
+ if (h->aec_flag) {
+ align_get_bits(gb);
+ }
+
+ return 0;
+}
+
+/**
+ * skip stuffing bits before next start code "0x000001"
+ * @return 0 if no stuffing bits at h->gb were skipped, 1 otherwise.
+ */
+static inline int skip_stuffing_bits(AVSContext *h)
+{
+ GetBitContext gb0 = h->gb;
+ GetBitContext *gb = &h->gb;
+ const uint8_t *start;
+ const uint8_t *ptr;
+ const uint8_t *end;
+ int align;
+ int stuffing_zeros;
+
+#if 0
+ /**
+ * skip 1 bit stuffing_bit '1' and 0~7 bit stuffing_bit '0'
+ */
+ if (!get_bits1(gb)) {
+ av_log(h->avctx, AV_LOG_WARNING, "NOT stuffing_bit '1'\n");
+ goto restore_get_bits;
+ }
+ align = (-get_bits_count(gb)) & 7;
+ if (show_bits_long(gb, align)) {
+ av_log(h->avctx, AV_LOG_WARNING, "NOT %d stuffing_bit '0..0'\n", align);
+ goto restore_get_bits;
+ }
+#else
+ /**
+ * It seems that not all streams follow "next_start_code()" strictly.
+ */
+ align = (-get_bits_count(gb)) & 7;
+ if (align == 0 && show_bits_long(gb, 8) == 0x80) {
+ skip_bits_long(gb, 8);
+ }
+#endif
+
+ /**
+ * skip leading zero bytes before 0x 00 00 01 stc
+ */
+ ptr = start = align_get_bits(gb);
+ end = gb->buffer_end;
+ while (ptr < end && *ptr == 0)
+ ptr++;
+
+ if ((ptr >= end) || (*ptr == 1 && ptr - start >= 2)) {
+ stuffing_zeros = (ptr >= end ? end - start : ptr - start - 2);
+ if (stuffing_zeros > 0)
+ av_log(h->avctx, AV_LOG_DEBUG, "Skip 0x%x stuffing zeros @0x%x.\n",
+ stuffing_zeros, (int)(start - gb->buffer));
+ skip_bits_long(gb, stuffing_zeros * 8);
+ return 1;
+ } else {
+ av_log(h->avctx, AV_LOG_DEBUG, "No next_start_code() found @0x%x.\n",
+ (int)(start - gb->buffer));
+ goto restore_get_bits;
+ }
+
+restore_get_bits:
+ h->gb = gb0;
return 0;
}
+static inline int skip_extension_and_user_data(AVSContext *h)
+{
+ int stc = -1;
+ const uint8_t *start = align_get_bits(&h->gb);
+ const uint8_t *end = h->gb.buffer_end;
+ const uint8_t *ptr, *next;
+
+ for (ptr = start; ptr + 4 < end; ptr = next) {
+ stc = show_bits_long(&h->gb, 32);
+ if (stc != EXT_START_CODE && stc != USER_START_CODE) {
+ break;
+ }
+ next = avpriv_find_start_code(ptr + 4, end, &stc);
+ if (next < end) {
+ next -= 4;
+ }
+ skip_bits(&h->gb, (next - ptr) * 8);
+ av_log(h->avctx, AV_LOG_DEBUG, "skip %d byte ext/user data\n",
+ (int)(next - ptr));
+ }
+
+ return ptr > start;
+}
+
static inline int check_for_slice(AVSContext *h)
{
GetBitContext *gb = &h->gb;
@@ -981,44 +1131,133 @@ static inline int check_for_slice(AVSContext *h)
* frame level
*
****************************************************************************/
+static int hwaccel_pic(AVSContext *h)
+{
+ int ret = 0;
+ int stc = -1;
+ const uint8_t *frm_start = align_get_bits(&h->gb);
+ const uint8_t *frm_end = h->gb.buffer_end;
+ const uint8_t *slc_start = frm_start;
+ const uint8_t *slc_end = frm_end;
+ GetBitContext gb = h->gb;
+ const FFHWAccel *hwaccel = ffhwaccel(h->avctx->hwaccel);
+
+ ret = hwaccel->start_frame(h->avctx, NULL, 0);
+ if (ret < 0)
+ return ret;
+
+ for (slc_start = frm_start; slc_start + 4 < frm_end; slc_start = slc_end) {
+ slc_end = avpriv_find_start_code(slc_start + 4, frm_end, &stc);
+ if (slc_end < frm_end) {
+ slc_end -= 4;
+ }
+
+ init_get_bits(&h->gb, slc_start, (slc_end - slc_start) * 8);
+ if (!check_for_slice(h)) {
+ break;
+ }
+
+ ret = hwaccel->decode_slice(h->avctx, slc_start, slc_end - slc_start);
+ if (ret < 0) {
+ break;
+ }
+ }
+
+ h->gb = gb;
+ skip_bits(&h->gb, (slc_start - frm_start) * 8);
+
+ if (ret < 0)
+ return ret;
+
+ return hwaccel->end_frame(h->avctx);
+}
+
+/**
+ * @brief remove frame out of dpb
+ */
+static void cavs_frame_unref(AVSFrame *frame)
+{
+ /* frame->f can be NULL if context init failed */
+ if (!frame->f || !frame->f->buf[0])
+ return;
+
+ av_buffer_unref(&frame->hwaccel_priv_buf);
+ frame->hwaccel_picture_private = NULL;
+
+ av_frame_unref(frame->f);
+}
+
+static int output_one_frame(AVSContext *h, AVFrame *data, int *got_frame)
+{
+ if (h->out[0].f->buf[0]) {
+ av_log(h->avctx, AV_LOG_DEBUG, "output frame: poc=%d\n", h->out[0].poc);
+ av_frame_move_ref(data, h->out[0].f);
+ *got_frame = 1;
+
+ // out[0] <- out[1] <- out[2] <- out[0]
+ cavs_frame_unref(&h->out[2]);
+ FFSWAP(AVSFrame, h->out[0], h->out[2]);
+ FFSWAP(AVSFrame, h->out[0], h->out[1]);
+
+ return 1;
+ }
+
+ return 0;
+}
+
+static void queue_one_frame(AVSContext *h, AVSFrame *out)
+{
+ int idx = !h->out[0].f->buf[0] ? 0 : (!h->out[1].f->buf[0] ? 1 : 2);
+ av_log(h->avctx, AV_LOG_DEBUG, "queue in out[%d]: poc=%d\n", idx, out->poc);
+ av_frame_ref(h->out[idx].f, out->f);
+ h->out[idx].poc = out->poc;
+}
static int decode_pic(AVSContext *h)
{
int ret;
int skip_count = -1;
enum cavs_mb mb_type;
+ char tc[4];
if (!h->top_qp) {
av_log(h->avctx, AV_LOG_ERROR, "No sequence header decoded yet\n");
return AVERROR_INVALIDDATA;
}
- av_frame_unref(h->cur.f);
+ cavs_frame_unref(&h->cur);
+
+ skip_bits(&h->gb, 16);//bbv_delay
+ if (h->profile == AV_PROFILE_CAVS_GUANGDIAN) {
+ skip_bits(&h->gb, 8);//bbv_delay_extension
+ }
- skip_bits(&h->gb, 16);//bbv_dwlay
if (h->stc == PIC_PB_START_CODE) {
h->cur.f->pict_type = get_bits(&h->gb, 2) + AV_PICTURE_TYPE_I;
if (h->cur.f->pict_type > AV_PICTURE_TYPE_B) {
av_log(h->avctx, AV_LOG_ERROR, "illegal picture type\n");
return AVERROR_INVALIDDATA;
}
+
/* make sure we have the reference frames we need */
- if (!h->DPB[0].f->data[0] ||
- (!h->DPB[1].f->data[0] && h->cur.f->pict_type == AV_PICTURE_TYPE_B))
+ if (!h->DPB[0].f->buf[0] ||
+ (!h->DPB[1].f->buf[0] && h->cur.f->pict_type == AV_PICTURE_TYPE_B)) {
+ av_log(h->avctx, AV_LOG_ERROR, "Invalid reference frame\n");
return AVERROR_INVALIDDATA;
+ }
} else {
h->cur.f->pict_type = AV_PICTURE_TYPE_I;
- if (get_bits1(&h->gb))
- skip_bits(&h->gb, 24);//time_code
- /* old sample clips were all progressive and no low_delay,
- bump stream revision if detected otherwise */
- if (h->low_delay || !(show_bits(&h->gb, 9) & 1))
- h->stream_revision = 1;
- /* similarly test top_field_first and repeat_first_field */
- else if (show_bits(&h->gb, 11) & 3)
- h->stream_revision = 1;
- if (h->stream_revision > 0)
- skip_bits(&h->gb, 1); //marker_bit
+ if (get_bits1(&h->gb)) { //time_code
+ skip_bits(&h->gb, 1);
+ tc[0] = get_bits(&h->gb, 5);
+ tc[1] = get_bits(&h->gb, 6);
+ tc[2] = get_bits(&h->gb, 6);
+ tc[3] = get_bits(&h->gb, 6);
+ av_log(h->avctx, AV_LOG_DEBUG, "timecode: %d:%d:%d.%d\n",
+ tc[0], tc[1], tc[2], tc[3]);
+ }
+
+ skip_bits(&h->gb, 1);
}
if (get_bits_left(&h->gb) < 23)
@@ -1029,6 +1268,17 @@ static int decode_pic(AVSContext *h)
if (ret < 0)
return ret;
+ if (h->avctx->hwaccel) {
+ const FFHWAccel *hwaccel = ffhwaccel(h->avctx->hwaccel);
+ av_assert0(!h->cur.hwaccel_picture_private);
+ if (hwaccel->frame_priv_data_size) {
+ h->cur.hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
+ if (!h->cur.hwaccel_priv_buf)
+ return AVERROR(ENOMEM);
+ h->cur.hwaccel_picture_private = h->cur.hwaccel_priv_buf->data;
+ }
+ }
+
if (!h->edge_emu_buffer) {
int alloc_size = FFALIGN(FFABS(h->cur.f->linesize[0]) + 32, 32);
h->edge_emu_buffer = av_mallocz(alloc_size * 2 * 24);
@@ -1039,6 +1289,8 @@ static int decode_pic(AVSContext *h)
if ((ret = ff_cavs_init_pic(h)) < 0)
return ret;
h->cur.poc = get_bits(&h->gb, 8) * 2;
+ av_log(h->avctx, AV_LOG_DEBUG, "poc=%d, type=%d\n",
+ h->cur.poc, h->cur.f->pict_type);
/* get temporal distances and MV scaling factors */
if (h->cur.f->pict_type != AV_PICTURE_TYPE_B) {
@@ -1052,8 +1304,12 @@ static int decode_pic(AVSContext *h)
if (h->cur.f->pict_type == AV_PICTURE_TYPE_B) {
h->sym_factor = h->dist[0] * h->scale_den[1];
if (FFABS(h->sym_factor) > 32768) {
+ av_log(h->avctx, AV_LOG_ERROR, "poc=%d/%d/%d, dist=%d/%d\n",
+ h->DPB[1].poc, h->DPB[0].poc, h->cur.poc, h->dist[0], h->dist[1]);
av_log(h->avctx, AV_LOG_ERROR, "sym_factor %d too large\n", h->sym_factor);
- return AVERROR_INVALIDDATA;
+
+ if (!h->avctx->hwaccel)
+ return AVERROR_INVALIDDATA;
}
} else {
h->direct_den[0] = h->dist[0] ? 16384 / h->dist[0] : 0;
@@ -1062,9 +1318,9 @@ static int decode_pic(AVSContext *h)
if (h->low_delay)
get_ue_golomb(&h->gb); //bbv_check_times
- h->progressive = get_bits1(&h->gb);
+ h->progressive_frame = get_bits1(&h->gb);
h->pic_structure = 1;
- if (!h->progressive)
+ if (!h->progressive_frame)
h->pic_structure = get_bits1(&h->gb);
if (!h->pic_structure && h->stc == PIC_PB_START_CODE)
skip_bits1(&h->gb); //advanced_pred_mode_disable
@@ -1073,14 +1329,18 @@ static int decode_pic(AVSContext *h)
h->pic_qp_fixed =
h->qp_fixed = get_bits1(&h->gb);
h->qp = get_bits(&h->gb, 6);
+ h->skip_mode_flag = 0;
+ h->ref_flag = 0;
if (h->cur.f->pict_type == AV_PICTURE_TYPE_I) {
- if (!h->progressive && !h->pic_structure)
- skip_bits1(&h->gb);//what is this?
+ if (!h->progressive_frame && !h->pic_structure)
+ h->skip_mode_flag = get_bits1(&h->gb);
skip_bits(&h->gb, 4); //reserved bits
} else {
if (!(h->cur.f->pict_type == AV_PICTURE_TYPE_B && h->pic_structure == 1))
h->ref_flag = get_bits1(&h->gb);
- skip_bits(&h->gb, 4); //reserved bits
+ h->no_forward_ref_flag = get_bits1(&h->gb);
+ h->pb_field_enhanced_flag = get_bits1(&h->gb);
+ skip_bits(&h->gb, 2); //reserved bits
h->skip_mode_flag = get_bits1(&h->gb);
}
h->loop_filter_disable = get_bits1(&h->gb);
@@ -1096,8 +1356,46 @@ static int decode_pic(AVSContext *h)
h->alpha_offset = h->beta_offset = 0;
}
+ h->weight_quant_flag = 0;
+ if (h->profile == AV_PROFILE_CAVS_GUANGDIAN) {
+ h->weight_quant_flag = get_bits1(&h->gb);
+ if (h->weight_quant_flag) {
+ int wq_param[6] = {128, 128, 128, 128, 128, 128};
+ int i, wqp_index, wq_model;
+ const uint8_t *m2p;
+
+ skip_bits1(&h->gb);
+ if (!get_bits1(&h->gb)) {
+ h->chroma_quant_param_delta_cb = get_se_golomb(&h->gb);
+ h->chroma_quant_param_delta_cr = get_se_golomb(&h->gb);
+ }
+ wqp_index = get_bits(&h->gb, 2);
+ wq_model = get_bits(&h->gb, 2);
+ m2p = wq_model_2_param[wq_model];
+
+ for (i = 0; i < 6; i++) {
+ int delta = (wqp_index == 1 || wqp_index == 2) ? get_se_golomb(&h->gb) : 0;
+ wq_param[i] = default_wq_param[wqp_index][i] + delta;
+ av_log(h->avctx, AV_LOG_DEBUG, "wqp[%d]=%d\n", i, wq_param[i]);
+ }
+ for (i = 0; i < 64; i++) {
+ h->wqm_8x8[i] = wq_param[ m2p[i] ];
+ }
+ } else {
+ memset(h->wqm_8x8, 128, sizeof(h->wqm_8x8));
+ }
+ h->aec_flag = get_bits1(&h->gb);
+ av_log(h->avctx, AV_LOG_DEBUG, "wq_flag=%d, aec_flag=%d\n",
+ h->weight_quant_flag, h->aec_flag);
+ }
+
+ skip_stuffing_bits(h);
+ skip_extension_and_user_data(h);
+
ret = 0;
- if (h->cur.f->pict_type == AV_PICTURE_TYPE_I) {
+ if (h->avctx->hwaccel) {
+ ret = hwaccel_pic(h);
+ } else if (h->cur.f->pict_type == AV_PICTURE_TYPE_I) {
do {
check_for_slice(h);
ret = decode_mb_i(h, 0);
@@ -1160,11 +1458,6 @@ static int decode_pic(AVSContext *h)
} while (ff_cavs_next_mb(h));
}
emms_c();
- if (ret >= 0 && h->cur.f->pict_type != AV_PICTURE_TYPE_B) {
- av_frame_unref(h->DPB[1].f);
- FFSWAP(AVSFrame, h->cur, h->DPB[1]);
- FFSWAP(AVSFrame, h->DPB[0], h->DPB[1]);
- }
return ret;
}
@@ -1181,13 +1474,8 @@ static int decode_seq_header(AVSContext *h)
int ret;
h->profile = get_bits(&h->gb, 8);
- if (h->profile != 0x20) {
- avpriv_report_missing_feature(h->avctx,
- "only supprt JiZhun profile");
- return AVERROR_PATCHWELCOME;
- }
h->level = get_bits(&h->gb, 8);
- skip_bits1(&h->gb); //progressive sequence
+ h->progressive_seq = get_bits1(&h->gb);
width = get_bits(&h->gb, 14);
height = get_bits(&h->gb, 14);
@@ -1214,6 +1502,9 @@ static int decode_seq_header(AVSContext *h)
skip_bits1(&h->gb); //marker_bit
skip_bits(&h->gb, 12); //bit_rate_upper
h->low_delay = get_bits1(&h->gb);
+ av_log(h->avctx, AV_LOG_DEBUG,
+ "seq: profile=0x%02x, level=0x%02x, size=%dx%d, low_delay=%d\n",
+ h->profile, h->level, width, height, h->low_delay);
ret = ff_set_dimensions(h->avctx, width, height);
if (ret < 0)
@@ -1239,43 +1530,61 @@ static int cavs_decode_frame(AVCodecContext *avctx, AVFrame *rframe,
int *got_frame, AVPacket *avpkt)
{
AVSContext *h = avctx->priv_data;
- const uint8_t *buf = avpkt->data;
- int buf_size = avpkt->size;
uint32_t stc = -1;
int input_size, ret;
const uint8_t *buf_end;
const uint8_t *buf_ptr;
int frame_start = 0;
- if (buf_size == 0) {
- if (!h->low_delay && h->DPB[0].f->data[0]) {
- *got_frame = 1;
- av_frame_move_ref(rframe, h->DPB[0].f);
+ if (avpkt->size == 0) {
+ if (h->DPB[0].f->buf[0] && !h->DPB[0].outputed) {
+ queue_one_frame(h, &h->DPB[0]);
+ cavs_frame_unref(&h->DPB[0]);
}
+ output_one_frame(h, rframe, got_frame);
return 0;
}
h->stc = 0;
- buf_ptr = buf;
- buf_end = buf + buf_size;
- for(;;) {
+ buf_ptr = avpkt->data;
+ buf_end = avpkt->data + avpkt->size;
+ for(; buf_ptr < buf_end;) {
buf_ptr = avpriv_find_start_code(buf_ptr, buf_end, &stc);
if ((stc & 0xFFFFFE00) || buf_ptr == buf_end) {
if (!h->stc)
av_log(h->avctx, AV_LOG_WARNING, "no frame decoded\n");
- return FFMAX(0, buf_ptr - buf);
+ return FFMAX(0, buf_ptr - avpkt->data);
}
input_size = (buf_end - buf_ptr) * 8;
+ av_log(h->avctx, AV_LOG_TRACE, "Found start code 0x%04x, sz=%d\n",
+ stc, input_size / 8);
switch (stc) {
case CAVS_START_CODE:
init_get_bits(&h->gb, buf_ptr, input_size);
- decode_seq_header(h);
+ if ((ret = decode_seq_header(h)) < 0)
+ return ret;
+ avctx->profile = h->profile;
+ avctx->level = h->level;
+ if (!h->got_pix_fmt) {
+ h->got_pix_fmt = 1;
+ ret = ff_get_format(avctx, avctx->codec->pix_fmts);
+ if (ret < 0)
+ return ret;
+
+ avctx->pix_fmt = ret;
+
+ if (h->profile == AV_PROFILE_CAVS_GUANGDIAN && !avctx->hwaccel) {
+ av_log(avctx, AV_LOG_ERROR, "Your platform doesn't suppport hardware"
+ " accelerated for CAVS Guangdian Profile decoding.\n");
+ return AVERROR(ENOTSUP);
+ }
+ }
break;
case PIC_I_START_CODE:
if (!h->got_keyframe) {
- av_frame_unref(h->DPB[0].f);
- av_frame_unref(h->DPB[1].f);
+ cavs_frame_unref(&h->DPB[0]);
+ cavs_frame_unref(&h->DPB[1]);
h->got_keyframe = 1;
}
case PIC_PB_START_CODE:
@@ -1285,23 +1594,39 @@ static int cavs_decode_frame(AVCodecContext *avctx, AVFrame *rframe,
if (*got_frame)
av_frame_unref(rframe);
*got_frame = 0;
- if (!h->got_keyframe)
+ if (!h->got_keyframe) {
+ av_log(avctx, AV_LOG_ERROR, "No keyframe decoded before P/B frame.\n");
break;
+ }
init_get_bits(&h->gb, buf_ptr, input_size);
h->stc = stc;
- if (decode_pic(h))
- break;
- *got_frame = 1;
+ if ((ret = decode_pic(h)) < 0)
+ return ret;
+ buf_ptr = align_get_bits(&h->gb);
+
+ h->cur.outputed = 0;
if (h->cur.f->pict_type != AV_PICTURE_TYPE_B) {
- if (h->DPB[!h->low_delay].f->data[0]) {
- if ((ret = av_frame_ref(rframe, h->DPB[!h->low_delay].f)) < 0)
- return ret;
- } else {
- *got_frame = 0;
+ // at most one delay
+ if (h->DPB[0].f->buf[0] && !h->DPB[0].outputed) {
+ queue_one_frame(h, &h->DPB[0]);
+ h->DPB[0].outputed = 1;
+ }
+
+ if (h->low_delay) {
+ queue_one_frame(h, &h->cur);
+ h->cur.outputed = 1;
}
+
+ // null -> curr -> DPB[0] -> DPB[1]
+ cavs_frame_unref(&h->DPB[1]);
+ FFSWAP(AVSFrame, h->cur, h->DPB[1]);
+ FFSWAP(AVSFrame, h->DPB[0], h->DPB[1]);
} else {
- av_frame_move_ref(rframe, h->cur.f);
+ queue_one_frame(h, &h->cur);
+ cavs_frame_unref(&h->cur);
}
+
+ output_one_frame(h, rframe, got_frame);
break;
case EXT_START_CODE:
//mpeg_decode_extension(avctx, buf_ptr, input_size);
@@ -1309,16 +1634,34 @@ static int cavs_decode_frame(AVCodecContext *avctx, AVFrame *rframe,
case USER_START_CODE:
//mpeg_decode_user_data(avctx, buf_ptr, input_size);
break;
+ case VIDEO_EDIT_CODE:
+ av_log(h->avctx, AV_LOG_WARNING, "Skip video_edit_code\n");
+ break;
+ case VIDEO_SEQ_END_CODE:
+ av_log(h->avctx, AV_LOG_WARNING, "Skip video_sequence_end_code\n");
+ break;
default:
if (stc <= SLICE_MAX_START_CODE) {
+ h->stc = stc & 0xff;
init_get_bits(&h->gb, buf_ptr, input_size);
decode_slice_header(h, &h->gb);
+ } else {
+ av_log(h->avctx, AV_LOG_WARNING, "Skip unsupported start code 0x%04X\n", stc);
}
break;
}
}
+ return (buf_ptr - avpkt->data);
}
+static const enum AVPixelFormat cavs_hwaccel_pixfmt_list_420[] = {
+#if CONFIG_CAVS_VAAPI_HWACCEL
+ AV_PIX_FMT_VAAPI,
+#endif
+ AV_PIX_FMT_YUV420P,
+ AV_PIX_FMT_NONE
+};
+
const FFCodec ff_cavs_decoder = {
.p.name = "cavs",
CODEC_LONG_NAME("Chinese AVS (Audio Video Standard) (AVS1-P2, JiZhun profile)"),
@@ -1331,4 +1674,12 @@ const FFCodec ff_cavs_decoder = {
.p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
.flush = cavs_flush,
.caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
+ .p.pix_fmts = cavs_hwaccel_pixfmt_list_420,
+ .hw_configs = (const AVCodecHWConfigInternal *const []) {
+#if CONFIG_CAVS_VAAPI_HWACCEL
+ HWACCEL_VAAPI(cavs),
+#endif
+ NULL
+ },
+ .p.profiles = NULL_IF_CONFIG_SMALL(ff_cavs_profiles),
};
diff --git a/libavcodec/defs.h b/libavcodec/defs.h
index 00d840ec19..d59816a70f 100644
--- a/libavcodec/defs.h
+++ b/libavcodec/defs.h
@@ -192,6 +192,9 @@
#define AV_PROFILE_EVC_BASELINE 0
#define AV_PROFILE_EVC_MAIN 1
+#define AV_PROFILE_CAVS_JIZHUN 0x20
+#define AV_PROFILE_CAVS_GUANGDIAN 0x48
+
#define AV_LEVEL_UNKNOWN -99
diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h
index 5171e4c7d7..a1a973b460 100644
--- a/libavcodec/hwaccels.h
+++ b/libavcodec/hwaccels.h
@@ -89,5 +89,6 @@ extern const struct FFHWAccel ff_wmv3_dxva2_hwaccel;
extern const struct FFHWAccel ff_wmv3_nvdec_hwaccel;
extern const struct FFHWAccel ff_wmv3_vaapi_hwaccel;
extern const struct FFHWAccel ff_wmv3_vdpau_hwaccel;
+extern const struct FFHWAccel ff_cavs_vaapi_hwaccel;
#endif /* AVCODEC_HWACCELS_H */
diff --git a/libavcodec/profiles.c b/libavcodec/profiles.c
index 5bb8f150e6..b312f12281 100644
--- a/libavcodec/profiles.c
+++ b/libavcodec/profiles.c
@@ -200,4 +200,10 @@ const AVProfile ff_evc_profiles[] = {
{ AV_PROFILE_UNKNOWN },
};
+const AVProfile ff_cavs_profiles[] = {
+ { AV_PROFILE_CAVS_JIZHUN, "Jizhun" },
+ { AV_PROFILE_CAVS_GUANGDIAN, "Guangdian" },
+ { AV_PROFILE_UNKNOWN },
+};
+
#endif /* !CONFIG_SMALL */
diff --git a/libavcodec/profiles.h b/libavcodec/profiles.h
index 270430a48b..9a2b348ad4 100644
--- a/libavcodec/profiles.h
+++ b/libavcodec/profiles.h
@@ -75,5 +75,6 @@ extern const AVProfile ff_prores_profiles[];
extern const AVProfile ff_mjpeg_profiles[];
extern const AVProfile ff_arib_caption_profiles[];
extern const AVProfile ff_evc_profiles[];
+extern const AVProfile ff_cavs_profiles[];
#endif /* AVCODEC_PROFILES_H */
diff --git a/libavcodec/vaapi_cavs.c b/libavcodec/vaapi_cavs.c
new file mode 100644
index 0000000000..4a7a9b95ad
--- /dev/null
+++ b/libavcodec/vaapi_cavs.c
@@ -0,0 +1,164 @@
+/*
+ * AVS (Chinese GY/T 257.1—2012) HW decode acceleration through VA-API
+ * Copyright (c) 2022 JianfengZheng <jianfeng.zheng@mthreads.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "hwconfig.h"
+#include "hwaccel_internal.h"
+#include "vaapi_decode.h"
+#include "cavs.h"
+
+/**
+ * @file
+ * This file implements the glue code between FFmpeg's and VA-API's
+ * structures for AVS (Chinese GY/T 257.1—2012) decoding.
+ */
+
+static int vaapi_avs_pic_type_cvt(int pict_type)
+{
+ switch (pict_type)
+ {
+ case AV_PICTURE_TYPE_I: return VA_AVS_I_IMG;
+ case AV_PICTURE_TYPE_P: return VA_AVS_P_IMG;
+ case AV_PICTURE_TYPE_B: return VA_AVS_B_IMG;
+ default: return VA_AVS_I_IMG;
+ }
+}
+
+static void vaapi_avs_fill_pic(VAPictureAVS *va_pic, const AVSFrame *frame)
+{
+ va_pic->surface_id = ff_vaapi_get_surface_id(frame->f);
+ va_pic->poc = frame->poc / 2;
+}
+
+/** Initialize and start decoding a frame with VA API. */
+static int vaapi_avs_start_frame(AVCodecContext *avctx,
+ av_unused const uint8_t *buffer,
+ av_unused uint32_t size)
+{
+ int i, err;
+ AVSContext *h = avctx->priv_data;
+ VAPictureParameterBufferAVS pic_param = {};
+ VAAPIDecodePicture *vapic = h->cur.hwaccel_picture_private;
+ vapic->output_surface = ff_vaapi_get_surface_id(h->cur.f);
+
+ pic_param = (VAPictureParameterBufferAVS) {
+ .width = h->width,
+ .height = h->height,
+ .picture_type = vaapi_avs_pic_type_cvt(h->cur.f->pict_type),
+ .progressive_seq_flag = h->progressive_seq,
+ .progressive_frame_flag = h->progressive_frame,
+ .picture_structure_flag = h->pic_structure,
+ .fixed_pic_qp_flag = h->qp_fixed,
+ .picture_qp = h->qp,
+ .loop_filter_disable_flag = h->loop_filter_disable,
+ .alpha_c_offset = h->alpha_offset,
+ .beta_offset = h->beta_offset,
+ .skip_mode_flag_flag = h->skip_mode_flag,
+ .picture_reference_flag = h->ref_flag,
+ };
+
+ if (h->profile == 0x48) {
+ pic_param.guangdian_fields.guangdian_flag = 1;
+ pic_param.guangdian_fields.aec_flag = h->aec_flag;
+ pic_param.guangdian_fields.weight_quant_flag = h->weight_quant_flag;
+ pic_param.guangdian_fields.chroma_quant_param_delta_cb = h->chroma_quant_param_delta_cb;
+ pic_param.guangdian_fields.chroma_quant_param_delta_cr = h->chroma_quant_param_delta_cr;
+ memcpy(pic_param.guangdian_fields.wqm_8x8, h->wqm_8x8, 64);
+ }
+
+ vaapi_avs_fill_pic(&pic_param.curr_pic, &h->cur);
+ for (i = 0; i < 2; i++) {
+ vaapi_avs_fill_pic(&pic_param.ref_list[i], &h->DPB[i]);
+ }
+
+ err = ff_vaapi_decode_make_param_buffer(avctx, vapic,
+ VAPictureParameterBufferType,
+ &pic_param, sizeof(pic_param));
+ if (err < 0)
+ goto fail;
+
+ return 0;
+fail:
+ ff_vaapi_decode_cancel(avctx, vapic);
+ return err;
+}
+
+/** End a hardware decoding based frame. */
+static int vaapi_avs_end_frame(AVCodecContext *avctx)
+{
+ AVSContext *h = avctx->priv_data;
+ VAAPIDecodePicture *vapic = h->cur.hwaccel_picture_private;
+ return ff_vaapi_decode_issue(avctx, vapic);
+}
+
+/** Decode the given AVS slice with VA-API. */
+static int vaapi_avs_decode_slice(AVCodecContext *avctx,
+ const uint8_t *buffer,
+ uint32_t size)
+{
+ int err;
+ AVSContext *h = avctx->priv_data;
+ VAAPIDecodePicture *vapic = h->cur.hwaccel_picture_private;
+ VASliceParameterBufferAVS slice_param;
+ slice_param = (VASliceParameterBufferAVS) {
+ .slice_data_size = size,
+ .slice_data_offset = 0,
+ .slice_data_flag = VA_SLICE_DATA_FLAG_ALL,
+ .mb_data_bit_offset = get_bits_count(&h->gb),
+ .slice_vertical_pos = h->stc,
+ .fixed_slice_qp_flag = h->qp_fixed,
+ .slice_qp = h->qp,
+ .slice_weight_pred_flag = h->slice_weight_pred_flag,
+ .mb_weight_pred_flag = h->mb_weight_pred_flag,
+ };
+
+ *((uint32_t *)slice_param.luma_scale) = *((uint32_t *)h->luma_scale);
+ *((uint32_t *)slice_param.luma_shift) = *((uint32_t *)h->luma_shift);
+ *((uint32_t *)slice_param.chroma_scale) = *((uint32_t *)h->chroma_scale);
+ *((uint32_t *)slice_param.chroma_shift) = *((uint32_t *)h->chroma_shift);
+
+ err = ff_vaapi_decode_make_slice_buffer(avctx, vapic,
+ &slice_param, sizeof(slice_param),
+ buffer, size);
+ if (err < 0)
+ goto fail;
+
+ return 0;
+
+fail:
+ ff_vaapi_decode_cancel(avctx, vapic);
+ return err;
+}
+
+const FFHWAccel ff_cavs_vaapi_hwaccel = {
+ .p.name = "cavs_vaapi",
+ .p.type = AVMEDIA_TYPE_VIDEO,
+ .p.id = AV_CODEC_ID_CAVS,
+ .p.pix_fmt = AV_PIX_FMT_VAAPI,
+ .start_frame = &vaapi_avs_start_frame,
+ .end_frame = &vaapi_avs_end_frame,
+ .decode_slice = &vaapi_avs_decode_slice,
+ .frame_priv_data_size = sizeof(VAAPIDecodePicture),
+ .init = &ff_vaapi_decode_init,
+ .uninit = &ff_vaapi_decode_uninit,
+ .frame_params = &ff_vaapi_common_frame_params,
+ .priv_data_size = sizeof(VAAPIDecodeContext),
+ .caps_internal = HWACCEL_CAP_ASYNC_SAFE,
+};
diff --git a/libavcodec/vaapi_decode.c b/libavcodec/vaapi_decode.c
index ceac769c52..13a3f6aa42 100644
--- a/libavcodec/vaapi_decode.c
+++ b/libavcodec/vaapi_decode.c
@@ -408,6 +408,10 @@ static const struct {
H264ConstrainedBaseline),
MAP(H264, H264_MAIN, H264Main ),
MAP(H264, H264_HIGH, H264High ),
+#if HAVE_VA_PROFILE_AVS
+ MAP(CAVS, CAVS_JIZHUN, AVSJizhun ),
+ MAP(CAVS, CAVS_GUANGDIAN, AVSGuangdian),
+#endif
#if VA_CHECK_VERSION(0, 37, 0)
MAP(HEVC, HEVC_MAIN, HEVCMain ),
MAP(HEVC, HEVC_MAIN_10, HEVCMain10 ),
--
2.25.1
* Re: [FFmpeg-devel] [PATCH v1 1/2] vaapi: add vaapi_cavs support
2024-01-19 15:49 [FFmpeg-devel] [PATCH v1 1/2] vaapi: add vaapi_cavs support jianfeng.zheng
@ 2024-01-20 4:20 ` Zhao Zhili
From: Zhao Zhili @ 2024-01-20 4:20 UTC
To: 'FFmpeg development discussions and patches'
Cc: 'jianfeng.zheng'
> -----Original Message-----
> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of jianfeng.zheng
> Sent: January 19, 2024 23:50
> To: ffmpeg-devel@ffmpeg.org
> Cc: jianfeng.zheng <jianfeng.zheng@mthreads.com>
> Subject: [FFmpeg-devel] [PATCH v1 1/2] vaapi: add vaapi_cavs support
>
> see https://github.com/intel/libva/pull/738
>
> [Moore Threads](https://www.mthreads.com) (Mthreads for short) is a
> Chinese GPU manufacturer. All our products, such as MTTS70/MTTS80,
> support AVS/AVS+ HW decoding at up to 2K resolution.
Please use a more objective and neutral description.
>
> Signed-off-by: jianfeng.zheng <jianfeng.zheng@mthreads.com>
> ---
> configure | 14 ++
> libavcodec/Makefile | 1 +
> libavcodec/cavs.c | 12 +
> libavcodec/cavs.h | 36 ++-
> libavcodec/cavs_parser.c | 16 ++
> libavcodec/cavsdec.c | 473 +++++++++++++++++++++++++++++++++-----
> libavcodec/defs.h | 3 +
> libavcodec/hwaccels.h | 1 +
> libavcodec/profiles.c | 6 +
> libavcodec/profiles.h | 1 +
> libavcodec/vaapi_cavs.c | 164 +++++++++++++
> libavcodec/vaapi_decode.c | 4 +
> 12 files changed, 669 insertions(+), 62 deletions(-)
> create mode 100644 libavcodec/vaapi_cavs.c
>
> diff --git a/configure b/configure
> index c8ae0a061d..89759eda5d 100755
> --- a/configure
> +++ b/configure
> @@ -2463,6 +2463,7 @@ HAVE_LIST="
> xmllint
> zlib_gzip
> openvino2
> + va_profile_avs
> "
>
> # options emitted with CONFIG_ prefix but not available on the command line
> @@ -3202,6 +3203,7 @@ wmv3_dxva2_hwaccel_select="vc1_dxva2_hwaccel"
> wmv3_nvdec_hwaccel_select="vc1_nvdec_hwaccel"
> wmv3_vaapi_hwaccel_select="vc1_vaapi_hwaccel"
> wmv3_vdpau_hwaccel_select="vc1_vdpau_hwaccel"
> +cavs_vaapi_hwaccel_deps="vaapi va_profile_avs VAPictureParameterBufferAVS"
>
> # hardware-accelerated codecs
> mediafoundation_deps="mftransform_h MFCreateAlignedMemoryBuffer"
> @@ -7175,6 +7177,18 @@ if enabled vaapi; then
> check_type "va/va.h va/va_enc_vp8.h" "VAEncPictureParameterBufferVP8"
> check_type "va/va.h va/va_enc_vp9.h" "VAEncPictureParameterBufferVP9"
> check_type "va/va.h va/va_enc_av1.h" "VAEncPictureParameterBufferAV1"
> +
> + #
> + # Using 'VA_CHECK_VERSION' in the source code would make this easy, but we would
> + # have to wait until the newly added VAProfiles ship in a released VA-API version.
> + #
> + # Auto-detection keeps things compatible both before and after that release.
> + # It always works.
> + #
> + disable va_profile_avs &&
> + test_code cc va/va.h "VAProfile p1 = VAProfileAVSJizhun, p2 = VAProfileAVSGuangdian;" &&
> + enable va_profile_avs
> + enabled va_profile_avs && check_type "va/va.h va/va_dec_avs.h" "VAPictureParameterBufferAVS"
> fi
>
> if enabled_all opencl libdrm ; then
> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index bb42095165..7d92375fed 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -1055,6 +1055,7 @@ OBJS-$(CONFIG_VP9_VAAPI_HWACCEL) += vaapi_vp9.o
> OBJS-$(CONFIG_VP9_VDPAU_HWACCEL) += vdpau_vp9.o
> OBJS-$(CONFIG_VP9_VIDEOTOOLBOX_HWACCEL) += videotoolbox_vp9.o
> OBJS-$(CONFIG_VP8_QSV_HWACCEL) += qsvdec.o
> +OBJS-$(CONFIG_CAVS_VAAPI_HWACCEL) += vaapi_cavs.o
>
> # Objects duplicated from other libraries for shared builds
> SHLIBOBJS += log2_tab.o reverse.o
> diff --git a/libavcodec/cavs.c b/libavcodec/cavs.c
> index fdd577f7fb..ed7b278336 100644
> --- a/libavcodec/cavs.c
> +++ b/libavcodec/cavs.c
Please split the patch.
> @@ -810,6 +810,14 @@ av_cold int ff_cavs_init(AVCodecContext *avctx)
> if (!h->cur.f || !h->DPB[0].f || !h->DPB[1].f)
> return AVERROR(ENOMEM);
>
> + h->out[0].f = av_frame_alloc();
> + h->out[1].f = av_frame_alloc();
> + h->out[2].f = av_frame_alloc();
> + if (!h->out[0].f || !h->out[1].f || !h->out[2].f) {
> + ff_cavs_end(avctx);
> + return AVERROR(ENOMEM);
> + }
> +
> h->luma_scan[0] = 0;
> h->luma_scan[1] = 8;
> h->intra_pred_l[INTRA_L_VERT] = intra_pred_vert;
> @@ -840,6 +848,10 @@ av_cold int ff_cavs_end(AVCodecContext *avctx)
> av_frame_free(&h->DPB[0].f);
> av_frame_free(&h->DPB[1].f);
>
> + av_frame_free(&h->out[0].f);
> + av_frame_free(&h->out[1].f);
> + av_frame_free(&h->out[2].f);
> +
> av_freep(&h->top_qp);
> av_freep(&h->top_mv[0]);
> av_freep(&h->top_mv[1]);
> diff --git a/libavcodec/cavs.h b/libavcodec/cavs.h
> index 244c322b35..ef03c1a974 100644
> --- a/libavcodec/cavs.h
> +++ b/libavcodec/cavs.h
> @@ -39,8 +39,10 @@
> #define EXT_START_CODE 0x000001b5
> #define USER_START_CODE 0x000001b2
> #define CAVS_START_CODE 0x000001b0
> +#define VIDEO_SEQ_END_CODE 0x000001b1
> #define PIC_I_START_CODE 0x000001b3
> #define PIC_PB_START_CODE 0x000001b6
> +#define VIDEO_EDIT_CODE 0x000001b7
>
> #define A_AVAIL 1
> #define B_AVAIL 2
> @@ -164,10 +166,15 @@ struct dec_2dvlc {
> typedef struct AVSFrame {
> AVFrame *f;
> int poc;
> + int outputed;
> +
> + AVBufferRef *hwaccel_priv_buf;
> + void *hwaccel_picture_private;
> } AVSFrame;
>
> typedef struct AVSContext {
> AVCodecContext *avctx;
> + int got_pix_fmt;
> BlockDSPContext bdsp;
> H264ChromaContext h264chroma;
> VideoDSPContext vdsp;
> @@ -175,6 +182,7 @@ typedef struct AVSContext {
> GetBitContext gb;
> AVSFrame cur; ///< currently decoded frame
> AVSFrame DPB[2]; ///< reference frames
> + AVSFrame out[3]; ///< output queue, size 2 maybe enough
> int dist[2]; ///< temporal distances from current frame to ref frames
> int low_delay;
> int profile, level;
> @@ -182,12 +190,38 @@ typedef struct AVSContext {
> int mb_width, mb_height;
> int width, height;
> int stream_revision; ///<0 for samples from 2006, 1 for rm52j encoder
> - int progressive;
> + int progressive_seq;
> + int progressive_frame;
> int pic_structure;
> + int no_forward_ref_flag;
> + int pb_field_enhanced_flag; ///< only used in GUANGDIAN
> int skip_mode_flag; ///< select between skip_count or one skip_flag per MB
> int loop_filter_disable;
> int alpha_offset, beta_offset;
> int ref_flag;
> +
> + /** \defgroup guangdian profile
> + * @{
> + */
> + int aec_flag;
> + int weight_quant_flag;
> + int chroma_quant_param_delta_cb;
> + int chroma_quant_param_delta_cr;
> + uint8_t wqm_8x8[64];
> + /**@}*/
> +
> + /** \defgroup slice weighting
> + * FFmpeg doesn't support slice weighting natively, but it may be needed for hwaccels.
> + * @{
> + */
> + uint32_t slice_weight_pred_flag : 1;
> + uint32_t mb_weight_pred_flag : 1;
> + uint8_t luma_scale[4];
> + int8_t luma_shift[4];
> + uint8_t chroma_scale[4];
> + int8_t chroma_shift[4];
> + /**@}*/
> +
> int mbx, mby, mbidx; ///< macroblock coordinates
> int flags; ///< availability flags of neighbouring macroblocks
> int stc; ///< last start code
> diff --git a/libavcodec/cavs_parser.c b/libavcodec/cavs_parser.c
> index 4a03effd0f..a41a82d8d1 100644
> --- a/libavcodec/cavs_parser.c
> +++ b/libavcodec/cavs_parser.c
> @@ -65,6 +65,22 @@ static int cavs_find_frame_end(ParseContext *pc, const uint8_t *buf,
> pc->state=-1;
> return i-3;
> }
> + if((state&0xFFFFFF00) == 0x100){
> + if(state != EXT_START_CODE && state != USER_START_CODE){
> + state = state >> 8;
> + break;
> + }
> + }
> + }
> + for(; i<buf_size; i++){
> + state= (state<<8) | buf[i];
> + if((state&0xFFFFFF00) == 0x100){
> + if(state > SLICE_MAX_START_CODE){
> + pc->frame_start_found=0;
> + pc->state=-1;
> + return i-3;
> + }
> + }
Why? A parser should split data into frames, not slices.
> }
> }
> pc->frame_start_found= pic_found;
> diff --git a/libavcodec/cavsdec.c b/libavcodec/cavsdec.c
> index b356da0b04..18c38cd3ff 100644
> --- a/libavcodec/cavsdec.c
> +++ b/libavcodec/cavsdec.c
> @@ -25,11 +25,15 @@
> * @author Stefan Gehrer <stefan.gehrer@gmx.de>
> */
>
> +#include "config_components.h"
> #include "libavutil/avassert.h"
> #include "libavutil/emms.h"
> #include "avcodec.h"
> #include "get_bits.h"
> #include "golomb.h"
> +#include "hwaccel_internal.h"
> +#include "hwconfig.h"
> +#include "profiles.h"
> #include "cavs.h"
> #include "codec_internal.h"
> #include "decode.h"
> @@ -37,6 +41,43 @@
> #include "mpeg12data.h"
> #include "startcode.h"
>
> +static const uint8_t default_wq_param[4][6] = {
> + {128, 98, 106, 116, 116, 128},
> + {135, 143, 143, 160, 160, 213},
> + {128, 98, 106, 116, 116, 128},
> + {128, 128, 128, 128, 128, 128},
> +};
> +static const uint8_t wq_model_2_param[4][64] = {
> + {
> + 0, 0, 0, 4, 4, 4, 5, 5,
> + 0, 0, 3, 3, 3, 3, 5, 5,
> + 0, 3, 2, 2, 1, 1, 5, 5,
> + 4, 3, 2, 2, 1, 5, 5, 5,
> + 4, 3, 1, 1, 5, 5, 5, 5,
> + 4, 3, 1, 5, 5, 5, 5, 5,
> + 5, 5, 5, 5, 5, 5, 5, 5,
> + 5, 5, 5, 5, 5, 5, 5, 5,
> + }, {
> + 0, 0, 0, 4, 4, 4, 5, 5,
> + 0, 0, 4, 4, 4, 4, 5, 5,
> + 0, 3, 2, 2, 2, 1, 5, 5,
> + 3, 3, 2, 2, 1, 5, 5, 5,
> + 3, 3, 2, 1, 5, 5, 5, 5,
> + 3, 3, 1, 5, 5, 5, 5, 5,
> + 5, 5, 5, 5, 5, 5, 5, 5,
> + 5, 5, 5, 5, 5, 5, 5, 5,
> + }, {
> + 0, 0, 0, 4, 4, 3, 5, 5,
> + 0, 0, 4, 4, 3, 2, 5, 5,
> + 0, 4, 4, 3, 2, 1, 5, 5,
> + 4, 4, 3, 2, 1, 5, 5, 5,
> + 4, 3, 2, 1, 5, 5, 5, 5,
> + 3, 2, 1, 5, 5, 5, 5, 5,
> + 5, 5, 5, 5, 5, 5, 5, 5,
> + 5, 5, 5, 5, 5, 5, 5, 5,
> + }
> +};
> +
> static const uint8_t mv_scan[4] = {
> MV_FWD_X0, MV_FWD_X1,
> MV_FWD_X2, MV_FWD_X3
> @@ -927,7 +968,11 @@ static int decode_mb_b(AVSContext *h, enum cavs_mb mb_type)
>
> static inline int decode_slice_header(AVSContext *h, GetBitContext *gb)
> {
> - if (h->stc > 0xAF)
> + int i, nref;
> +
> + av_log(h->avctx, AV_LOG_TRACE, "slice start code 0x%02x\n", h->stc);
> +
> + if (h->stc > SLICE_MAX_START_CODE)
> av_log(h->avctx, AV_LOG_ERROR, "unexpected start code 0x%02x\n", h->stc);
>
> if (h->stc >= h->mb_height) {
> @@ -946,14 +991,119 @@ static inline int decode_slice_header(AVSContext *h, GetBitContext *gb)
> }
> /* inter frame or second slice can have weighting params */
> if ((h->cur.f->pict_type != AV_PICTURE_TYPE_I) ||
> - (!h->pic_structure && h->mby >= h->mb_width / 2))
> - if (get_bits1(gb)) { //slice_weighting_flag
> - av_log(h->avctx, AV_LOG_ERROR,
> - "weighted prediction not yet supported\n");
> + (!h->pic_structure && h->mby >= h->mb_height / 2)) {
> + h->slice_weight_pred_flag = get_bits1(gb);
> + if (h->slice_weight_pred_flag) {
> + nref = h->cur.f->pict_type == AV_PICTURE_TYPE_I ? 1 : (h->pic_structure ? 2 : 4);
> + for (i = 0; i < nref; i++) {
> + h->luma_scale[i] = get_bits(gb, 8);
> + h->luma_shift[i] = get_sbits(gb, 8);
> + skip_bits1(gb);
> + h->chroma_scale[i] = get_bits(gb, 8);
> + h->chroma_shift[i] = get_sbits(gb, 8);
> + skip_bits1(gb);
> + }
> + h->mb_weight_pred_flag = get_bits1(gb);
> + if (!h->avctx->hwaccel) {
> + av_log(h->avctx, AV_LOG_ERROR,
> + "weighted prediction not yet supported\n");
> + }
> }
> + }
> + if (h->aec_flag) {
> + align_get_bits(gb);
> + }
> +
> + return 0;
> +}
> +
> +/**
> + * skip stuffing bits before next start code "0x000001"
> + * @return '0' no stuffing bits placed at h->gb being skip, else '1'.
> + */
> +static inline int skip_stuffing_bits(AVSContext *h)
> +{
> + GetBitContext gb0 = h->gb;
> + GetBitContext *gb = &h->gb;
> + const uint8_t *start;
> + const uint8_t *ptr;
> + const uint8_t *end;
> + int align;
> + int stuffing_zeros;
> +
> +#if 0
> + /**
> + * skip 1 bit stuffing_bit '1' and 0~7 bit stuffing_bit '0'
> + */
> + if (!get_bits1(gb)) {
> + av_log(h->avctx, AV_LOG_WARNING, "NOT stuffing_bit '1'\n");
> + goto restore_get_bits;
> + }
> + align = (-get_bits_count(gb)) & 7;
> + if (show_bits_long(gb, align)) {
> + av_log(h->avctx, AV_LOG_WARNING, "NOT %d stuffing_bit '0..0'\n", align);
> + goto restore_get_bits;
> + }
> +#else
> + /**
> + * Seems like not all the stream follow "next_start_code()" strictly.
> + */
> + align = (-get_bits_count(gb)) & 7;
> + if (align == 0 && show_bits_long(gb, 8) == 0x80) {
> + skip_bits_long(gb, 8);
> + }
> +#endif
> +
> + /**
> + * skip leading zero bytes before 0x 00 00 01 stc
> + */
> + ptr = start = align_get_bits(gb);
> + end = gb->buffer_end;
> + while (ptr < end && *ptr == 0)
> + ptr++;
> +
> + if ((ptr >= end) || (*ptr == 1 && ptr - start >= 2)) {
> + stuffing_zeros = (ptr >= end ? end - start : ptr - start - 2);
> + if (stuffing_zeros > 0)
> + av_log(h->avctx, AV_LOG_DEBUG, "Skip 0x%x stuffing zeros @0x%x.\n",
> + stuffing_zeros, (int)(start - gb->buffer));
> + skip_bits_long(gb, stuffing_zeros * 8);
> + return 1;
> + } else {
> + av_log(h->avctx, AV_LOG_DEBUG, "No next_start_code() found @0x%x.\n",
> + (int)(start - gb->buffer));
> + goto restore_get_bits;
> + }
> +
> +restore_get_bits:
> + h->gb = gb0;
> return 0;
> }
>
> +static inline int skip_extension_and_user_data(AVSContext *h)
> +{
> + int stc = -1;
> + const uint8_t *start = align_get_bits(&h->gb);
> + const uint8_t *end = h->gb.buffer_end;
> + const uint8_t *ptr, *next;
> +
> + for (ptr = start; ptr + 4 < end; ptr = next) {
> + stc = show_bits_long(&h->gb, 32);
> + if (stc != EXT_START_CODE && stc != USER_START_CODE) {
> + break;
> + }
> + next = avpriv_find_start_code(ptr + 4, end, &stc);
> + if (next < end) {
> + next -= 4;
> + }
> + skip_bits(&h->gb, (next - ptr) * 8);
> + av_log(h->avctx, AV_LOG_DEBUG, "skip %d byte ext/user data\n",
> + (int)(next - ptr));
> + }
> +
> + return ptr > start;
> +}
> +
> static inline int check_for_slice(AVSContext *h)
> {
> GetBitContext *gb = &h->gb;
> @@ -981,44 +1131,133 @@ static inline int check_for_slice(AVSContext *h)
> * frame level
> *
> ****************************************************************************/
> +static int hwaccel_pic(AVSContext *h)
> +{
> + int ret = 0;
> + int stc = -1;
> + const uint8_t *frm_start = align_get_bits(&h->gb);
> + const uint8_t *frm_end = h->gb.buffer_end;
> + const uint8_t *slc_start = frm_start;
> + const uint8_t *slc_end = frm_end;
> + GetBitContext gb = h->gb;
> + const FFHWAccel *hwaccel = ffhwaccel(h->avctx->hwaccel);
> +
> + ret = hwaccel->start_frame(h->avctx, NULL, 0);
> + if (ret < 0)
> + return ret;
> +
> + for (slc_start = frm_start; slc_start + 4 < frm_end; slc_start = slc_end) {
> + slc_end = avpriv_find_start_code(slc_start + 4, frm_end, &stc);
> + if (slc_end < frm_end) {
> + slc_end -= 4;
> + }
> +
> + init_get_bits(&h->gb, slc_start, (slc_end - slc_start) * 8);
> + if (!check_for_slice(h)) {
> + break;
> + }
> +
> + ret = hwaccel->decode_slice(h->avctx, slc_start, slc_end - slc_start);
> + if (ret < 0) {
> + break;
> + }
> + }
> +
> + h->gb = gb;
> + skip_bits(&h->gb, (slc_start - frm_start) * 8);
> +
> + if (ret < 0)
> + return ret;
> +
> + return hwaccel->end_frame(h->avctx);
> +}
> +
> +/**
> + * @brief remove frame out of dpb
> + */
> +static void cavs_frame_unref(AVSFrame *frame)
> +{
> + /* frame->f can be NULL if context init failed */
> + if (!frame->f || !frame->f->buf[0])
> + return;
> +
> + av_buffer_unref(&frame->hwaccel_priv_buf);
> + frame->hwaccel_picture_private = NULL;
> +
> + av_frame_unref(frame->f);
> +}
> +
> +static int output_one_frame(AVSContext *h, AVFrame *data, int *got_frame)
> +{
> + if (h->out[0].f->buf[0]) {
> + av_log(h->avctx, AV_LOG_DEBUG, "output frame: poc=%d\n", h->out[0].poc);
> + av_frame_move_ref(data, h->out[0].f);
> + *got_frame = 1;
> +
> + // out[0] <- out[1] <- out[2] <- out[0]
> + cavs_frame_unref(&h->out[2]);
> + FFSWAP(AVSFrame, h->out[0], h->out[2]);
> + FFSWAP(AVSFrame, h->out[0], h->out[1]);
> +
> + return 1;
> + }
> +
> + return 0;
> +}
> +
> +static void queue_one_frame(AVSContext *h, AVSFrame *out)
> +{
> + int idx = !h->out[0].f->buf[0] ? 0 : (!h->out[1].f->buf[0] ? 1 : 2);
> + av_log(h->avctx, AV_LOG_DEBUG, "queue in out[%d]: poc=%d\n", idx, out->poc);
> + av_frame_ref(h->out[idx].f, out->f);
> + h->out[idx].poc = out->poc;
> +}
>
> static int decode_pic(AVSContext *h)
> {
> int ret;
> int skip_count = -1;
> enum cavs_mb mb_type;
> + char tc[4];
>
> if (!h->top_qp) {
> av_log(h->avctx, AV_LOG_ERROR, "No sequence header decoded yet\n");
> return AVERROR_INVALIDDATA;
> }
>
> - av_frame_unref(h->cur.f);
> + cavs_frame_unref(&h->cur);
> +
> + skip_bits(&h->gb, 16);//bbv_delay
> + if (h->profile == AV_PROFILE_CAVS_GUANGDIAN) {
> + skip_bits(&h->gb, 8);//bbv_dwlay_extension
> + }
>
> - skip_bits(&h->gb, 16);//bbv_dwlay
> if (h->stc == PIC_PB_START_CODE) {
> h->cur.f->pict_type = get_bits(&h->gb, 2) + AV_PICTURE_TYPE_I;
> if (h->cur.f->pict_type > AV_PICTURE_TYPE_B) {
> av_log(h->avctx, AV_LOG_ERROR, "illegal picture type\n");
> return AVERROR_INVALIDDATA;
> }
> +
> /* make sure we have the reference frames we need */
> - if (!h->DPB[0].f->data[0] ||
> - (!h->DPB[1].f->data[0] && h->cur.f->pict_type == AV_PICTURE_TYPE_B))
> + if (!h->DPB[0].f->buf[0] ||
> + (!h->DPB[1].f->buf[0] && h->cur.f->pict_type == AV_PICTURE_TYPE_B)) {
> + av_log(h->avctx, AV_LOG_ERROR, "Invalid reference frame\n");
> return AVERROR_INVALIDDATA;
> + }
> } else {
> h->cur.f->pict_type = AV_PICTURE_TYPE_I;
> - if (get_bits1(&h->gb))
> - skip_bits(&h->gb, 24);//time_code
> - /* old sample clips were all progressive and no low_delay,
> - bump stream revision if detected otherwise */
> - if (h->low_delay || !(show_bits(&h->gb, 9) & 1))
> - h->stream_revision = 1;
> - /* similarly test top_field_first and repeat_first_field */
> - else if (show_bits(&h->gb, 11) & 3)
> - h->stream_revision = 1;
> - if (h->stream_revision > 0)
> - skip_bits(&h->gb, 1); //marker_bit
> + if (get_bits1(&h->gb)) { //time_code
> + skip_bits(&h->gb, 1);
> + tc[0] = get_bits(&h->gb, 5);
> + tc[1] = get_bits(&h->gb, 6);
> + tc[2] = get_bits(&h->gb, 6);
> + tc[3] = get_bits(&h->gb, 6);
> + av_log(h->avctx, AV_LOG_DEBUG, "timecode: %d:%d:%d.%d\n",
> + tc[0], tc[1], tc[2], tc[3]);
> + }
> +
> + skip_bits(&h->gb, 1);
> }
>
> if (get_bits_left(&h->gb) < 23)
> @@ -1029,6 +1268,17 @@ static int decode_pic(AVSContext *h)
> if (ret < 0)
> return ret;
>
> + if (h->avctx->hwaccel) {
> + const FFHWAccel *hwaccel = ffhwaccel(h->avctx->hwaccel);
> + av_assert0(!h->cur.hwaccel_picture_private);
> + if (hwaccel->frame_priv_data_size) {
> + h->cur.hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
> + if (!h->cur.hwaccel_priv_buf)
> + return AVERROR(ENOMEM);
> + h->cur.hwaccel_picture_private = h->cur.hwaccel_priv_buf->data;
> + }
> + }
> +
> if (!h->edge_emu_buffer) {
> int alloc_size = FFALIGN(FFABS(h->cur.f->linesize[0]) + 32, 32);
> h->edge_emu_buffer = av_mallocz(alloc_size * 2 * 24);
> @@ -1039,6 +1289,8 @@ static int decode_pic(AVSContext *h)
> if ((ret = ff_cavs_init_pic(h)) < 0)
> return ret;
> h->cur.poc = get_bits(&h->gb, 8) * 2;
> + av_log(h->avctx, AV_LOG_DEBUG, "poc=%d, type=%d\n",
> + h->cur.poc, h->cur.f->pict_type);
>
> /* get temporal distances and MV scaling factors */
> if (h->cur.f->pict_type != AV_PICTURE_TYPE_B) {
> @@ -1052,8 +1304,12 @@ static int decode_pic(AVSContext *h)
> if (h->cur.f->pict_type == AV_PICTURE_TYPE_B) {
> h->sym_factor = h->dist[0] * h->scale_den[1];
> if (FFABS(h->sym_factor) > 32768) {
> + av_log(h->avctx, AV_LOG_ERROR, "poc=%d/%d/%d, dist=%d/%d\n",
> + h->DPB[1].poc, h->DPB[0].poc, h->cur.poc, h->dist[0], h->dist[1]);
> av_log(h->avctx, AV_LOG_ERROR, "sym_factor %d too large\n", h->sym_factor);
> - return AVERROR_INVALIDDATA;
> +
> + if (!h->avctx->hwaccel)
> + return AVERROR_INVALIDDATA;
> }
> } else {
> h->direct_den[0] = h->dist[0] ? 16384 / h->dist[0] : 0;
> @@ -1062,9 +1318,9 @@ static int decode_pic(AVSContext *h)
>
> if (h->low_delay)
> get_ue_golomb(&h->gb); //bbv_check_times
> - h->progressive = get_bits1(&h->gb);
> + h->progressive_frame = get_bits1(&h->gb);
> h->pic_structure = 1;
> - if (!h->progressive)
> + if (!h->progressive_frame)
> h->pic_structure = get_bits1(&h->gb);
> if (!h->pic_structure && h->stc == PIC_PB_START_CODE)
> skip_bits1(&h->gb); //advanced_pred_mode_disable
> @@ -1073,14 +1329,18 @@ static int decode_pic(AVSContext *h)
> h->pic_qp_fixed =
> h->qp_fixed = get_bits1(&h->gb);
> h->qp = get_bits(&h->gb, 6);
> + h->skip_mode_flag = 0;
> + h->ref_flag = 0;
> if (h->cur.f->pict_type == AV_PICTURE_TYPE_I) {
> - if (!h->progressive && !h->pic_structure)
> - skip_bits1(&h->gb);//what is this?
> + if (!h->progressive_frame && !h->pic_structure)
> + h->skip_mode_flag = get_bits1(&h->gb);
> skip_bits(&h->gb, 4); //reserved bits
> } else {
> if (!(h->cur.f->pict_type == AV_PICTURE_TYPE_B && h->pic_structure == 1))
> h->ref_flag = get_bits1(&h->gb);
> - skip_bits(&h->gb, 4); //reserved bits
> + h->no_forward_ref_flag = get_bits1(&h->gb);
> + h->pb_field_enhanced_flag = get_bits1(&h->gb);
> + skip_bits(&h->gb, 2); //reserved bits
> h->skip_mode_flag = get_bits1(&h->gb);
> }
> h->loop_filter_disable = get_bits1(&h->gb);
> @@ -1096,8 +1356,46 @@ static int decode_pic(AVSContext *h)
> h->alpha_offset = h->beta_offset = 0;
> }
>
> + h->weight_quant_flag = 0;
> + if (h->profile == AV_PROFILE_CAVS_GUANGDIAN) {
> + h->weight_quant_flag = get_bits1(&h->gb);
> + if (h->weight_quant_flag) {
> + int wq_param[6] = {128, 128, 128, 128, 128, 128};
> + int i, wqp_index, wq_model;
> + const uint8_t *m2p;
> +
> + skip_bits1(&h->gb);
> + if (!get_bits1(&h->gb)) {
> + h->chroma_quant_param_delta_cb = get_se_golomb(&h->gb);
> + h->chroma_quant_param_delta_cr = get_se_golomb(&h->gb);
> + }
> + wqp_index = get_bits(&h->gb, 2);
> + wq_model = get_bits(&h->gb, 2);
> + m2p = wq_model_2_param[wq_model];
> +
> + for (i = 0; i < 6; i++) {
> + int delta = (wqp_index == 1 || wqp_index == 2) ? get_se_golomb(&h->gb) : 0;
> + wq_param[i] = default_wq_param[wqp_index][i] + delta;
> + av_log(h->avctx, AV_LOG_DEBUG, "wqp[%d]=%d\n", i, wq_param[i]);
> + }
> + for (i = 0; i < 64; i++) {
> + h->wqm_8x8[i] = wq_param[ m2p[i] ];
> + }
> + } else {
> + memset(h->wqm_8x8, 128, sizeof(h->wqm_8x8));
> + }
> + h->aec_flag = get_bits1(&h->gb);
> + av_log(h->avctx, AV_LOG_DEBUG, "wq_flag=%d, aec_flag=%d\n",
> + h->weight_quant_flag, h->aec_flag);
> + }
> +
> + skip_stuffing_bits(h);
> + skip_extension_and_user_data(h);
> +
> ret = 0;
> - if (h->cur.f->pict_type == AV_PICTURE_TYPE_I) {
> + if (h->avctx->hwaccel) {
> + ret = hwaccel_pic(h);
> + } else if (h->cur.f->pict_type == AV_PICTURE_TYPE_I) {
> do {
> check_for_slice(h);
> ret = decode_mb_i(h, 0);
> @@ -1160,11 +1458,6 @@ static int decode_pic(AVSContext *h)
> } while (ff_cavs_next_mb(h));
> }
> emms_c();
> - if (ret >= 0 && h->cur.f->pict_type != AV_PICTURE_TYPE_B) {
> - av_frame_unref(h->DPB[1].f);
> - FFSWAP(AVSFrame, h->cur, h->DPB[1]);
> - FFSWAP(AVSFrame, h->DPB[0], h->DPB[1]);
> - }
> return ret;
> }
>
> @@ -1181,13 +1474,8 @@ static int decode_seq_header(AVSContext *h)
> int ret;
>
> h->profile = get_bits(&h->gb, 8);
> - if (h->profile != 0x20) {
> - avpriv_report_missing_feature(h->avctx,
> - "only supprt JiZhun profile");
> - return AVERROR_PATCHWELCOME;
> - }
> h->level = get_bits(&h->gb, 8);
> - skip_bits1(&h->gb); //progressive sequence
> + h->progressive_seq = get_bits1(&h->gb);
>
> width = get_bits(&h->gb, 14);
> height = get_bits(&h->gb, 14);
> @@ -1214,6 +1502,9 @@ static int decode_seq_header(AVSContext *h)
> skip_bits1(&h->gb); //marker_bit
> skip_bits(&h->gb, 12); //bit_rate_upper
> h->low_delay = get_bits1(&h->gb);
> + av_log(h->avctx, AV_LOG_DEBUG,
> + "seq: profile=0x%02x, level=0x%02x, size=%dx%d, low_delay=%d\n",
> + h->profile, h->level, width, height, h->low_delay);
>
> ret = ff_set_dimensions(h->avctx, width, height);
> if (ret < 0)
> @@ -1239,43 +1530,61 @@ static int cavs_decode_frame(AVCodecContext *avctx, AVFrame *rframe,
> int *got_frame, AVPacket *avpkt)
> {
> AVSContext *h = avctx->priv_data;
> - const uint8_t *buf = avpkt->data;
> - int buf_size = avpkt->size;
> uint32_t stc = -1;
> int input_size, ret;
> const uint8_t *buf_end;
> const uint8_t *buf_ptr;
> int frame_start = 0;
>
> - if (buf_size == 0) {
> - if (!h->low_delay && h->DPB[0].f->data[0]) {
> - *got_frame = 1;
> - av_frame_move_ref(rframe, h->DPB[0].f);
> + if (avpkt->size == 0) {
> + if (h->DPB[0].f->buf[0] && !h->DPB[0].outputed) {
> + queue_one_frame(h, &h->DPB[0]);
> + cavs_frame_unref(&h->DPB[0]);
> }
> + output_one_frame(h, rframe, got_frame);
> return 0;
> }
>
> h->stc = 0;
>
> - buf_ptr = buf;
> - buf_end = buf + buf_size;
> - for(;;) {
> + buf_ptr = avpkt->data;
> + buf_end = avpkt->data + avpkt->size;
> + for(; buf_ptr < buf_end;) {
> buf_ptr = avpriv_find_start_code(buf_ptr, buf_end, &stc);
> if ((stc & 0xFFFFFE00) || buf_ptr == buf_end) {
> if (!h->stc)
> av_log(h->avctx, AV_LOG_WARNING, "no frame decoded\n");
> - return FFMAX(0, buf_ptr - buf);
> + return FFMAX(0, buf_ptr - avpkt->data);
> }
> input_size = (buf_end - buf_ptr) * 8;
> + av_log(h->avctx, AV_LOG_TRACE, "Found start code 0x%04x, sz=%d\n",
> + stc, input_size / 8);
> switch (stc) {
> case CAVS_START_CODE:
> init_get_bits(&h->gb, buf_ptr, input_size);
> - decode_seq_header(h);
> + if ((ret = decode_seq_header(h)) < 0)
> + return ret;
> + avctx->profile = h->profile;
> + avctx->level = h->level;
> + if (!h->got_pix_fmt) {
> + h->got_pix_fmt = 1;
> + ret = ff_get_format(avctx, avctx->codec->pix_fmts);
> + if (ret < 0)
> + return ret;
> +
> + avctx->pix_fmt = ret;
> +
> + if (h->profile == AV_PROFILE_CAVS_GUANGDIAN && !avctx->hwaccel) {
> + av_log(avctx, AV_LOG_ERROR, "Your platform doesn't suppport hardware"
> + " accelerated for CAVS Guangdian Profile decoding.\n");
> + return AVERROR(ENOTSUP);
> + }
> + }
> break;
> case PIC_I_START_CODE:
> if (!h->got_keyframe) {
> - av_frame_unref(h->DPB[0].f);
> - av_frame_unref(h->DPB[1].f);
> + cavs_frame_unref(&h->DPB[0]);
> + cavs_frame_unref(&h->DPB[1]);
> h->got_keyframe = 1;
> }
> case PIC_PB_START_CODE:
> @@ -1285,23 +1594,39 @@ static int cavs_decode_frame(AVCodecContext *avctx, AVFrame *rframe,
> if (*got_frame)
> av_frame_unref(rframe);
> *got_frame = 0;
> - if (!h->got_keyframe)
> + if (!h->got_keyframe) {
> + av_log(avctx, AV_LOG_ERROR, "No keyframe decoded before P/B frame.\n");
> break;
> + }
> init_get_bits(&h->gb, buf_ptr, input_size);
> h->stc = stc;
> - if (decode_pic(h))
> - break;
> - *got_frame = 1;
> + if ((ret = decode_pic(h)) < 0)
> + return ret;
> + buf_ptr = align_get_bits(&h->gb);
> +
> + h->cur.outputed = 0;
> if (h->cur.f->pict_type != AV_PICTURE_TYPE_B) {
> - if (h->DPB[!h->low_delay].f->data[0]) {
> - if ((ret = av_frame_ref(rframe, h->DPB[!h->low_delay].f)) < 0)
> - return ret;
> - } else {
> - *got_frame = 0;
> + // at most one frame of reordering delay
> + if (h->DPB[0].f->buf[0] && !h->DPB[0].outputed) {
> + queue_one_frame(h, &h->DPB[0]);
> + h->DPB[0].outputed = 1;
> + }
> +
> + if (h->low_delay) {
> + queue_one_frame(h, &h->cur);
> + h->cur.outputed = 1;
> }
> +
> + // null -> curr -> DPB[0] -> DPB[1]
> + cavs_frame_unref(&h->DPB[1]);
> + FFSWAP(AVSFrame, h->cur, h->DPB[1]);
> + FFSWAP(AVSFrame, h->DPB[0], h->DPB[1]);
> } else {
> - av_frame_move_ref(rframe, h->cur.f);
> + queue_one_frame(h, &h->cur);
> + cavs_frame_unref(&h->cur);
> }
> +
> + output_one_frame(h, rframe, got_frame);
> break;
> case EXT_START_CODE:
> //mpeg_decode_extension(avctx, buf_ptr, input_size);
> @@ -1309,16 +1634,34 @@ static int cavs_decode_frame(AVCodecContext *avctx, AVFrame *rframe,
> case USER_START_CODE:
> //mpeg_decode_user_data(avctx, buf_ptr, input_size);
> break;
> + case VIDEO_EDIT_CODE:
> + av_log(h->avctx, AV_LOG_WARNING, "Skip video_edit_code\n");
> + break;
> + case VIDEO_SEQ_END_CODE:
> + av_log(h->avctx, AV_LOG_WARNING, "Skip video_sequence_end_code\n");
> + break;
> default:
> if (stc <= SLICE_MAX_START_CODE) {
> + h->stc = stc & 0xff;
> init_get_bits(&h->gb, buf_ptr, input_size);
> decode_slice_header(h, &h->gb);
> + } else {
> + av_log(h->avctx, AV_LOG_WARNING, "Skip unsupported start code 0x%04X\n", stc);
> }
> break;
> }
> }
> + return (buf_ptr - avpkt->data);
> }
>
> +static const enum AVPixelFormat cavs_hwaccel_pixfmt_list_420[] = {
> +#if CONFIG_CAVS_VAAPI_HWACCEL
> + AV_PIX_FMT_VAAPI,
> +#endif
> + AV_PIX_FMT_YUV420P,
> + AV_PIX_FMT_NONE
> +};
> +
> const FFCodec ff_cavs_decoder = {
> .p.name = "cavs",
> CODEC_LONG_NAME("Chinese AVS (Audio Video Standard) (AVS1-P2, JiZhun profile)"),
> @@ -1331,4 +1674,12 @@ const FFCodec ff_cavs_decoder = {
> .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
> .flush = cavs_flush,
> .caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
> + .p.pix_fmts = cavs_hwaccel_pixfmt_list_420,
> + .hw_configs = (const AVCodecHWConfigInternal *const []) {
> +#if CONFIG_CAVS_VAAPI_HWACCEL
> + HWACCEL_VAAPI(cavs),
> +#endif
> + NULL
> + },
> + .p.profiles = NULL_IF_CONFIG_SMALL(ff_cavs_profiles),
> };
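Since the decoder now lists AV_PIX_FMT_VAAPI ahead of AV_PIX_FMT_YUV420P, format
negotiation goes through ff_get_format() and the caller's get_format callback. As a
rough illustration only (not part of this patch; pick_vaapi_format is an invented
name), an application wanting the hardware path could select it like this:

#include <libavcodec/avcodec.h>

/* Illustrative callback: prefer the VAAPI surface format if the decoder
 * offers it, otherwise fall back to the first (software) entry. */
static enum AVPixelFormat pick_vaapi_format(AVCodecContext *avctx,
                                            const enum AVPixelFormat *fmts)
{
    for (const enum AVPixelFormat *p = fmts; *p != AV_PIX_FMT_NONE; p++) {
        if (*p == AV_PIX_FMT_VAAPI)
            return *p;
    }
    return fmts[0];
}

Set with avctx->get_format = pick_vaapi_format; before avcodec_open2(). From the
command line, something like "ffmpeg -hwaccel vaapi -i input.avs -f null -" should
then exercise the new hwaccel, assuming the VA driver exposes the AVS profiles.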
> diff --git a/libavcodec/defs.h b/libavcodec/defs.h
> index 00d840ec19..d59816a70f 100644
> --- a/libavcodec/defs.h
> +++ b/libavcodec/defs.h
> @@ -192,6 +192,9 @@
> #define AV_PROFILE_EVC_BASELINE 0
> #define AV_PROFILE_EVC_MAIN 1
>
> +#define AV_PROFILE_CAVS_JIZHUN 0x20
> +#define AV_PROFILE_CAVS_GUANGDIAN 0x48
> +
>
> #define AV_LEVEL_UNKNOWN -99
>
> diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h
> index 5171e4c7d7..a1a973b460 100644
> --- a/libavcodec/hwaccels.h
> +++ b/libavcodec/hwaccels.h
> @@ -89,5 +89,6 @@ extern const struct FFHWAccel ff_wmv3_dxva2_hwaccel;
> extern const struct FFHWAccel ff_wmv3_nvdec_hwaccel;
> extern const struct FFHWAccel ff_wmv3_vaapi_hwaccel;
> extern const struct FFHWAccel ff_wmv3_vdpau_hwaccel;
> +extern const struct FFHWAccel ff_cavs_vaapi_hwaccel;
>
> #endif /* AVCODEC_HWACCELS_H */
> diff --git a/libavcodec/profiles.c b/libavcodec/profiles.c
> index 5bb8f150e6..b312f12281 100644
> --- a/libavcodec/profiles.c
> +++ b/libavcodec/profiles.c
> @@ -200,4 +200,10 @@ const AVProfile ff_evc_profiles[] = {
> { AV_PROFILE_UNKNOWN },
> };
>
> +const AVProfile ff_cavs_profiles[] = {
> + { AV_PROFILE_CAVS_JIZHUN, "Jizhun" },
> + { AV_PROFILE_CAVS_GUANGDIAN, "Guangdian" },
> + { AV_PROFILE_UNKNOWN },
> +};
> +
> #endif /* !CONFIG_SMALL */
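Because decode_seq_header() stores the raw 8-bit profile_id in avctx->profile, these
constants compare directly against the bitstream value (0x20 for Jizhun, 0x48 for
Guangdian). A trivial, purely illustrative helper (the real lookup goes through
ff_cavs_profiles and av_get_profile_name()), assuming the new defs.h constants are
installed:

#include <libavcodec/defs.h>

/* Illustrative only: map the CAVS sequence-header profile_id to a name. */
static const char *cavs_profile_name(int profile)
{
    switch (profile) {
    case AV_PROFILE_CAVS_JIZHUN:    return "Jizhun";    /* profile_id 0x20 */
    case AV_PROFILE_CAVS_GUANGDIAN: return "Guangdian"; /* profile_id 0x48 */
    default:                        return "unknown";
    }
}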
> diff --git a/libavcodec/profiles.h b/libavcodec/profiles.h
> index 270430a48b..9a2b348ad4 100644
> --- a/libavcodec/profiles.h
> +++ b/libavcodec/profiles.h
> @@ -75,5 +75,6 @@ extern const AVProfile ff_prores_profiles[];
> extern const AVProfile ff_mjpeg_profiles[];
> extern const AVProfile ff_arib_caption_profiles[];
> extern const AVProfile ff_evc_profiles[];
> +extern const AVProfile ff_cavs_profiles[];
>
> #endif /* AVCODEC_PROFILES_H */
> diff --git a/libavcodec/vaapi_cavs.c b/libavcodec/vaapi_cavs.c
> new file mode 100644
> index 0000000000..4a7a9b95ad
> --- /dev/null
> +++ b/libavcodec/vaapi_cavs.c
> @@ -0,0 +1,164 @@
> +/*
> + * AVS (Chinese GY/T 257.1b
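For orientation only, a VAAPI hwaccel in libavcodec generally follows the shape
sketched below. This is modeled on the existing vaapi_* hwaccels rather than copied
from the new vaapi_cavs.c; every cavs_vaapi_* callback name is a placeholder and the
buffer-filling bodies are omitted:

#include "avcodec.h"
#include "hwaccel_internal.h"
#include "vaapi_decode.h"

static int cavs_vaapi_start_frame(AVCodecContext *avctx,
                                  const uint8_t *buffer, uint32_t size)
{
    /* the real code fills a VAPictureParameterBufferAVS from AVSContext here */
    return 0;
}

static int cavs_vaapi_decode_slice(AVCodecContext *avctx,
                                   const uint8_t *buffer, uint32_t size)
{
    /* the real code queues the slice parameters and slice data here */
    return 0;
}

static int cavs_vaapi_end_frame(AVCodecContext *avctx)
{
    /* the real code issues the accumulated buffers to the driver here */
    return 0;
}

const FFHWAccel ff_cavs_vaapi_hwaccel = {
    .p.name               = "cavs_vaapi",
    .p.type               = AVMEDIA_TYPE_VIDEO,
    .p.id                 = AV_CODEC_ID_CAVS,
    .p.pix_fmt            = AV_PIX_FMT_VAAPI,
    .start_frame          = cavs_vaapi_start_frame,
    .end_frame            = cavs_vaapi_end_frame,
    .decode_slice         = cavs_vaapi_decode_slice,
    .frame_priv_data_size = sizeof(VAAPIDecodePicture),
    .init                 = ff_vaapi_decode_init,
    .uninit               = ff_vaapi_decode_uninit,
    .frame_params         = ff_vaapi_common_frame_params,
    .priv_data_size       = sizeof(VAAPIDecodeContext),
    .caps_internal        = HWACCEL_CAP_ASYNC_SAFE,
};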