From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100]) by master.gitmailbox.com (Postfix) with ESMTPS id 59F654B882 for ; Thu, 30 Jan 2025 19:42:34 +0000 (UTC) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 97B4F68C17C; Thu, 30 Jan 2025 21:41:49 +0200 (EET) Received: from btbn.de (btbn.de [144.76.60.213]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id 7E8B868C0C5 for ; Thu, 30 Jan 2025 21:41:37 +0200 (EET) Received: from [authenticated] by btbn.de (Postfix) with ESMTPSA id E5EB7296C10B4; Thu, 30 Jan 2025 20:41:36 +0100 (CET) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=rothenpieler.org; s=mail; t=1738266096; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=4/cgP7OG9glJnxVOeIiy8bYKUHTBCP7J/OHc0wOacV0=; b=aqFo3yzlVAOBEcwczF9cer27s9jU2XjNba2jkPPwcq4loEmkZtLB9swJrs8+UTSjHaNnZl nkCZR3Cl8Z+Qwx/hMrh2ukJwDFPG56WA4i6I9qu+R1KzaP3ZAa0aiVUj9PMcUVssUQfJgK sfatiQMUMXhTMvqkwG1xAxq28FtDm50rWl9/c+gzkVQx1aAkYhi2VrBGaokCX1iG73QA8Z ZJo7vlL+/24eu7L8VbUmaeSzWYHT9XQOJIJUepAmTPXz+5/Ow6AQr/xx3R5e9jh0SaJwBI WBiwnBATAbNVwuDToUGaEYesrWYWd7rvNJpmcFypQibnEDX0Zptuo/huun7YTg== From: Timo Rothenpieler To: ffmpeg-devel@ffmpeg.org Date: Thu, 30 Jan 2025 20:40:41 +0100 Message-ID: <20250130194124.21836-5-timo@rothenpieler.org> X-Mailer: git-send-email 2.45.2 In-Reply-To: <20250130194124.21836-1-timo@rothenpieler.org> References: <20250130194124.21836-1-timo@rothenpieler.org> MIME-Version: 1.0 Subject: [FFmpeg-devel] [PATCH 4/9] avcodec/nvenc: add 4:2:2 encoding and H.264 10-bit support X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Cc: Diego de Souza Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" Archived-At: List-Archive: List-Post: From: Diego de Souza This commit adds support for 4:2:2 encoding for HEVC and H.264 on NVIDIA Blackwell GPUs. Additionally, it supports 10-bit encoding for H.264 on Blackwell GPUs. Signed-off-by: Diego de Souza --- libavcodec/nvenc.c | 68 +++++++++++++++++++++++++++++++++++++---- libavcodec/nvenc.h | 12 ++++++++ libavcodec/nvenc_h264.c | 9 ++++++ 3 files changed, 83 insertions(+), 6 deletions(-) diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c index 06e3fb81a4..f301269dbd 100644 --- a/libavcodec/nvenc.c +++ b/libavcodec/nvenc.c @@ -59,6 +59,11 @@ const enum AVPixelFormat ff_nvenc_pix_fmts[] = { AV_PIX_FMT_P010, AV_PIX_FMT_YUV444P, AV_PIX_FMT_P016, // Truncated to 10bits +#ifdef NVENC_HAVE_422_SUPPORT + AV_PIX_FMT_NV16, + AV_PIX_FMT_P210, + AV_PIX_FMT_P216, +#endif AV_PIX_FMT_YUV444P16, // Truncated to 10bits AV_PIX_FMT_0RGB32, AV_PIX_FMT_RGB32, @@ -87,6 +92,8 @@ const AVCodecHWConfigInternal *const ff_nvenc_hw_configs[] = { #define IS_10BIT(pix_fmt) (pix_fmt == AV_PIX_FMT_P010 || \ pix_fmt == AV_PIX_FMT_P016 || \ + pix_fmt == AV_PIX_FMT_P210 || \ + pix_fmt == AV_PIX_FMT_P216 || \ pix_fmt == AV_PIX_FMT_YUV444P16 || \ pix_fmt == AV_PIX_FMT_X2RGB10 || \ pix_fmt == AV_PIX_FMT_X2BGR10 || \ @@ -105,6 +112,10 @@ const AVCodecHWConfigInternal *const ff_nvenc_hw_configs[] = { pix_fmt == AV_PIX_FMT_GBRP16 || \ (ctx->rgb_mode == NVENC_RGB_MODE_444 && IS_RGB(pix_fmt))) +#define IS_YUV422(pix_fmt) (pix_fmt == AV_PIX_FMT_NV16 || \ + pix_fmt == AV_PIX_FMT_P210 || \ + pix_fmt == AV_PIX_FMT_P216) + #define IS_GBRP(pix_fmt) (pix_fmt == AV_PIX_FMT_GBRP || \ pix_fmt == AV_PIX_FMT_GBRP16) @@ -477,6 +488,16 @@ static int nvenc_check_capabilities(AVCodecContext *avctx) return AVERROR(ENOSYS); } +#ifdef NVENC_HAVE_422_SUPPORT + ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_YUV422_ENCODE); +#else + ret = 0; +#endif + if (IS_YUV422(ctx->data_pix_fmt) && ret <= 0) { + av_log(avctx, AV_LOG_WARNING, "YUV422P not supported\n"); + return AVERROR(ENOSYS); + } + ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_LOSSLESS_ENCODE); if (ctx->flags & NVENC_LOSSLESS && ret <= 0) { av_log(avctx, AV_LOG_WARNING, "Lossless encoding not supported\n"); @@ -1297,6 +1318,18 @@ static av_cold int nvenc_setup_h264_config(AVCodecContext *avctx) cc->profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID; avctx->profile = AV_PROFILE_H264_HIGH; break; +#ifdef NVENC_HAVE_H264_10BIT_SUPPORT + case NV_ENC_H264_PROFILE_HIGH_10: + cc->profileGUID = NV_ENC_H264_PROFILE_HIGH_10_GUID; + avctx->profile = AV_PROFILE_H264_HIGH_10; + break; +#endif +#ifdef NVENC_HAVE_422_SUPPORT + case NV_ENC_H264_PROFILE_HIGH_422: + cc->profileGUID = NV_ENC_H264_PROFILE_HIGH_422_GUID; + avctx->profile = AV_PROFILE_H264_HIGH_422; + break; +#endif case NV_ENC_H264_PROFILE_HIGH_444P: cc->profileGUID = NV_ENC_H264_PROFILE_HIGH_444_GUID; avctx->profile = AV_PROFILE_H264_HIGH_444_PREDICTIVE; @@ -1304,21 +1337,37 @@ static av_cold int nvenc_setup_h264_config(AVCodecContext *avctx) } } +#ifdef NVENC_HAVE_H264_10BIT_SUPPORT + // force setting profile as high10 if input is 10 bit or if it should be encoded as 10 bit + if (IS_10BIT(ctx->data_pix_fmt) || ctx->highbitdepth) { + cc->profileGUID = NV_ENC_H264_PROFILE_HIGH_10_GUID; + avctx->profile = AV_PROFILE_H264_HIGH_10; + } +#endif + // force setting profile as high444p if input is AV_PIX_FMT_YUV444P if (IS_YUV444(ctx->data_pix_fmt)) { cc->profileGUID = NV_ENC_H264_PROFILE_HIGH_444_GUID; avctx->profile = AV_PROFILE_H264_HIGH_444_PREDICTIVE; } +#ifdef NVENC_HAVE_422_SUPPORT + // force setting profile as high422p if input is AV_PIX_FMT_YUV422P + if (IS_YUV422(ctx->data_pix_fmt)) { + cc->profileGUID = NV_ENC_H264_PROFILE_HIGH_422_GUID; + avctx->profile = AV_PROFILE_H264_HIGH_422; + } +#endif + vui->bitstreamRestrictionFlag = cc->gopLength != 1 || avctx->profile < AV_PROFILE_H264_HIGH; - h264->chromaFormatIDC = avctx->profile == AV_PROFILE_H264_HIGH_444_PREDICTIVE ? 3 : 1; + h264->chromaFormatIDC = IS_YUV444(ctx->data_pix_fmt) ? 3 : IS_YUV422(ctx->data_pix_fmt) ? 2 : 1; h264->level = ctx->level; #ifdef NVENC_HAVE_NEW_BIT_DEPTH_API - h264->inputBitDepth = h264->outputBitDepth = - IS_10BIT(ctx->data_pix_fmt) ? NV_ENC_BIT_DEPTH_10 : NV_ENC_BIT_DEPTH_8; + h264->inputBitDepth = IS_10BIT(ctx->data_pix_fmt) ? NV_ENC_BIT_DEPTH_10 : NV_ENC_BIT_DEPTH_8; + h264->outputBitDepth = (IS_10BIT(ctx->data_pix_fmt) || ctx->highbitdepth) ? NV_ENC_BIT_DEPTH_10 : NV_ENC_BIT_DEPTH_8; #endif if (ctx->coder >= 0) @@ -1428,13 +1477,13 @@ static av_cold int nvenc_setup_hevc_config(AVCodecContext *avctx) avctx->profile = AV_PROFILE_HEVC_MAIN_10; } - // force setting profile as rext if input is yuv444 - if (IS_YUV444(ctx->data_pix_fmt)) { + // force setting profile as rext if input is yuv444 or yuv422 + if (IS_YUV444(ctx->data_pix_fmt) || IS_YUV422(ctx->data_pix_fmt)) { cc->profileGUID = NV_ENC_HEVC_PROFILE_FREXT_GUID; avctx->profile = AV_PROFILE_HEVC_REXT; } - hevc->chromaFormatIDC = IS_YUV444(ctx->data_pix_fmt) ? 3 : 1; + hevc->chromaFormatIDC = IS_YUV444(ctx->data_pix_fmt) ? 3 : IS_YUV422(ctx->data_pix_fmt) ? 2 : 1; #ifdef NVENC_HAVE_NEW_BIT_DEPTH_API hevc->inputBitDepth = IS_10BIT(ctx->data_pix_fmt) ? NV_ENC_BIT_DEPTH_10 : NV_ENC_BIT_DEPTH_8; @@ -1821,6 +1870,13 @@ static NV_ENC_BUFFER_FORMAT nvenc_map_buffer_format(enum AVPixelFormat pix_fmt) return NV_ENC_BUFFER_FORMAT_ARGB10; case AV_PIX_FMT_X2BGR10: return NV_ENC_BUFFER_FORMAT_ABGR10; +#ifdef NVENC_HAVE_422_SUPPORT + case AV_PIX_FMT_NV16: + return NV_ENC_BUFFER_FORMAT_NV16; + case AV_PIX_FMT_P210: + case AV_PIX_FMT_P216: + return NV_ENC_BUFFER_FORMAT_P210; +#endif default: return NV_ENC_BUFFER_FORMAT_UNDEFINED; } diff --git a/libavcodec/nvenc.h b/libavcodec/nvenc.h index 0130b99369..34774b6a72 100644 --- a/libavcodec/nvenc.h +++ b/libavcodec/nvenc.h @@ -93,6 +93,12 @@ typedef void ID3D11Device; #define NVENC_HAVE_UNIDIR_B #endif +// SDK 13.0 compile time feature checks +#if NVENCAPI_CHECK_VERSION(13, 0) +#define NVENC_HAVE_H264_10BIT_SUPPORT +#define NVENC_HAVE_422_SUPPORT +#endif + typedef struct NvencSurface { NV_ENC_INPUT_PTR input_surface; @@ -151,6 +157,12 @@ enum { NV_ENC_H264_PROFILE_BASELINE, NV_ENC_H264_PROFILE_MAIN, NV_ENC_H264_PROFILE_HIGH, +#ifdef NVENC_HAVE_H264_10BIT_SUPPORT + NV_ENC_H264_PROFILE_HIGH_10, +#endif +#ifdef NVENC_HAVE_422_SUPPORT + NV_ENC_H264_PROFILE_HIGH_422, +#endif NV_ENC_H264_PROFILE_HIGH_444P, }; diff --git a/libavcodec/nvenc_h264.c b/libavcodec/nvenc_h264.c index 8cbe4dca39..ca997da209 100644 --- a/libavcodec/nvenc_h264.c +++ b/libavcodec/nvenc_h264.c @@ -61,6 +61,12 @@ static const AVOption options[] = { { "baseline", "", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_H264_PROFILE_BASELINE }, 0, 0, VE, .unit = "profile" }, { "main", "", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_H264_PROFILE_MAIN }, 0, 0, VE, .unit = "profile" }, { "high", "", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_H264_PROFILE_HIGH }, 0, 0, VE, .unit = "profile" }, +#ifdef NVENC_HAVE_H264_10BIT_SUPPORT + { "high10", "", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_H264_PROFILE_HIGH_10 }, 0, 0, VE, .unit = "profile" }, +#endif +#ifdef NVENC_HAVE_422_SUPPORT + { "high422", "", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_H264_PROFILE_HIGH_422 }, 0, 0, VE, .unit = "profile" }, +#endif { "high444p", "", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_H264_PROFILE_HIGH_444P }, 0, 0, VE, .unit = "profile" }, #ifdef NVENC_HAVE_H264_LVL6 { "level", "Set the encoding level restriction", OFFSET(level), AV_OPT_TYPE_INT, { .i64 = NV_ENC_LEVEL_AUTOSELECT }, NV_ENC_LEVEL_AUTOSELECT, NV_ENC_LEVEL_H264_62, VE, .unit = "level" }, @@ -199,6 +205,9 @@ static const AVOption options[] = { { "fullres", "Two Pass encoding is enabled where first Pass is full resolution", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_TWO_PASS_FULL_RESOLUTION }, 0, 0, VE, .unit = "multipass" }, #endif +#ifdef NVENC_HAVE_H264_10BIT_SUPPORT + { "highbitdepth", "Enable 10 bit encode for 8 bit input",OFFSET(highbitdepth),AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE }, +#endif #ifdef NVENC_HAVE_LDKFS { "ldkfs", "Low delay key frame scale; Specifies the Scene Change frame size increase allowed in case of single frame VBV and CBR", OFFSET(ldkfs), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, UCHAR_MAX, VE }, -- 2.45.2 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".