From: Timo Rothenpieler <timo@rothenpieler.org>
To: ffmpeg-devel@ffmpeg.org
Subject: Re: [FFmpeg-devel] [PATCH] avfilter: add libvmaf_cuda
Date: Mon, 28 Aug 2023 20:59:00 +0200
Message-ID: <74ca77b7-5b84-9d4d-2baa-0223418fd9d6@rothenpieler.org> (raw)
In-Reply-To: <CALbjROLxME6uCRC+HdUVyORDUFrvYXPgdvFHu9S20Pe0JEf0sg@mail.gmail.com>
> From f6f0afffadfc5fae97b11b0feb7c1d740b7c86ab Mon Sep 17 00:00:00 2001
> From: Kyle Swanson <kswanson@netflix.com>
> Date: Mon, 28 Aug 2023 11:49:34 -0700
> Subject: [PATCH] avfilter: add libvmaf_cuda
>
> ---
> configure | 4 +
> doc/filters.texi | 26 +++++
> libavfilter/Makefile | 1 +
> libavfilter/allfilters.c | 1 +
> libavfilter/vf_libvmaf.c | 210 +++++++++++++++++++++++++++++++++++++++
> 5 files changed, 242 insertions(+)
>
> diff --git a/configure b/configure
> index bd7f7697c8..6f6c6aaf22 100755
> --- a/configure
> +++ b/configure
> @@ -286,6 +286,7 @@ External library support:
> --enable-libv4l2 enable libv4l2/v4l-utils [no]
> --enable-libvidstab enable video stabilization using vid.stab [no]
> --enable-libvmaf enable vmaf filter via libvmaf [no]
> + --enable-libvmaf-cuda enable cuda vmaf filter via libvmaf [no]
> --enable-libvo-amrwbenc enable AMR-WB encoding via libvo-amrwbenc [no]
> --enable-libvorbis enable Vorbis en/decoding via libvorbis,
> native implementation exists [no]
> @@ -1902,6 +1903,7 @@ EXTERNAL_LIBRARY_LIST="
> libuavs3d
> libv4l2
> libvmaf
> + libvmaf_cuda
> libvorbis
> libvpx
> libwebp
> @@ -3831,6 +3833,7 @@ vflip_vulkan_filter_deps="vulkan spirv_compiler"
> vidstabdetect_filter_deps="libvidstab"
> vidstabtransform_filter_deps="libvidstab"
> libvmaf_filter_deps="libvmaf"
> +libvmaf_cuda_filter_deps="libvmaf cuda_nvcc"
Does this really depend on nvcc?
Does it not work with only ffnvcodec?
> zmq_filter_deps="libzmq"
> zoompan_filter_deps="swscale"
> zscale_filter_deps="libzimg const_nan"
> @@ -6811,6 +6814,7 @@ enabled libuavs3d && require_pkg_config libuavs3d "uavs3d >= 1.1.41" uav
> enabled libv4l2 && require_pkg_config libv4l2 libv4l2 libv4l2.h v4l2_ioctl
> enabled libvidstab && require_pkg_config libvidstab "vidstab >= 0.98" vid.stab/libvidstab.h vsMotionDetectInit
> enabled libvmaf && require_pkg_config libvmaf "libvmaf >= 2.0.0" libvmaf.h vmaf_init
> +enabled libvmaf_cuda && require_pkg_config libvmaf "libvmaf >= 2.0.0" libvmaf_cuda.h vmaf_cuda_state_init
> enabled libvo_amrwbenc && require libvo_amrwbenc vo-amrwbenc/enc_if.h E_IF_init -lvo-amrwbenc
> enabled libvorbis && require_pkg_config libvorbis vorbis vorbis/codec.h vorbis_info_init &&
> require_pkg_config libvorbisenc vorbisenc vorbis/vorbisenc.h vorbis_encode_init
> diff --git a/doc/filters.texi b/doc/filters.texi
> index 14a6be49ac..eaff3f1ddc 100644
> --- a/doc/filters.texi
> +++ b/doc/filters.texi
> @@ -16928,6 +16928,32 @@ ffmpeg -i distorted.mpg -i reference.mkv -lavfi "[0:v]settb=AVTB,setpts=PTS-STAR
> @end example
> @end itemize
>
> +@section libvmaf_cuda
> +
> +This is the CUDA variant of the @ref{libvmaf} filter. It only accepts CUDA frames.
> +
> +It requires Netflix's vmaf library (libvmaf) as a pre-requisite.
> +After installing the library it can be enabled using:
> +@code{./configure --enable-nonfree --enable-cuda-nvcc --enable-libvmaf-cuda}.
See above: does this really need --enable-cuda-nvcc?
> +@subsection Examples
> +@itemize
> +
> +@item
> +Basic usage showing CUVID hardware decoding and CUDA scaling with @ref{scale_cuda}:
> +@example
> +ffmpeg \
> + -hwaccel cuda -hwaccel_output_format cuda -codec:v av1_cuvid -i dis.obu \
> + -hwaccel cuda -hwaccel_output_format cuda -codec:v av1_cuvid -i ref.obu \
> + -filter_complex "
> + [0:v]scale_cuda=format=yuv420p[ref]; \
> + [1:v]scale_cuda=format=yuv420p[dis]; \
> + [dis][ref]libvmaf_cuda=log_fmt=json:log_path=output.json
> + " \
> + -f null -
> +@end example
> +@end itemize
> +
> @section limitdiff
> Apply limited difference filter using second and optionally third video stream.
>
> diff --git a/libavfilter/Makefile b/libavfilter/Makefile
> index 2fe0033b21..57f5809acb 100644
> --- a/libavfilter/Makefile
> +++ b/libavfilter/Makefile
> @@ -363,6 +363,7 @@ OBJS-$(CONFIG_LENSCORRECTION_FILTER) += vf_lenscorrection.o
> OBJS-$(CONFIG_LENSFUN_FILTER) += vf_lensfun.o
> OBJS-$(CONFIG_LIBPLACEBO_FILTER) += vf_libplacebo.o vulkan.o vulkan_filter.o
> OBJS-$(CONFIG_LIBVMAF_FILTER) += vf_libvmaf.o framesync.o
> +OBJS-$(CONFIG_LIBVMAF_CUDA_FILTER) += vf_libvmaf.o framesync.o
> OBJS-$(CONFIG_LIMITDIFF_FILTER) += vf_limitdiff.o framesync.o
> OBJS-$(CONFIG_LIMITER_FILTER) += vf_limiter.o
> OBJS-$(CONFIG_LOOP_FILTER) += f_loop.o
> diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
> index d4184d6e80..aa49703c6e 100644
> --- a/libavfilter/allfilters.c
> +++ b/libavfilter/allfilters.c
> @@ -339,6 +339,7 @@ extern const AVFilter ff_vf_lenscorrection;
> extern const AVFilter ff_vf_lensfun;
> extern const AVFilter ff_vf_libplacebo;
> extern const AVFilter ff_vf_libvmaf;
> +extern const AVFilter ff_vf_libvmaf_cuda;
> extern const AVFilter ff_vf_limitdiff;
> extern const AVFilter ff_vf_limiter;
> extern const AVFilter ff_vf_loop;
> diff --git a/libavfilter/vf_libvmaf.c b/libavfilter/vf_libvmaf.c
> index 2586f37d99..d7d853ac3e 100644
> --- a/libavfilter/vf_libvmaf.c
> +++ b/libavfilter/vf_libvmaf.c
> @@ -24,6 +24,8 @@
> * Calculate the VMAF between two input videos.
> */
>
> +#include "config.h"
> +
> #include <libvmaf.h>
>
> #include "libavutil/avstring.h"
> @@ -36,6 +38,13 @@
> #include "internal.h"
> #include "video.h"
>
> +#ifdef CONFIG_LIBVMAF_CUDA
> +#include <libvmaf_cuda.h>
Does this include cuda.h or something like that?
If so, it should probably be included after the cuda hwcontext headers,
to keep it from including cuda.h itself.
> +#include "libavutil/hwcontext.h"
> +#include "libavutil/hwcontext_cuda_internal.h"
> +#endif
> +
> typedef struct LIBVMAFContext {
> const AVClass *class;
> FFFrameSync fs;
> @@ -58,6 +67,9 @@ typedef struct LIBVMAFContext {
> unsigned model_cnt;
> unsigned frame_cnt;
> unsigned bpc;
> +#ifdef CONFIG_LIBVMAF_CUDA
> + VmafCudaState *cu_state;
> +#endif
> } LIBVMAFContext;
>
> #define OFFSET(x) offsetof(LIBVMAFContext, x)
> @@ -710,3 +722,201 @@ const AVFilter ff_vf_libvmaf = {
> FILTER_OUTPUTS(libvmaf_outputs),
> FILTER_PIXFMTS_ARRAY(pix_fmts),
> };
> +
> +#ifdef CONFIG_LIBVMAF_CUDA
> +static const enum AVPixelFormat supported_formats[] = {
> + AV_PIX_FMT_YUV420P,
> + AV_PIX_FMT_YUV444P16,
> +};
> +
> +static int format_is_supported(enum AVPixelFormat fmt)
> +{
> + int i;
> +
> + for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
> + if (supported_formats[i] == fmt)
> + return 1;
> + return 0;
> +}
> +
> +static int config_props_cuda(AVFilterLink *outlink)
> +{
> + int err;
> + AVFilterContext *ctx = outlink->src;
> + LIBVMAFContext *s = ctx->priv;
> + AVFilterLink *inlink = ctx->inputs[0];
> + AVHWFramesContext *frames_ctx = (AVHWFramesContext*) inlink->hw_frames_ctx->data;
> + AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
> + CUcontext cu_ctx = device_hwctx->cuda_ctx;
> + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frames_ctx->sw_format);
> +
> + VmafConfiguration cfg = {
> + .log_level = log_level_map(av_log_get_level()),
> + .n_subsample = s->n_subsample,
> + .n_threads = s->n_threads,
> + };
> +
> + VmafCudaPictureConfiguration cuda_pic_cfg = {
> + .pic_params = {
> + .bpc = desc->comp[0].depth,
> + .w = inlink->w,
> + .h = inlink->h,
> + .pix_fmt = pix_fmt_map(frames_ctx->sw_format),
> + },
> + .pic_prealloc_method = VMAF_CUDA_PICTURE_PREALLOCATION_METHOD_DEVICE,
> + };
> +
> + VmafCudaConfiguration cuda_cfg = {
> + .cu_ctx = cu_ctx,
> + };
> +
> + if (!format_is_supported(frames_ctx->sw_format)) {
> + av_log(s, AV_LOG_ERROR,
> + "Unsupported input format: %s\n", desc->name);
> + return AVERROR(EINVAL);
> + }
> +
> + err = vmaf_init(&s->vmaf, cfg);
> + if (err)
> + return AVERROR(EINVAL);
> +
> + err = vmaf_cuda_state_init(&s->cu_state, cuda_cfg);
> + if (err)
> + return AVERROR(EINVAL);
> +
> + err = vmaf_cuda_import_state(s->vmaf, s->cu_state);
> + if (err)
> + return AVERROR(EINVAL);
> +
> + err = vmaf_cuda_preallocate_pictures(s->vmaf, cuda_pic_cfg);
> + if (err < 0)
> + return err;
> +
> + err = parse_deprecated_options(ctx);
> + if (err)
> + return err;
> +
> + err = parse_models(ctx);
> + if (err)
> + return err;
> +
> + err = parse_features(ctx);
> + if (err)
> + return err;
> +
> + return config_output(outlink);
> +}
> +
> +static int copy_picture_data_cuda(VmafContext* vmaf,
> + AVCUDADeviceContext* device_hwctx,
> + AVFrame* src, VmafPicture* dst,
> + enum AVPixelFormat pix_fmt)
> +{
> + const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(pix_fmt);
> + CudaFunctions *cu = device_hwctx->internal->cuda_dl;
> +
> + CUDA_MEMCPY2D m = {
> + .srcMemoryType = CU_MEMORYTYPE_DEVICE,
> + .dstMemoryType = CU_MEMORYTYPE_DEVICE,
> + };
> +
> + int err = vmaf_cuda_fetch_preallocated_picture(vmaf, dst);
> + if (err)
> + return AVERROR(ENOMEM);
> +
> + err = cu->cuCtxPushCurrent(device_hwctx->cuda_ctx);
> + if (err)
> + return AVERROR_EXTERNAL;
> +
> + for (unsigned i = 0; i < pix_desc->nb_components; i++) {
> + m.srcDevice = (CUdeviceptr) src->data[i];
> + m.srcPitch = src->linesize[i];
> + m.dstDevice = (CUdeviceptr) dst->data[i];
> + m.dstPitch = dst->stride[i];
> + m.WidthInBytes = dst->w[i] * ((dst->bpc + 7) / 8);
> + m.Height = dst->h[i];
> +
> + err = cu->cuMemcpy2D(&m);
> + if (err)
> + return AVERROR_EXTERNAL;
> + break;
> + }
> +
> + err = cu->cuCtxPopCurrent(NULL);
> + if (err)
> + return AVERROR_EXTERNAL;
> +
> + return 0;
> +}
> +
> +static int do_vmaf_cuda(FFFrameSync* fs)
> +{
> + AVFilterContext* ctx = fs->parent;
> + LIBVMAFContext* s = ctx->priv;
> + AVFilterLink *inlink = ctx->inputs[0];
> + AVHWFramesContext *frames_ctx = (AVHWFramesContext*) inlink->hw_frames_ctx->data;
> + AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
> + VmafPicture pic_ref, pic_dist;
> + AVFrame *ref, *dist;
> +
> + int err = 0;
> +
> + err = ff_framesync_dualinput_get(fs, &dist, &ref);
> + if (err < 0)
> + return err;
> + if (ctx->is_disabled || !ref)
> + return ff_filter_frame(ctx->outputs[0], dist);
> +
> + err = copy_picture_data_cuda(s->vmaf, device_hwctx, ref, &pic_ref,
> + frames_ctx->sw_format);
> + if (err) {
> + av_log(s, AV_LOG_ERROR, "problem during copy_picture_data_cuda.\n");
> + return AVERROR(ENOMEM);
> + }
> +
> + err = copy_picture_data_cuda(s->vmaf, device_hwctx, dist, &pic_dist,
> + frames_ctx->sw_format);
> + if (err) {
> + av_log(s, AV_LOG_ERROR, "problem during copy_picture_data_cuda.\n");
> + return AVERROR(ENOMEM);
> + }
> +
> + err = vmaf_read_pictures(s->vmaf, &pic_ref, &pic_dist, s->frame_cnt++);
> + if (err) {
> + av_log(s, AV_LOG_ERROR, "problem during vmaf_read_pictures.\n");
> + return AVERROR(EINVAL);
> + }
> +
> + return ff_filter_frame(ctx->outputs[0], dist);
> +}
> +
> +static av_cold int init_cuda(AVFilterContext *ctx)
> +{
> + LIBVMAFContext *s = ctx->priv;
> + s->fs.on_event = do_vmaf_cuda;
> + return 0;
> +}
> +
> +static const AVFilterPad libvmaf_outputs_cuda[] = {
> + {
> + .name = "default",
> + .type = AVMEDIA_TYPE_VIDEO,
> + .config_props = config_props_cuda,
> + },
> +};
> +
> +const AVFilter ff_vf_libvmaf_cuda = {
> + .name = "libvmaf_cuda",
> + .description = NULL_IF_CONFIG_SMALL("Calculate the VMAF between two video streams."),
> + .preinit = libvmaf_framesync_preinit,
> + .init = init_cuda,
> + .uninit = uninit,
> + .activate = activate,
> + .priv_size = sizeof(LIBVMAFContext),
> + .priv_class = &libvmaf_class,
> + FILTER_INPUTS(libvmaf_inputs),
> + FILTER_OUTPUTS(libvmaf_outputs_cuda),
> + FILTER_SINGLE_PIXFMT(AV_PIX_FMT_CUDA),
> + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
> +};
> +#endif
> --
> 2.24.3 (Apple Git-128)
>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next prev parent reply other threads:[~2023-08-28 18:59 UTC|newest]
Thread overview: 30+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-08-07 17:50 Kyle Swanson
2023-08-14 16:29 ` Kyle Swanson
2023-08-14 17:09 ` Timo Rothenpieler
2023-08-23 20:39 ` Kyle Swanson
2023-08-28 18:54 ` Kyle Swanson
2023-08-28 18:59 ` Timo Rothenpieler [this message]
2023-08-28 20:16 ` Kyle Swanson
2023-08-29 17:09 ` Kyle Swanson
2023-08-29 22:14 ` Andreas Rheinhardt
2023-08-30 16:44 ` Kyle Swanson
2023-08-31 18:39 ` Kyle Swanson
2023-09-05 17:16 ` Kyle Swanson
2023-09-11 17:53 ` Kyle Swanson
2023-09-11 18:51 ` Timo Rothenpieler
2023-09-14 18:59 ` Kyle Swanson
2023-09-14 19:10 ` Timo Rothenpieler
2023-09-15 20:31 ` Kyle Swanson
2023-09-15 22:33 ` Timo Rothenpieler
2023-09-18 16:42 ` Kyle Swanson
2023-09-18 19:21 ` Marvin Scholz
2023-09-18 19:41 ` Timo Rothenpieler
2023-09-18 21:39 ` Kyle Swanson
2023-09-20 20:06 ` Kyle Swanson
2023-09-20 22:54 ` Timo Rothenpieler
2023-09-23 9:50 ` Kyle Swanson
2023-09-23 11:02 ` Timo Rothenpieler
2023-09-25 12:18 ` Kyle Swanson
2023-09-25 16:09 ` Timo Rothenpieler
2023-09-27 17:26 ` Kyle Swanson
2023-08-28 19:05 ` Paul B Mahol
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=74ca77b7-5b84-9d4d-2baa-0223418fd9d6@rothenpieler.org \
--to=timo@rothenpieler.org \
--cc=ffmpeg-devel@ffmpeg.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git