From: Steven Zhou <steven.zhou@netint.ca> To: "ffmpeg-devel@ffmpeg.org" <ffmpeg-devel@ffmpeg.org> Subject: [FFmpeg-devel] [PATCH 3/3] libavfilter: add NETINT Quadra HW video filters Date: Wed, 2 Jul 2025 08:11:09 +0000 Message-ID: <YT2PR01MB47013707DB91A5F61DEEF57BE340A@YT2PR01MB4701.CANPRD01.PROD.OUTLOOK.COM> Add NETINT Quadra hardware video filters ni_quadra_crop, ni_quadra_drawbox, ni_quadra_drawtext, ni_quadra_flip, ni_quadra_hvsplus, ni_quadra_hwupload, ni_quadra_overlay, ni_quadra_pad, ni_quadra_rotate, ni_quadra_scale, and ni_quadra_split More information: https://netint.com/products/quadra-t1a-video-processing-unit/ https://docs.netint.com/vpu/quadra/ Signed-off-by: Steven Zhou <steven.zhou@netint.ca> --- configure | 13 + libavfilter/Makefile | 12 + libavfilter/allfilters.c | 11 + libavfilter/nifilter.c | 361 +++ libavfilter/nifilter.h | 69 + libavfilter/vf_crop_ni.c | 719 ++++++ libavfilter/vf_drawbox_ni.c | 831 +++++++ libavfilter/vf_drawtext_ni.c | 3401 +++++++++++++++++++++++++++ libavfilter/vf_flip_ni.c | 469 ++++ libavfilter/vf_hvsplus_ni.c | 1792 ++++++++++++++ libavfilter/vf_hwupload_ni_quadra.c | 297 +++ libavfilter/vf_overlay_ni.c | 1397 +++++++++++ libavfilter/vf_pad_ni.c | 703 ++++++ libavfilter/vf_rotate_ni.c | 764 ++++++ libavfilter/vf_scale_ni.c | 958 ++++++++ libavfilter/vf_split_ni.c | 529 +++++ 16 files changed, 12326 insertions(+) create mode 100644 libavfilter/nifilter.c create mode 100644 libavfilter/nifilter.h create mode 100644 libavfilter/vf_crop_ni.c create mode 100644 libavfilter/vf_drawbox_ni.c create mode 100644 libavfilter/vf_drawtext_ni.c create mode 100644 libavfilter/vf_flip_ni.c create mode 100644 libavfilter/vf_hvsplus_ni.c create mode 100644 libavfilter/vf_hwupload_ni_quadra.c create mode 100644 libavfilter/vf_overlay_ni.c create mode 100644 libavfilter/vf_pad_ni.c create mode 100644 libavfilter/vf_rotate_ni.c create mode 100644 libavfilter/vf_scale_ni.c create mode 100644 libavfilter/vf_split_ni.c diff --git a/configure b/configure index 0a2dda84c9..4d8f438f6c 100755 --- a/configure +++ b/configure @@ -3372,6 +3372,19 @@ amf_deps_any="libdl LoadLibrary" nvenc_deps="ffnvcodec" nvenc_deps_any="libdl LoadLibrary" +hwupload_ni_quadra_filter_deps="ni_quadra" +drawbox_ni_quadra_filter_deps="ni_quadra" +drawtext_ni_quadra_filter_deps="ni_quadra libfreetype" +drawtext_ni_quadra_filter_suggest="libfontconfig libfribidi" +crop_ni_quadra_filter_deps="ni_quadra" +overlay_ni_quadra_filter_deps="ni_quadra" +pad_ni_quadra_filter_deps="ni_quadra" +rotate_ni_quadra_filter_deps="ni_quadra" +scale_ni_quadra_filter_deps="ni_quadra" +split_ni_quadra_filter_deps="ni_quadra" +hvsplus_ni_quadra_filter_deps="ni_quadra" +flip_ni_quadra_filter_deps="ni_quadra" + aac_mediacodec_decoder_deps="mediacodec" aac_mediacodec_decoder_select="aac_adtstoasc_bsf aac_parser" aac_mf_encoder_deps="mediafoundation" diff --git a/libavfilter/Makefile b/libavfilter/Makefile index 97f8f17272..e8f278edfe 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -259,6 +259,7 @@ OBJS-$(CONFIG_COREIMAGE_FILTER) += vf_coreimage.o OBJS-$(CONFIG_CORR_FILTER) += vf_corr.o framesync.o OBJS-$(CONFIG_COVER_RECT_FILTER) += vf_cover_rect.o lavfutils.o OBJS-$(CONFIG_CROP_FILTER) += vf_crop.o +OBJS-$(CONFIG_CROP_NI_QUADRA_FILTER) += vf_crop_ni.o nifilter.o OBJS-$(CONFIG_CROPDETECT_FILTER) += vf_cropdetect.o edge_common.o OBJS-$(CONFIG_CUE_FILTER) += f_cue.o OBJS-$(CONFIG_CURVES_FILTER) += vf_curves.o @@ -292,9 +293,11 @@ OBJS-$(CONFIG_DNN_DETECT_FILTER) += vf_dnn_detect.o
OBJS-$(CONFIG_DNN_PROCESSING_FILTER) += vf_dnn_processing.o OBJS-$(CONFIG_DOUBLEWEAVE_FILTER) += vf_weave.o OBJS-$(CONFIG_DRAWBOX_FILTER) += vf_drawbox.o +OBJS-$(CONFIG_DRAWBOX_NI_QUADRA_FILTER) += vf_drawbox_ni.o OBJS-$(CONFIG_DRAWGRAPH_FILTER) += f_drawgraph.o OBJS-$(CONFIG_DRAWGRID_FILTER) += vf_drawbox.o OBJS-$(CONFIG_DRAWTEXT_FILTER) += vf_drawtext.o textutils.o +OBJS-$(CONFIG_DRAWTEXT_NI_QUADRA_FILTER) += vf_drawtext_ni.o OBJS-$(CONFIG_EDGEDETECT_FILTER) += vf_edgedetect.o edge_common.o OBJS-$(CONFIG_ELBG_FILTER) += vf_elbg.o OBJS-$(CONFIG_ENTROPY_FILTER) += vf_entropy.o @@ -346,10 +349,12 @@ OBJS-$(CONFIG_HSTACK_FILTER) += vf_stack.o framesync.o OBJS-$(CONFIG_HSVHOLD_FILTER) += vf_hsvkey.o OBJS-$(CONFIG_HSVKEY_FILTER) += vf_hsvkey.o OBJS-$(CONFIG_HUE_FILTER) += vf_hue.o +OBJS-$(CONFIG_HVSPLUS_NI_QUADRA_FILTER) += vf_hvsplus_ni.o OBJS-$(CONFIG_HUESATURATION_FILTER) += vf_huesaturation.o OBJS-$(CONFIG_HWDOWNLOAD_FILTER) += vf_hwdownload.o OBJS-$(CONFIG_HWMAP_FILTER) += vf_hwmap.o OBJS-$(CONFIG_HWUPLOAD_CUDA_FILTER) += vf_hwupload_cuda.o +OBJS-$(CONFIG_HWUPLOAD_NI_QUADRA_FILTER) += vf_hwupload_ni_quadra.o OBJS-$(CONFIG_HWUPLOAD_FILTER) += vf_hwupload.o OBJS-$(CONFIG_HYSTERESIS_FILTER) += vf_hysteresis.o framesync.o OBJS-$(CONFIG_ICCDETECT_FILTER) += vf_iccdetect.o fflcms2.o @@ -413,6 +418,7 @@ OBJS-$(CONFIG_OCR_FILTER) += vf_ocr.o OBJS-$(CONFIG_OCV_FILTER) += vf_libopencv.o OBJS-$(CONFIG_OSCILLOSCOPE_FILTER) += vf_datascope.o OBJS-$(CONFIG_OVERLAY_FILTER) += vf_overlay.o framesync.o +OBJS-$(CONFIG_OVERLAY_NI_QUADRA_FILTER) += vf_overlay_ni.o framesync.o OBJS-$(CONFIG_OVERLAY_CUDA_FILTER) += vf_overlay_cuda.o framesync.o vf_overlay_cuda.ptx.o \ cuda/load_helper.o OBJS-$(CONFIG_OVERLAY_OPENCL_FILTER) += vf_overlay_opencl.o opencl.o \ @@ -422,6 +428,7 @@ OBJS-$(CONFIG_OVERLAY_VAAPI_FILTER) += vf_overlay_vaapi.o framesync.o v OBJS-$(CONFIG_OVERLAY_VULKAN_FILTER) += vf_overlay_vulkan.o vulkan.o vulkan_filter.o OBJS-$(CONFIG_OWDENOISE_FILTER) += vf_owdenoise.o OBJS-$(CONFIG_PAD_FILTER) += vf_pad.o +OBJS-$(CONFIG_PAD_NI_QUADRA_FILTER) += vf_pad_ni.o nifilter.o OBJS-$(CONFIG_PAD_OPENCL_FILTER) += vf_pad_opencl.o opencl.o opencl/pad.o OBJS-$(CONFIG_PALETTEGEN_FILTER) += vf_palettegen.o palette.o OBJS-$(CONFIG_PALETTEUSE_FILTER) += vf_paletteuse.o framesync.o palette.o @@ -460,10 +467,12 @@ OBJS-$(CONFIG_ROBERTS_FILTER) += vf_convolution.o OBJS-$(CONFIG_ROBERTS_OPENCL_FILTER) += vf_convolution_opencl.o opencl.o \ opencl/convolution.o OBJS-$(CONFIG_ROTATE_FILTER) += vf_rotate.o +OBJS-$(CONFIG_ROTATE_NI_QUADRA_FILTER) += vf_rotate_ni.o nifilter.o OBJS-$(CONFIG_SAB_FILTER) += vf_sab.o OBJS-$(CONFIG_SCALE_FILTER) += vf_scale.o scale_eval.o framesync.o OBJS-$(CONFIG_SCALE_CUDA_FILTER) += vf_scale_cuda.o scale_eval.o \ vf_scale_cuda.ptx.o cuda/load_helper.o +OBJS-$(CONFIG_SCALE_NI_QUADRA_FILTER) += vf_scale_ni.o nifilter.o OBJS-$(CONFIG_SCALE_NPP_FILTER) += vf_scale_npp.o scale_eval.o OBJS-$(CONFIG_SCALE_QSV_FILTER) += vf_vpp_qsv.o OBJS-$(CONFIG_SCALE_VAAPI_FILTER) += vf_scale_vaapi.o scale_eval.o vaapi_vpp.o @@ -504,6 +513,7 @@ OBJS-$(CONFIG_SOBEL_OPENCL_FILTER) += vf_convolution_opencl.o opencl.o opencl/convolution.o OBJS-$(CONFIG_SITI_FILTER) += vf_siti.o OBJS-$(CONFIG_SPLIT_FILTER) += split.o +OBJS-$(CONFIG_SPLIT_NI_QUADRA_FILTER) += vf_split_ni.o OBJS-$(CONFIG_SPP_FILTER) += vf_spp.o qp_table.o OBJS-$(CONFIG_SR_FILTER) += vf_sr.o OBJS-$(CONFIG_SR_AMF_FILTER) += vf_sr_amf.o scale_eval.o vf_amf_common.o @@ -552,6 +562,7 @@ OBJS-$(CONFIG_VAGUEDENOISER_FILTER) += vf_vaguedenoiser.o 
OBJS-$(CONFIG_VARBLUR_FILTER) += vf_varblur.o framesync.o OBJS-$(CONFIG_VECTORSCOPE_FILTER) += vf_vectorscope.o OBJS-$(CONFIG_VFLIP_FILTER) += vf_vflip.o +OBJS-$(CONFIG_FLIP_NI_QUADRA_FILTER) += vf_flip_ni.o OBJS-$(CONFIG_VFLIP_VULKAN_FILTER) += vf_flip_vulkan.o vulkan.o OBJS-$(CONFIG_VFRDET_FILTER) += vf_vfrdet.o OBJS-$(CONFIG_VIBRANCE_FILTER) += vf_vibrance.o @@ -667,6 +678,7 @@ SKIPHEADERS-$(CONFIG_LIBVIDSTAB) += vidstabutils.h SKIPHEADERS-$(CONFIG_AMF) += vf_amf_common.h SKIPHEADERS-$(CONFIG_QSVVPP) += qsvvpp.h stack_internal.h +SKIPHEADERS-$(CONFIG_NI_QUADRA) += nifilter.h SKIPHEADERS-$(CONFIG_OPENCL) += opencl.h SKIPHEADERS-$(CONFIG_VAAPI) += vaapi_vpp.h stack_internal.h SKIPHEADERS-$(CONFIG_VULKAN) += vulkan_filter.h diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index 3bc045b28f..f013238cdd 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -236,6 +236,7 @@ extern const FFFilter ff_vf_coreimage; extern const FFFilter ff_vf_corr; extern const FFFilter ff_vf_cover_rect; extern const FFFilter ff_vf_crop; +extern const FFFilter ff_vf_crop_ni_quadra; extern const FFFilter ff_vf_cropdetect; extern const FFFilter ff_vf_cue; extern const FFFilter ff_vf_curves; @@ -267,9 +268,11 @@ extern const FFFilter ff_vf_dnn_detect; extern const FFFilter ff_vf_dnn_processing; extern const FFFilter ff_vf_doubleweave; extern const FFFilter ff_vf_drawbox; +extern const FFFilter ff_vf_drawbox_ni_quadra; extern const FFFilter ff_vf_drawgraph; extern const FFFilter ff_vf_drawgrid; extern const FFFilter ff_vf_drawtext; +extern const FFFilter ff_vf_drawtext_ni_quadra; extern const FFFilter ff_vf_edgedetect; extern const FFFilter ff_vf_elbg; extern const FFFilter ff_vf_entropy; @@ -321,10 +324,12 @@ extern const FFFilter ff_vf_hstack; extern const FFFilter ff_vf_hsvhold; extern const FFFilter ff_vf_hsvkey; extern const FFFilter ff_vf_hue; +extern const FFFilter ff_vf_hvsplus_ni_quadra; extern const FFFilter ff_vf_huesaturation; extern const FFFilter ff_vf_hwdownload; extern const FFFilter ff_vf_hwmap; extern const FFFilter ff_vf_hwupload; +extern const FFFilter ff_vf_hwupload_ni_quadra; extern const FFFilter ff_vf_hwupload_cuda; extern const FFFilter ff_vf_hysteresis; extern const FFFilter ff_vf_iccdetect; @@ -388,6 +393,7 @@ extern const FFFilter ff_vf_ocr; extern const FFFilter ff_vf_ocv; extern const FFFilter ff_vf_oscilloscope; extern const FFFilter ff_vf_overlay; +extern const FFFilter ff_vf_overlay_ni_quadra; extern const FFFilter ff_vf_overlay_opencl; extern const FFFilter ff_vf_overlay_qsv; extern const FFFilter ff_vf_overlay_vaapi; @@ -395,6 +401,7 @@ extern const FFFilter ff_vf_overlay_vulkan; extern const FFFilter ff_vf_overlay_cuda; extern const FFFilter ff_vf_owdenoise; extern const FFFilter ff_vf_pad; +extern const FFFilter ff_vf_pad_ni_quadra; extern const FFFilter ff_vf_pad_opencl; extern const FFFilter ff_vf_palettegen; extern const FFFilter ff_vf_paletteuse; @@ -431,11 +438,13 @@ extern const FFFilter ff_vf_rgbashift; extern const FFFilter ff_vf_roberts; extern const FFFilter ff_vf_roberts_opencl; extern const FFFilter ff_vf_rotate; +extern const FFFilter ff_vf_rotate_ni_quadra; extern const FFFilter ff_vf_sab; extern const FFFilter ff_vf_scale; extern const FFFilter ff_vf_vpp_amf; extern const FFFilter ff_vf_sr_amf; extern const FFFilter ff_vf_scale_cuda; +extern const FFFilter ff_vf_scale_ni_quadra; extern const FFFilter ff_vf_scale_npp; extern const FFFilter ff_vf_scale_qsv; extern const FFFilter ff_vf_scale_vaapi; @@ -475,6 +484,7 @@ extern const 
FFFilter ff_vf_smartblur; extern const FFFilter ff_vf_sobel; extern const FFFilter ff_vf_sobel_opencl; extern const FFFilter ff_vf_split; +extern const FFFilter ff_vf_split_ni_quadra; extern const FFFilter ff_vf_spp; extern const FFFilter ff_vf_sr; extern const FFFilter ff_vf_ssim; @@ -519,6 +529,7 @@ extern const FFFilter ff_vf_vaguedenoiser; extern const FFFilter ff_vf_varblur; extern const FFFilter ff_vf_vectorscope; extern const FFFilter ff_vf_vflip; +extern const FFFilter ff_vf_flip_ni_quadra; extern const FFFilter ff_vf_vflip_vulkan; extern const FFFilter ff_vf_vfrdet; extern const FFFilter ff_vf_vibrance; diff --git a/libavfilter/nifilter.c b/libavfilter/nifilter.c new file mode 100644 index 0000000000..2008af6c6f --- /dev/null +++ b/libavfilter/nifilter.c @@ -0,0 +1,361 @@ +/* + * Copyright (c) 2020 NetInt + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * video common filter routines + */ + +#include <stdio.h> + +#include <ni_device_api.h> + +#include "avfilter.h" +#include "nifilter.h" +#include "formats.h" +#include "filters.h" +#include "libavutil/mem.h" +#include "video.h" +#include "libavutil/eval.h" +#include "libavutil/avstring.h" +#include "libavutil/internal.h" +#include "libavutil/libm.h" +#include "libavutil/imgutils.h" +#include "libavutil/mathematics.h" +#include "libavutil/opt.h" +#include "libavutil/time.h" + +#if HAVE_SYS_RESOURCE_H +#include <sys/time.h> +#include <sys/types.h> +#include <sys/resource.h> +#elif HAVE_GETPROCESSTIMES +#include <windows.h> +#endif + +typedef struct BenchmarkTimeStamps { + int64_t real_usec; + int64_t user_usec; + int64_t sys_usec; +} BenchmarkTimeStamps; + +typedef struct gc620_pixel_fmts { + enum AVPixelFormat pix_fmt_ffmpeg; + int pix_fmt_gc620; + ni_pix_fmt_t pix_fmt_libxcoder; +} gc620_pixel_fmts_t; + +static struct gc620_pixel_fmts gc620_pixel_fmt_list[] = { + {AV_PIX_FMT_NV12, GC620_NV12, NI_PIX_FMT_NV12}, + {AV_PIX_FMT_NV21, GC620_NV21, NI_PIX_FMT_NONE}, + {AV_PIX_FMT_YUV420P, GC620_I420, NI_PIX_FMT_YUV420P}, + {AV_PIX_FMT_P010LE, GC620_P010_MSB, NI_PIX_FMT_P010LE}, + {AV_PIX_FMT_YUV420P10LE, GC620_I010, NI_PIX_FMT_YUV420P10LE}, + {AV_PIX_FMT_YUYV422, GC620_YUYV, NI_PIX_FMT_YUYV422}, + {AV_PIX_FMT_UYVY422, GC620_UYVY, NI_PIX_FMT_UYVY422}, + {AV_PIX_FMT_NV16, GC620_NV16, NI_PIX_FMT_NONE}, + {AV_PIX_FMT_RGBA, GC620_RGBA8888, NI_PIX_FMT_RGBA}, + {AV_PIX_FMT_BGR0, GC620_BGRX8888, NI_PIX_FMT_BGR0}, + {AV_PIX_FMT_BGRA, GC620_BGRA8888, NI_PIX_FMT_BGRA}, + {AV_PIX_FMT_ABGR, GC620_ABGR8888, NI_PIX_FMT_ABGR}, + {AV_PIX_FMT_ARGB, GC620_ARGB8888, NI_PIX_FMT_ARGB}, + {AV_PIX_FMT_BGR565LE, GC620_RGB565, NI_PIX_FMT_NONE}, + {AV_PIX_FMT_RGB565LE, GC620_BGR565, NI_PIX_FMT_NONE}, + {AV_PIX_FMT_RGB555LE, GC620_B5G5R5X1, NI_PIX_FMT_NONE}, + {AV_PIX_FMT_NI_QUAD_8_TILE_4X4, GC620_NV12, NI_PIX_FMT_NV12} +}; + 
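+/* + * Snapshot elapsed real time (av_gettime_relative) together with the + * process user/system CPU time (getrusage on POSIX, GetProcessTimes on + * Windows). ff_ni_update_benchmark() below logs the delta between + * successive snapshots when NI_MEASURE_LATENCY is defined. + */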
+static BenchmarkTimeStamps get_benchmark_time_stamps(void) +{ + BenchmarkTimeStamps time_stamps = { av_gettime_relative() }; +#if HAVE_GETRUSAGE + struct rusage rusage; + + getrusage(RUSAGE_SELF, &rusage); + time_stamps.user_usec = + (rusage.ru_utime.tv_sec * 1000000LL) + rusage.ru_utime.tv_usec; + time_stamps.sys_usec = + (rusage.ru_stime.tv_sec * 1000000LL) + rusage.ru_stime.tv_usec; +#elif HAVE_GETPROCESSTIMES + HANDLE proc; + FILETIME c, e, k, u; + proc = GetCurrentProcess(); + GetProcessTimes(proc, &c, &e, &k, &u); + time_stamps.user_usec = + ((int64_t)u.dwHighDateTime << 32 | u.dwLowDateTime) / 10; + time_stamps.sys_usec = + ((int64_t)k.dwHighDateTime << 32 | k.dwLowDateTime) / 10; +#else + time_stamps.user_usec = time_stamps.sys_usec = 0; +#endif + return time_stamps; +} + +void ff_ni_update_benchmark(const char *fmt, ...) +{ + static BenchmarkTimeStamps current_time; + va_list va; + char buf[1024]; + BenchmarkTimeStamps t = get_benchmark_time_stamps(); + + if (fmt) { + va_start(va, fmt); + vsnprintf(buf, sizeof(buf), fmt, va); + va_end(va); + av_log(NULL, AV_LOG_INFO, "bench: %8" PRIu64 " user %8" PRIu64 " " + "sys %8" PRIu64 " real %s \n", + t.user_usec - current_time.user_usec, + t.sys_usec - current_time.sys_usec, + t.real_usec - current_time.real_usec, buf); + } + current_time = t; +} + +int ff_ni_ffmpeg_to_gc620_pix_fmt(enum AVPixelFormat pix_fmt) +{ + int i, tablesz; + + tablesz = sizeof(gc620_pixel_fmt_list)/sizeof(struct gc620_pixel_fmts); + + /* linear search through table to find if the pixel format is supported */ + for (i = 0; i < tablesz; i++) { + if (gc620_pixel_fmt_list[i].pix_fmt_ffmpeg == pix_fmt) { + return gc620_pixel_fmt_list[i].pix_fmt_gc620; + } + } + return -1; +} + +int ff_ni_ffmpeg_to_libxcoder_pix_fmt(enum AVPixelFormat pix_fmt) +{ + int i, tablesz; + + tablesz = sizeof(gc620_pixel_fmt_list)/sizeof(struct gc620_pixel_fmts); + + /* linear search through table to find if the pixel format is supported */ + for (i = 0; i < tablesz; i++) { + if (gc620_pixel_fmt_list[i].pix_fmt_ffmpeg == pix_fmt) { + return gc620_pixel_fmt_list[i].pix_fmt_libxcoder; + } + } + + return -1; +} + +int ff_ni_copy_device_to_host_frame(AVFrame *dst, const ni_frame_t *src, int pix_fmt) +{ + switch (pix_fmt) + { + /* packed */ + case GC620_RGBA8888: + case GC620_BGRA8888: + case GC620_ABGR8888: + case GC620_ARGB8888: + case GC620_RGB565: + case GC620_BGR565: + case GC620_B5G5R5X1: + case GC620_YUYV: + memcpy(dst->data[0],src->p_data[0],src->data_len[0]); + break; + + /* semi-planar */ + case GC620_NV12: + case GC620_NV21: + case GC620_P010_MSB: + case GC620_NV16: + memcpy(dst->data[0], src->p_data[0], src->data_len[0]); + memcpy(dst->data[1], src->p_data[1], src->data_len[1]); + break; + + /* planar */ + case GC620_I420: + case GC620_I010: + memcpy(dst->data[0], src->p_data[0], src->data_len[0]); + memcpy(dst->data[1], src->p_data[1], src->data_len[1]); + memcpy(dst->data[2], src->p_data[2], src->data_len[2]); + break; + + default: + return -1; + } + + return 0; +} + +int ff_ni_copy_host_to_device_frame(ni_frame_t *dst, const AVFrame *src, int pix_fmt) +{ + switch (pix_fmt) + { + /* packed */ + case GC620_RGBA8888: + case GC620_BGRA8888: + case GC620_ABGR8888: + case GC620_ARGB8888: + case GC620_RGB565: + case GC620_BGR565: + case GC620_B5G5R5X1: + case GC620_YUYV: + memcpy(dst->p_data[0], src->data[0], dst->data_len[0]); + dst->pixel_format = pix_fmt; + break; + + /* planar */ + case GC620_I420: + case GC620_I010: + memcpy(dst->p_data[0], src->data[0], dst->data_len[0]); + 
memcpy(dst->p_data[1], src->data[1], dst->data_len[1]); + memcpy(dst->p_data[2], src->data[2], dst->data_len[2]); + dst->pixel_format = pix_fmt; + break; + + /* semi-planar */ + case GC620_NV12: + case GC620_NV21: + case GC620_P010_MSB: + case GC620_NV16: + memcpy(dst->p_data[0], src->data[0], dst->data_len[0]); + memcpy(dst->p_data[1], src->data[1], dst->data_len[1]); + dst->pixel_format = pix_fmt; + break; + + default: + dst->pixel_format = -1; + return -1; + } + + return 0; +} + +void ff_ni_frame_free(void *opaque, uint8_t *data) +{ + int ret; + + if (data) + { + niFrameSurface1_t* p_data3 = (niFrameSurface1_t*)((uint8_t*)data); + if (p_data3->ui16FrameIdx != 0) + { + av_log(NULL, AV_LOG_DEBUG, "Recycle trace ui16FrameIdx = [%d] DevHandle %d\n", p_data3->ui16FrameIdx, p_data3->device_handle); + ret = ni_hwframe_buffer_recycle(p_data3, p_data3->device_handle); + if (ret != NI_RETCODE_SUCCESS) + { + av_log(NULL, AV_LOG_ERROR, "ERROR Failed to recycle trace ui16FrameIdx = [%d] DevHandle %d\n", p_data3->ui16FrameIdx, p_data3->device_handle); + } + } + // buffer is created by av_malloc, so use av_free to release. + av_free(data); + } +} + +int ff_ni_build_frame_pool(ni_session_context_t *ctx, + int width, int height, + enum AVPixelFormat out_format, + int pool_size, + int buffer_limit) +{ + int rc; + int scaler_format; + int options; + + scaler_format = ff_ni_ffmpeg_to_gc620_pix_fmt(out_format); + options = NI_SCALER_FLAG_IO | NI_SCALER_FLAG_PC; + if (buffer_limit) + options |= NI_SCALER_FLAG_LM; + + /* Allocate a pool of frames by the scaler */ + rc = ni_device_alloc_frame(ctx, + FFALIGN(width,2), + FFALIGN(height,2), + scaler_format, + options, + 0, // rec width + 0, // rec height + 0, // rec X pos + 0, // rec Y pos + pool_size, // rgba color/pool size + 0, // frame index + NI_DEVICE_TYPE_SCALER); + + return rc; +} + +void ff_ni_set_bit_depth_and_encoding_type(int8_t *p_bit_depth, + int8_t *p_enc_type, + enum AVPixelFormat pix_fmt) +{ + + // bit depth is 1 for 8-bit format, 2 for 10-bit format + // encoding type should be 1 for planar or packed, 0 for semi-planar + + switch (pix_fmt) + { + case AV_PIX_FMT_YUV420P: + *p_bit_depth = 1; // 8-bits per component + *p_enc_type = 1; // planar + break; + + case AV_PIX_FMT_YUV420P10LE: + *p_bit_depth = 2; // 10-bits per component + *p_enc_type = 1; // planar + break; + + case AV_PIX_FMT_NV12: + *p_bit_depth = 1; // 8-bits per component + *p_enc_type = 0; // semi-planar + break; + case AV_PIX_FMT_NI_QUAD_8_TILE_4X4: + *p_bit_depth = 1; // 8-bits per component + *p_enc_type = NI_PIXEL_PLANAR_FORMAT_TILED4X4; // tiled 4x4 + break; + case AV_PIX_FMT_P010LE: + *p_bit_depth = 2; // 10-bits per component + *p_enc_type = 0; // semi-planar + break; + + case AV_PIX_FMT_YUYV422: + case AV_PIX_FMT_UYVY422: + *p_bit_depth = 1; // 8-bits per component + *p_enc_type = 1; // packed + break; + + case AV_PIX_FMT_NV16: + *p_bit_depth = 1; // 8-bits per component + *p_enc_type = 0; // semi-planar + break; + + case AV_PIX_FMT_RGBA: + case AV_PIX_FMT_BGRA: + case AV_PIX_FMT_ABGR: + case AV_PIX_FMT_ARGB: + case AV_PIX_FMT_BGR0: + *p_bit_depth = 1; // 8-bits per component + *p_enc_type = 1; // packed or planar + break; + + default: + av_log(NULL, AV_LOG_WARNING, "WARNING: unexpected pix format %s\n", + av_get_pix_fmt_name(pix_fmt)); + + // fall back to default values for a pixel format we do not yet handle + *p_bit_depth = 1; // 8-bits per component + *p_enc_type = 1; // planar or packed + break; + } +} diff --git a/libavfilter/nifilter.h b/libavfilter/nifilter.h new file mode
100644 index 0000000000..e92ef3d6a6 --- /dev/null +++ b/libavfilter/nifilter.h @@ -0,0 +1,69 @@ +/* + * XCoder Filter Lib Wrapper + * + * Copyright (c) 2020 NetInt + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * XCoder codec lib wrapper. + */ + +#ifndef AVFILTER_NIFILTER_H +#define AVFILTER_NIFILTER_H + +#include "version.h" +#include "libavutil/pixdesc.h" +#include "libavutil/imgutils.h" +#include "libavutil/hwcontext.h" +#include "libavutil/hwcontext_ni_quad.h" +#include <ni_device_api.h> + +#define DEFAULT_NI_FILTER_POOL_SIZE 4 + +#define NI_FILT_OPTION_KEEPALIVE \ + { "keep_alive_timeout", "Specify a custom session keep alive timeout in seconds.", \ + OFFSET(keep_alive_timeout), AV_OPT_TYPE_INT, {.i64 = NI_DEFAULT_KEEP_ALIVE_TIMEOUT}, \ + NI_MIN_KEEP_ALIVE_TIMEOUT, NI_MAX_KEEP_ALIVE_TIMEOUT, FLAGS } + +#define NI_FILT_OPTION_KEEPALIVE10 \ + { "keep_alive_timeout", "Specify a custom session keep alive timeout in seconds.", \ + OFFSET(keep_alive_timeout), AV_OPT_TYPE_INT, {.i64 = 10}, NI_MIN_KEEP_ALIVE_TIMEOUT, \ + NI_MAX_KEEP_ALIVE_TIMEOUT, FLAGS } + +#define NI_FILT_OPTION_BUFFER_LIMIT \ + { "buffer_limit", "Limit output buffering", OFFSET(buffer_limit), AV_OPT_TYPE_BOOL, \ + {.i64 = 0}, 0, 1, FLAGS } + +#define NI_FILT_OPTION_AUTO_SKIP \ + { "auto_skip", "skip processing when output would be same as input", OFFSET(auto_skip), \ + AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS} + +void ff_ni_update_benchmark(const char *fmt, ...); +int ff_ni_ffmpeg_to_gc620_pix_fmt(enum AVPixelFormat pix_fmt); +int ff_ni_ffmpeg_to_libxcoder_pix_fmt(enum AVPixelFormat pix_fmt); +int ff_ni_copy_device_to_host_frame(AVFrame *dst, const ni_frame_t *src, int pix_fmt); +int ff_ni_copy_host_to_device_frame(ni_frame_t *dst, const AVFrame *src, int pix_fmt); +int ff_ni_build_frame_pool(ni_session_context_t *ctx,int width,int height, enum AVPixelFormat out_format, int pool_size, int buffer_limit); +void ff_ni_frame_free(void *opaque, uint8_t *data); +void ff_ni_set_bit_depth_and_encoding_type(int8_t *p_bit_depth, + int8_t *p_enc_type, + enum AVPixelFormat pix_fmt); + +#endif diff --git a/libavfilter/vf_crop_ni.c b/libavfilter/vf_crop_ni.c new file mode 100644 index 0000000000..2d8683fb45 --- /dev/null +++ b/libavfilter/vf_crop_ni.c @@ -0,0 +1,719 @@ +/* + * Copyright (c) 2007 Bobby Bingham + * Copyright (c) 2020 NetInt + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * video crop filter + */ + +#include <stdio.h> + +#include "nifilter.h" +#include "filters.h" +#include "formats.h" +#include "libavutil/mem.h" +#include "fftools/ffmpeg_sched.h" +#include "video.h" +#include "libavutil/eval.h" +#include "libavutil/avstring.h" +#include "libavutil/internal.h" +#include "libavutil/libm.h" +#include "libavutil/imgutils.h" +#include "libavutil/mathematics.h" +#include "libavutil/opt.h" + +#include <ni_device_api.h> + +static const char *const var_names[] = { + "in_w", "iw", ///< width of the input video + "in_h", "ih", ///< height of the input video + "out_w", "ow", ///< width of the cropped video + "out_h", "oh", ///< height of the cropped video + "a", + "sar", + "dar", + "hsub", + "vsub", + "x", + "y", + "n", ///< number of frame +#if FF_API_FRAME_PKT + "pos", ///< position in the file +#endif + "t", ///< timestamp expressed in seconds + NULL +}; + +enum var_name { + VAR_IN_W, VAR_IW, + VAR_IN_H, VAR_IH, + VAR_OUT_W, VAR_OW, + VAR_OUT_H, VAR_OH, + VAR_A, + VAR_SAR, + VAR_DAR, + VAR_HSUB, + VAR_VSUB, + VAR_X, + VAR_Y, + VAR_N, +#if FF_API_FRAME_PKT + VAR_POS, +#endif + VAR_T, + VAR_VARS_NB +}; + +typedef struct NetIntCropContext { + const AVClass *class; + int x; ///< x offset of the non-cropped area with respect to the input area + int y; ///< y offset of the non-cropped area with respect to the input area + int w; ///< width of the cropped area + int h; ///< height of the cropped area + + AVRational out_sar; ///< output sample aspect ratio + int keep_aspect; ///< keep display aspect ratio when cropping + + int max_step[4]; ///< max pixel step for each plane, expressed as a number of bytes + int hsub, vsub; ///< chroma subsampling + char *x_expr, *y_expr, *w_expr, *h_expr; + AVExpr *x_pexpr, *y_pexpr; /* parsed expressions for x and y */ + double var_values[VAR_VARS_NB]; + + AVBufferRef *out_frames_ref; + + ni_session_context_t api_ctx; + ni_session_data_io_t api_dst_frame; + + int initialized; + int session_opened; + int keep_alive_timeout; /* keep alive timeout setting */ + + int auto_skip; + int skip_filter; + int buffer_limit; +} NetIntCropContext; + +static int query_formats(AVFilterContext *ctx) +{ + static const enum AVPixelFormat pix_fmts[] = + {AV_PIX_FMT_NI_QUAD, AV_PIX_FMT_NONE}; + AVFilterFormats *formats; + + formats = ff_make_format_list(pix_fmts); + + if (!formats) + return AVERROR(ENOMEM); + + return ff_set_common_formats(ctx, formats); +} + +static av_cold void uninit(AVFilterContext *ctx) +{ + NetIntCropContext *s = ctx->priv; + + av_expr_free(s->x_pexpr); + s->x_pexpr = NULL; + av_expr_free(s->y_pexpr); + s->y_pexpr = NULL; + + if (s->api_dst_frame.data.frame.p_buffer) + ni_frame_buffer_free(&s->api_dst_frame.data.frame); + + if (s->session_opened) { + /* Close operation will free the device frames */ + ni_device_session_close(&s->api_ctx, 1, NI_DEVICE_TYPE_SCALER); + ni_device_session_context_clear(&s->api_ctx); + } + + av_buffer_unref(&s->out_frames_ref); +} + +static inline int normalize_double(int *n, double d) +{ + int ret = 0; + + if (isnan(d)) 
{ + ret = AVERROR(EINVAL); + } else if (d > INT_MAX || d < INT_MIN) { + *n = d > INT_MAX ? INT_MAX : INT_MIN; + ret = AVERROR(EINVAL); + } else { + *n = (int)lrint(d); + } + + return ret; +} + +static int config_input(AVFilterLink *link) +{ + AVFilterContext *ctx = link->dst; + AVHWFramesContext *hwctx; + NetIntCropContext *s; + const AVPixFmtDescriptor *pix_desc; + int ret; + const char *expr; + double res; + FilterLink *li = ff_filter_link(link); + if (li->hw_frames_ctx == NULL) { + av_log(ctx, AV_LOG_ERROR, "No hw context provided on input\n"); + return AVERROR(EINVAL); + } + hwctx = (AVHWFramesContext *)li->hw_frames_ctx->data; + + s = ctx->priv; + pix_desc = av_pix_fmt_desc_get(hwctx->sw_format); + + s->var_values[VAR_IN_W] = s->var_values[VAR_IW] = ctx->inputs[0]->w; + s->var_values[VAR_IN_H] = s->var_values[VAR_IH] = ctx->inputs[0]->h; + s->var_values[VAR_A] = (double)link->w / (double)link->h; + s->var_values[VAR_SAR] = link->sample_aspect_ratio.num ? av_q2d(link->sample_aspect_ratio) : 1; + s->var_values[VAR_DAR] = s->var_values[VAR_A] * s->var_values[VAR_SAR]; + s->var_values[VAR_HSUB] = 1<<pix_desc->log2_chroma_w; + s->var_values[VAR_VSUB] = 1<<pix_desc->log2_chroma_h; + s->var_values[VAR_X] = NAN; + s->var_values[VAR_Y] = NAN; + s->var_values[VAR_OUT_W] = s->var_values[VAR_OW] = NAN; + s->var_values[VAR_OUT_H] = s->var_values[VAR_OH] = NAN; + s->var_values[VAR_N] = 0; + s->var_values[VAR_T] = NAN; +#if FF_API_FRAME_PKT + s->var_values[VAR_POS] = NAN; +#endif + + av_image_fill_max_pixsteps(s->max_step, NULL, pix_desc); + s->hsub = pix_desc->log2_chroma_w; + s->vsub = pix_desc->log2_chroma_h; + + if ((ret = av_expr_parse_and_eval(&res, (expr = s->w_expr), + var_names, s->var_values, + NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0) + goto fail_expr; + s->var_values[VAR_OUT_W] = s->var_values[VAR_OW] = res; + if ((ret = av_expr_parse_and_eval(&res, (expr = s->h_expr), + var_names, s->var_values, + NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0) + goto fail_expr; + s->var_values[VAR_OUT_H] = s->var_values[VAR_OH] = res; + /* evaluate again ow as it may depend on oh */ + if ((ret = av_expr_parse_and_eval(&res, (expr = s->w_expr), + var_names, s->var_values, + NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0) + goto fail_expr; + + s->var_values[VAR_OUT_W] = s->var_values[VAR_OW] = res; + if (normalize_double(&s->w, s->var_values[VAR_OUT_W]) < 0 || + normalize_double(&s->h, s->var_values[VAR_OUT_H]) < 0) { + av_log(ctx, AV_LOG_ERROR, + "Too big value or invalid expression for out_w/ow or out_h/oh. 
" + "Maybe the expression for out_w:'%s' or for out_h:'%s' is self-referencing.\n", + s->w_expr, s->h_expr); + return AVERROR(EINVAL); + } + + s->w &= ~((1 << s->hsub) - 1); + s->h &= ~((1 << s->vsub) - 1); + + av_expr_free(s->x_pexpr); + s->x_pexpr = NULL; + av_expr_free(s->y_pexpr); + s->y_pexpr = NULL; + if ((av_expr_parse(&s->x_pexpr, s->x_expr, var_names, NULL, NULL, NULL, + NULL, 0, ctx) < 0) || + (av_expr_parse(&s->y_pexpr, s->y_expr, var_names, NULL, NULL, NULL, + NULL, 0, ctx) < 0)) + return AVERROR(EINVAL); + + if (s->keep_aspect) { + AVRational dar = av_mul_q(link->sample_aspect_ratio, + (AVRational){ link->w, link->h }); + av_reduce(&s->out_sar.num, &s->out_sar.den, + dar.num * s->h, dar.den * s->w, INT_MAX); + } else { + s->out_sar = link->sample_aspect_ratio; + } + + av_log(ctx, AV_LOG_VERBOSE, "w:%d h:%d sar:%d/%d -> w:%d h:%d sar:%d/%d\n", + link->w, link->h, link->sample_aspect_ratio.num, link->sample_aspect_ratio.den, + s->w, s->h, s->out_sar.num, s->out_sar.den); + + if (s->w <= 0 || s->h <= 0 || + s->w > link->w || s->h > link->h) { + av_log(ctx, AV_LOG_ERROR, + "Invalid too big or non positive size for width '%d' or height '%d'\n", + s->w, s->h); + return AVERROR(EINVAL); + } + + /* set default, required in the case the first computed value for x/y is NAN */ + s->x = (link->w - s->w) / 2; + s->y = (link->h - s->h) / 2; + + s->x &= ~((1 << s->hsub) - 1); + s->y &= ~((1 << s->vsub) - 1); + + return 0; + +fail_expr: + av_log(NULL, AV_LOG_ERROR, "Error when evaluating the expression '%s'\n", expr); + return ret; +} + +static int init_out_pool(AVFilterContext *ctx) +{ + NetIntCropContext *s = ctx->priv; + AVHWFramesContext *out_frames_ctx; + int pool_size = DEFAULT_NI_FILTER_POOL_SIZE; + + out_frames_ctx = (AVHWFramesContext*)s->out_frames_ref->data; + pool_size += ctx->extra_hw_frames > 0 ? 
ctx->extra_hw_frames : 0; + s->buffer_limit = 1; + + /* Create frame pool on device */ + return ff_ni_build_frame_pool(&s->api_ctx, out_frames_ctx->width, + out_frames_ctx->height, + out_frames_ctx->sw_format, pool_size, + s->buffer_limit); +} + +static int config_output(AVFilterLink *link) +{ + NetIntCropContext *s = link->src->priv; + AVHWFramesContext *in_frames_ctx; + AVHWFramesContext *out_frames_ctx; + AVFilterContext *ctx = link->src; + + link->w = s->w; + link->h = s->h; + link->sample_aspect_ratio = s->out_sar; + + FilterLink *li = ff_filter_link(ctx->inputs[0]); + if (li->hw_frames_ctx == NULL) { + av_log(ctx, AV_LOG_ERROR, "No hw context provided on input\n"); + return AVERROR(EINVAL); + } + in_frames_ctx = (AVHWFramesContext *)li->hw_frames_ctx->data; + + if (in_frames_ctx->sw_format == AV_PIX_FMT_NI_QUAD_8_TILE_4X4 || + in_frames_ctx->sw_format == AV_PIX_FMT_NI_QUAD_10_TILE_4X4) { + av_log(ctx, AV_LOG_ERROR, "tile4x4 not supported\n"); + return AVERROR(EINVAL); + } + + // auto-skip: bypass the hardware crop when the output would be identical to the input + if (s->auto_skip && + (s->x_expr && strcmp(s->x_expr, "0") == 0 && s->y_expr && strcmp(s->y_expr, "0") == 0) && + (in_frames_ctx->width == link->w && in_frames_ctx->height == link->h) + ) { + //skip hardware crop + s->skip_filter = 1; + + FilterLink *lo = ff_filter_link(link); + s->out_frames_ref = av_buffer_ref(li->hw_frames_ctx); + if (!s->out_frames_ref) { + return AVERROR(ENOMEM); + } + av_buffer_unref(&lo->hw_frames_ctx); + lo->hw_frames_ctx = av_buffer_ref(s->out_frames_ref); + if (!lo->hw_frames_ctx) { + return AVERROR(ENOMEM); + } + return 0; + } + + s->out_frames_ref = av_hwframe_ctx_alloc(in_frames_ctx->device_ref); + if (!s->out_frames_ref) + return AVERROR(ENOMEM); + + out_frames_ctx = (AVHWFramesContext *)s->out_frames_ref->data; + + out_frames_ctx->format = AV_PIX_FMT_NI_QUAD; + out_frames_ctx->width = s->w; + out_frames_ctx->height = s->h; + out_frames_ctx->sw_format = in_frames_ctx->sw_format; + out_frames_ctx->initial_pool_size = + NI_CROP_ID; // Repurposed as identity code + + av_hwframe_ctx_init(s->out_frames_ref); + + FilterLink *lo = ff_filter_link(link); + av_buffer_unref(&lo->hw_frames_ctx); + + lo->hw_frames_ctx = av_buffer_ref(s->out_frames_ref); + if (!lo->hw_frames_ctx) + return AVERROR(ENOMEM); + return 0; +} + +static int filter_frame(AVFilterLink *link, AVFrame *frame) +{ + AVFilterContext *ctx = link->dst; + NetIntCropContext *s = ctx->priv; + AVFilterLink *outlink = link->dst->outputs[0]; + AVFrame *out = NULL; + niFrameSurface1_t* frame_surface,*new_frame_surface; + AVHWFramesContext *pAVHFWCtx; + AVNIDeviceContext *pAVNIDevCtx; + ni_retcode_t retcode; + uint32_t scaler_format; + int cardno; + uint16_t tempFID; + + pAVHFWCtx = (AVHWFramesContext *)frame->hw_frames_ctx->data; + if (!pAVHFWCtx) { + return AVERROR(EINVAL); + } + + pAVNIDevCtx = (AVNIDeviceContext *)pAVHFWCtx->device_ctx->hwctx; + if (!pAVNIDevCtx) { + return AVERROR(EINVAL); + } + + cardno = ni_get_cardno(frame); + + if (s->skip_filter) { + //skip hardware crop + return ff_filter_frame(link->dst->outputs[0], frame); + } + + if (!s->initialized) { + retcode = ni_device_session_context_init(&s->api_ctx); + if (retcode < 0) { + av_log(ctx, AV_LOG_ERROR, + "ni crop filter session context init failure\n"); + goto fail; + } + + s->api_ctx.device_handle = pAVNIDevCtx->cards[cardno]; + s->api_ctx.blk_io_handle = pAVNIDevCtx->cards[cardno]; + + s->api_ctx.hw_id = cardno; + s->api_ctx.device_type = NI_DEVICE_TYPE_SCALER; + s->api_ctx.scaler_operation = NI_SCALER_OPCODE_CROP;
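+ /* Session keep-alive timeout in seconds, taken from the keep_alive_timeout + * filter option (see NI_FILT_OPTION_KEEPALIVE in nifilter.h). */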
+ s->api_ctx.keep_alive_timeout = s->keep_alive_timeout; + + retcode = ni_device_session_open(&s->api_ctx, NI_DEVICE_TYPE_SCALER); + if (retcode != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Can't open device session on card %d\n", + cardno); + + /* Close operation will free the device frames */ + ni_device_session_close(&s->api_ctx, 1, NI_DEVICE_TYPE_SCALER); + ni_device_session_context_clear(&s->api_ctx); + goto fail; + } + + s->session_opened = 1; + + if (!((av_strstart(outlink->dst->filter->name, "ni_quadra", NULL)) || (av_strstart(outlink->dst->filter->name, "hwdownload", NULL)))) { + ctx->extra_hw_frames = (DEFAULT_FRAME_THREAD_QUEUE_SIZE > 1) ? DEFAULT_FRAME_THREAD_QUEUE_SIZE : 0; + } + retcode = init_out_pool(ctx); + if (retcode < 0) { + av_log(ctx, AV_LOG_ERROR, + "Internal output allocation failed rc = %d\n", retcode); + goto fail; + } + + AVHWFramesContext *out_frames_ctx = (AVHWFramesContext *)s->out_frames_ref->data; + AVNIFramesContext *out_ni_ctx = (AVNIFramesContext *)out_frames_ctx->hwctx; + ni_cpy_hwframe_ctx(pAVHFWCtx, out_frames_ctx); + ni_device_session_copy(&s->api_ctx, &out_ni_ctx->api_ctx); + + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pAVHFWCtx->sw_format); + + if ((frame->color_range == AVCOL_RANGE_JPEG) && !(desc->flags & AV_PIX_FMT_FLAG_RGB)) { + av_log(link->dst, AV_LOG_WARNING, + "WARNING: Full color range input, limited color range output\n"); + } + + s->initialized = 1; + } + + FilterLink *l = ff_filter_link(link); + s->var_values[VAR_N] = l->frame_count_out; + s->var_values[VAR_T] = frame->pts == AV_NOPTS_VALUE ? + NAN : frame->pts * av_q2d(link->time_base); + s->var_values[VAR_X] = av_expr_eval(s->x_pexpr, s->var_values, NULL); + s->var_values[VAR_Y] = av_expr_eval(s->y_pexpr, s->var_values, NULL); + s->var_values[VAR_X] = av_expr_eval(s->x_pexpr, s->var_values, NULL); + + normalize_double(&s->x, s->var_values[VAR_X]); + normalize_double(&s->y, s->var_values[VAR_Y]); + + if (s->x < 0) + s->x = 0; + if (s->y < 0) + s->y = 0; + if ((unsigned)s->x + (unsigned)s->w > link->w) + s->x = link->w - s->w; + if ((unsigned)s->y + (unsigned)s->h > link->h) + s->y = link->h - s->h; + + s->x &= ~((1 << s->hsub) - 1); + s->y &= ~((1 << s->vsub) - 1); + + av_log(ctx, AV_LOG_TRACE, "n:%d t:%f x:%d y:%d x+w:%d y+h:%d\n", + (int)s->var_values[VAR_N], s->var_values[VAR_T], s->x, s->y, + s->x+s->w, s->y+s->h); + + frame_surface = (niFrameSurface1_t *) frame->data[3]; + if (frame_surface == NULL) { + retcode = AVERROR(EINVAL); + goto fail; + } + + scaler_format = ff_ni_ffmpeg_to_gc620_pix_fmt(pAVHFWCtx->sw_format); + + retcode = ni_frame_buffer_alloc_hwenc(&s->api_dst_frame.data.frame, + outlink->w, + outlink->h, + 0); + + if (retcode != NI_RETCODE_SUCCESS) { + retcode = AVERROR(ENOMEM); + goto fail; + } + +#ifdef NI_MEASURE_LATENCY + ff_ni_update_benchmark(NULL); +#endif + + /* + * Allocate device input frame. 
This call won't actually allocate a frame, + * but sends the incoming hardware frame index to the scaler manager + */ + retcode = ni_device_alloc_frame(&s->api_ctx, + FFALIGN(frame->width, 2), + FFALIGN(frame->height, 2), + scaler_format, + 0, // input frame + s->w, // src rectangle width + s->h, // src rectangle height + s->x, // src rectangle x + s->y, // src rectangle y + frame_surface->ui32nodeAddress, + frame_surface->ui16FrameIdx, + NI_DEVICE_TYPE_SCALER); + + if (retcode != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_DEBUG, "Can't assign input frame %d\n", + retcode); + retcode = AVERROR(ENOMEM); + goto fail; + } + + /* Allocate device destination frame. This will acquire a frame from the pool */ + retcode = ni_device_alloc_frame(&s->api_ctx, + FFALIGN(outlink->w,2), + FFALIGN(outlink->h,2), + scaler_format, + NI_SCALER_FLAG_IO, + 0, + 0, + 0, + 0, + 0, + -1, + NI_DEVICE_TYPE_SCALER); + + if (retcode != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_DEBUG, "Can't allocate device output frame %d\n", + retcode); + + retcode = AVERROR(ENOMEM); + goto fail; + } + + out = av_frame_alloc(); + if (!out) { + retcode = AVERROR(ENOMEM); + goto fail; + } + + av_frame_copy_props(out, frame); + + out->width = s->w; + out->height = s->h; + + out->format = AV_PIX_FMT_NI_QUAD; + + /* Quadra 2D engine always outputs limited color range */ + out->color_range = AVCOL_RANGE_MPEG; + + /* Reference the new hw frames context */ + out->hw_frames_ctx = av_buffer_ref(s->out_frames_ref); + + out->data[3] = av_malloc(sizeof(niFrameSurface1_t)); + + if (!out->data[3]) { + retcode = AVERROR(ENOMEM); + goto fail; + } + + /* Copy the frame surface from the incoming frame */ + memcpy(out->data[3], frame->data[3], sizeof(niFrameSurface1_t)); + + /* Set the new frame index */ + retcode = ni_device_session_read_hwdesc(&s->api_ctx, &s->api_dst_frame, + NI_DEVICE_TYPE_SCALER); + + if (retcode != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, + "Can't acquire output frame %d\n", retcode); + retcode = AVERROR(ENOMEM); + goto fail; + } + +#ifdef NI_MEASURE_LATENCY + ff_ni_update_benchmark("ni_quadra_crop"); +#endif + + tempFID = frame_surface->ui16FrameIdx; + frame_surface = (niFrameSurface1_t *) out->data[3]; + new_frame_surface = (niFrameSurface1_t *) s->api_dst_frame.data.frame.p_data[3]; + frame_surface->ui16FrameIdx = new_frame_surface->ui16FrameIdx; + frame_surface->ui16session_ID = new_frame_surface->ui16session_ID; + frame_surface->device_handle = (int32_t)pAVNIDevCtx->cards[cardno]; + frame_surface->output_idx = new_frame_surface->output_idx; + frame_surface->src_cpu = new_frame_surface->src_cpu; + frame_surface->dma_buf_fd = 0; + + ff_ni_set_bit_depth_and_encoding_type(&frame_surface->bit_depth, + &frame_surface->encoding_type, + pAVHFWCtx->sw_format); + + /* Remove ni-split specific assets */ + frame_surface->ui32nodeAddress = 0; + + frame_surface->ui16width = out->width; + frame_surface->ui16height = out->height; + + av_log(ctx, AV_LOG_DEBUG, + "vf_crop_ni.c:IN trace ui16FrameIdx = [%d] --> out = [%d] \n", + tempFID, frame_surface->ui16FrameIdx); + + out->buf[0] = av_buffer_create(out->data[3], sizeof(niFrameSurface1_t), ff_ni_frame_free, NULL, 0); + + av_frame_free(&frame); + + return ff_filter_frame(link->dst->outputs[0], out); + +fail: + av_frame_free(&frame); + if (out) + av_frame_free(&out); + return retcode; +} + +static int activate(AVFilterContext *ctx) +{ + AVFilterLink *inlink = ctx->inputs[0]; + AVFilterLink *outlink = ctx->outputs[0]; + AVFrame *frame = NULL; + int ret = 0; + NetIntCropContext *s =
inlink->dst->priv; + + // Forward the status on output link to input link, if the status is set, discard all queued frames + FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink); + + if (ff_inlink_check_available_frame(inlink)) { + if (s->initialized) { + ret = ni_device_session_query_buffer_avail(&s->api_ctx, NI_DEVICE_TYPE_SCALER); + } + + if (ret == NI_RETCODE_ERROR_UNSUPPORTED_FW_VERSION) { + av_log(ctx, AV_LOG_WARNING, "No backpressure support in FW\n"); + } else if (ret < 0) { + av_log(ctx, AV_LOG_WARNING, "%s: query ret %d, ready %u inlink framequeue %u available_frame %d outlink framequeue %u frame_wanted %d - return NOT READY\n", + __func__, ret, ctx->ready, ff_inlink_queued_frames(inlink), ff_inlink_check_available_frame(inlink), ff_inlink_queued_frames(outlink), ff_outlink_frame_wanted(outlink)); + return FFERROR_NOT_READY; + } + + ret = ff_inlink_consume_frame(inlink, &frame); + if (ret < 0) + return ret; + + ret = filter_frame(inlink, frame); + if (ret >= 0) { + ff_filter_set_ready(ctx, 300); + } + return ret; + } + + // We did not get a frame from input link, check its status + FF_FILTER_FORWARD_STATUS(inlink, outlink); + + // We have no frames yet from input link and no EOF, so request some. + FF_FILTER_FORWARD_WANTED(outlink, inlink); + + return FFERROR_NOT_READY; +} + +#define OFFSET(x) offsetof(NetIntCropContext, x) +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) + +static const AVOption ni_crop_options[] = { + { "out_w", "set the width crop area expression", OFFSET(w_expr), AV_OPT_TYPE_STRING, {.str = "iw"}, .flags = FLAGS }, + { "w", "set the width crop area expression", OFFSET(w_expr), AV_OPT_TYPE_STRING, {.str = "iw"}, .flags = FLAGS }, + { "out_h", "set the height crop area expression", OFFSET(h_expr), AV_OPT_TYPE_STRING, {.str = "ih"}, .flags = FLAGS }, + { "h", "set the height crop area expression", OFFSET(h_expr), AV_OPT_TYPE_STRING, {.str = "ih"}, .flags = FLAGS }, + { "x", "set the x crop area expression", OFFSET(x_expr), AV_OPT_TYPE_STRING, {.str = "(in_w-out_w)/2"}, .flags = FLAGS }, + { "y", "set the y crop area expression", OFFSET(y_expr), AV_OPT_TYPE_STRING, {.str = "(in_h-out_h)/2"}, .flags = FLAGS }, + { "keep_aspect", "keep aspect ratio", OFFSET(keep_aspect), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS }, + NI_FILT_OPTION_AUTO_SKIP, + NI_FILT_OPTION_KEEPALIVE, + NI_FILT_OPTION_BUFFER_LIMIT, + { NULL } +}; + +AVFILTER_DEFINE_CLASS(ni_crop); + +static const AVFilterPad inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .filter_frame = filter_frame, + .config_props = config_input, + }, +}; + +static const AVFilterPad outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = config_output, + }, +}; + +FFFilter ff_vf_crop_ni_quadra = { + .p.name = "ni_quadra_crop", + .p.description = NULL_IF_CONFIG_SMALL( + "NETINT Quadra crop the input video v" NI_XCODER_REVISION), + .p.priv_class = &ni_crop_class, + .priv_size = sizeof(NetIntCropContext), + .uninit = uninit, + .activate = activate, + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, + FILTER_INPUTS(inputs), + FILTER_OUTPUTS(outputs), + FILTER_QUERY_FUNC(query_formats), +}; diff --git a/libavfilter/vf_drawbox_ni.c b/libavfilter/vf_drawbox_ni.c new file mode 100644 index 0000000000..601e3a49d3 --- /dev/null +++ b/libavfilter/vf_drawbox_ni.c @@ -0,0 +1,831 @@ +/* + * Copyright (c) 2007 Bobby Bingham + * Copyright (c) 2020 NetInt + * + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * drawbox video filter + */ + +#include <stdio.h> +#include <string.h> + +#include "nifilter.h" +#include "version.h" +#include "filters.h" +#include "formats.h" +#include "libavutil/mem.h" +#include "fftools/ffmpeg_sched.h" +#include "scale_eval.h" +#include "video.h" +#include "libavutil/avstring.h" +#include "libavutil/internal.h" +#include "libavutil/mathematics.h" +#include "libavutil/opt.h" +#include "libavutil/eval.h" +#include "libavutil/parseutils.h" +#include "libavutil/avassert.h" +#include "libswscale/swscale.h" + +enum OutputFormat { + OUTPUT_FORMAT_YUV420P, + OUTPUT_FORMAT_YUYV422, + OUTPUT_FORMAT_UYVY422, + OUTPUT_FORMAT_NV12, + OUTPUT_FORMAT_ARGB, + OUTPUT_FORMAT_RGBA, + OUTPUT_FORMAT_ABGR, + OUTPUT_FORMAT_BGRA, + OUTPUT_FORMAT_YUV420P10LE, + OUTPUT_FORMAT_NV16, + OUTPUT_FORMAT_BGR0, + OUTPUT_FORMAT_P010LE, + OUTPUT_FORMAT_AUTO, + OUTPUT_FORMAT_NB +}; + +static const char *const var_names[] = { + "dar", + "in_h", "ih", ///< height of the input video + "in_w", "iw", ///< width of the input video + "sar", + "x", + "y", + "h", ///< height of the rendered box + "w", ///< width of the rendered box + "fill", + NULL +}; + +enum { R, G, B, A }; + +enum var_name { + VAR_DAR, + VAR_IN_H, VAR_IH, + VAR_IN_W, VAR_IW, + VAR_SAR, + VAR_X, + VAR_Y, + VAR_H, + VAR_W, + VAR_MAX, + VARS_NB +}; + +typedef struct NetIntDrawBoxContext { + const AVClass *class; + + /** + * New dimensions. 
Special values are: + * 0 = original width/height + * -1 = keep original aspect + * -N = try to keep aspect but make sure it is divisible by N + */ + int w, h; + int box_x[NI_MAX_SUPPORT_DRAWBOX_NUM], box_y[NI_MAX_SUPPORT_DRAWBOX_NUM], box_w[NI_MAX_SUPPORT_DRAWBOX_NUM], box_h[NI_MAX_SUPPORT_DRAWBOX_NUM]; + unsigned char box_rgba_color[NI_MAX_SUPPORT_DRAWBOX_NUM][4]; + ni_scaler_multi_drawbox_params_t scaler_drawbox_paras; + char *size_str; + + char *box_x_expr[NI_MAX_SUPPORT_DRAWBOX_NUM]; + char *box_y_expr[NI_MAX_SUPPORT_DRAWBOX_NUM]; + char *box_w_expr[NI_MAX_SUPPORT_DRAWBOX_NUM]; + char *box_h_expr[NI_MAX_SUPPORT_DRAWBOX_NUM]; + char *box_color_str[NI_MAX_SUPPORT_DRAWBOX_NUM]; + + int format; + + enum AVPixelFormat out_format; + AVBufferRef *out_frames_ref; + + ni_session_context_t api_ctx; + ni_session_data_io_t api_dst_frame; + ni_scaler_params_t params; + + int initialized; + int session_opened; + int keep_alive_timeout; /* keep alive timeout setting */ + int inplace; + int buffer_limit; + + ni_frame_config_t frame_in; + ni_frame_config_t frame_out; +} NetIntDrawBoxContext; + +FFFilter ff_vf_drawbox_ni_quadra; + +static const int NUM_EXPR_EVALS = 4; + +static av_cold int init(AVFilterContext *ctx) +{ + NetIntDrawBoxContext *drawbox = ctx->priv; + + uint8_t rgba_color[4]; + + if (av_parse_color(rgba_color, drawbox->box_color_str[0], -1, ctx) < 0) + return AVERROR(EINVAL); + + drawbox->box_rgba_color[0][R] = rgba_color[0]; + drawbox->box_rgba_color[0][G] = rgba_color[1]; + drawbox->box_rgba_color[0][B] = rgba_color[2]; + drawbox->box_rgba_color[0][A] = rgba_color[3]; + + return 0; +} + +static int config_input(AVFilterLink *inlink) +{ + AVFilterContext *ctx = inlink->dst; + NetIntDrawBoxContext *s = ctx->priv; + double var_values[VARS_NB], res; + char *expr; + int ret; + int i; + + var_values[VAR_IN_H] = var_values[VAR_IH] = inlink->h; + var_values[VAR_IN_W] = var_values[VAR_IW] = inlink->w; + var_values[VAR_SAR] = inlink->sample_aspect_ratio.num ? av_q2d(inlink->sample_aspect_ratio) : 1; + var_values[VAR_DAR] = (double)inlink->w / inlink->h * var_values[VAR_SAR]; + var_values[VAR_X] = NAN; + var_values[VAR_Y] = NAN; + var_values[VAR_H] = NAN; + var_values[VAR_W] = NAN; + + for (i = 0; i < NI_MAX_SUPPORT_DRAWBOX_NUM; i++) { + /* evaluate expressions, fail on last iteration */ + var_values[VAR_MAX] = inlink->w; + if ((ret = av_expr_parse_and_eval(&res, (expr = s->box_x_expr[i]), + var_names, var_values, + NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0) + goto fail; + s->box_x[i] = var_values[VAR_X] = ((res < var_values[VAR_MAX]) ? res : (var_values[VAR_MAX] - 1)); + + var_values[VAR_MAX] = inlink->h; + if ((ret = av_expr_parse_and_eval(&res, (expr = s->box_y_expr[i]), + var_names, var_values, + NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0) + goto fail; + s->box_y[i] = var_values[VAR_Y] = ((res < var_values[VAR_MAX]) ? res : (var_values[VAR_MAX] - 1)); + + var_values[VAR_MAX] = inlink->w - s->box_x[i]; + if ((ret = av_expr_parse_and_eval(&res, (expr = s->box_w_expr[i]), + var_names, var_values, + NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0) + goto fail; + s->box_w[i] = var_values[VAR_W] = ((res < var_values[VAR_MAX]) ? res : var_values[VAR_MAX]); + + var_values[VAR_MAX] = inlink->h - s->box_y[i]; + if ((ret = av_expr_parse_and_eval(&res, (expr = s->box_h_expr[i]), + var_names, var_values, + NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0) + goto fail; + s->box_h[i] = var_values[VAR_H] = ((res < var_values[VAR_MAX]) ?
res : var_values[VAR_MAX]); + + /* if w or h are zero, use the input w/h */ + s->box_w[i] = (s->box_w[i] > 0) ? s->box_w[i] : inlink->w; + s->box_h[i] = (s->box_h[i] > 0) ? s->box_h[i] : inlink->h; + + /* sanity check width and height */ + if (s->box_w[i] < 0 || s->box_h[i] < 0) { + av_log(ctx, AV_LOG_ERROR, "Size values less than 0 are not acceptable.\n"); + return AVERROR(EINVAL); + } + av_log(ctx, AV_LOG_VERBOSE, "%d: x:%d y:%d w:%d h:%d color:0x%02X%02X%02X%02X\n", + i, s->box_x[i], s->box_y[i], s->box_w[i], s->box_h[i], + s->box_rgba_color[0][R], s->box_rgba_color[0][G], s->box_rgba_color[0][B], s->box_rgba_color[0][A]); + } + + FilterLink *li = ff_filter_link(ctx->inputs[0]); + if (li->hw_frames_ctx == NULL) { + av_log(ctx, AV_LOG_ERROR, "No hw context provided on input\n"); + return AVERROR(EINVAL); + } + + return 0; + +fail: + av_log(ctx, AV_LOG_ERROR, + "Error when evaluating the expression '%s'.\n", + expr); + return ret; +} + +static int query_formats(AVFilterContext *ctx) +{ + static const enum AVPixelFormat pix_fmts[] = + {AV_PIX_FMT_NI_QUAD, AV_PIX_FMT_NONE}; + AVFilterFormats *formats; + + formats = ff_make_format_list(pix_fmts); + + if (!formats) + return AVERROR(ENOMEM); + + return ff_set_common_formats(ctx, formats); +} + +static av_cold void uninit(AVFilterContext *ctx) +{ + NetIntDrawBoxContext *drawbox = ctx->priv; + + if (drawbox->api_dst_frame.data.frame.p_buffer) + ni_frame_buffer_free(&drawbox->api_dst_frame.data.frame); + + if (drawbox->session_opened) { + /* Close operation will free the device frames */ + ni_device_session_close(&drawbox->api_ctx, 1, NI_DEVICE_TYPE_SCALER); + ni_device_session_context_clear(&drawbox->api_ctx); + } + + av_buffer_unref(&drawbox->out_frames_ref); +} + +static int init_out_pool(AVFilterContext *ctx) +{ + NetIntDrawBoxContext *s = ctx->priv; + AVHWFramesContext *out_frames_ctx; + int pool_size = DEFAULT_NI_FILTER_POOL_SIZE; + + out_frames_ctx = (AVHWFramesContext*)s->out_frames_ref->data; + pool_size += ctx->extra_hw_frames > 0 ? 
ctx->extra_hw_frames : 0; + s->buffer_limit = 1; + + /* Create frame pool on device */ + return ff_ni_build_frame_pool(&s->api_ctx, out_frames_ctx->width, + out_frames_ctx->height, s->out_format, + pool_size, + s->buffer_limit); +} + +static int config_props(AVFilterLink *outlink) +{ + AVFilterContext *ctx = outlink->src; + AVFilterLink *inlink0 = outlink->src->inputs[0]; + AVFilterLink *inlink = outlink->src->inputs[0]; + AVHWFramesContext *in_frames_ctx; + AVHWFramesContext *out_frames_ctx; + NetIntDrawBoxContext *drawbox = ctx->priv; + int w, h, ret, h_shift, v_shift; + + if ((ret = ff_scale_eval_dimensions(ctx, + "iw", "ih", + inlink, outlink, + &w, &h)) < 0) + goto fail; + + /* Note that force_original_aspect_ratio may overwrite the previous set + * dimensions so that it is not divisible by the set factors anymore + * unless force_divisible_by is defined as well */ + + if (w > NI_MAX_RESOLUTION_WIDTH || h > NI_MAX_RESOLUTION_HEIGHT) { + av_log(ctx, AV_LOG_ERROR, "DrawBox value (%dx%d) > 8192 not allowed\n", w, h); + return AVERROR(EINVAL); + } + + if ((w <= 0) || (h <= 0)) { + av_log(ctx, AV_LOG_ERROR, "DrawBox value (%dx%d) not allowed\n", w, h); + return AVERROR(EINVAL); + } + + FilterLink *li = ff_filter_link(ctx->inputs[0]); + if (li->hw_frames_ctx == NULL) { + av_log(ctx, AV_LOG_ERROR, "No hw context provided on input\n"); + return AVERROR(EINVAL); + } + in_frames_ctx = (AVHWFramesContext *)li->hw_frames_ctx->data; + + /* Set the output format */ + drawbox->out_format = in_frames_ctx->sw_format; + + av_pix_fmt_get_chroma_sub_sample(drawbox->out_format, &h_shift, &v_shift); + + outlink->w = FFALIGN(w, (1 << h_shift)); + outlink->h = FFALIGN(h, (1 << v_shift)); + + if (inlink0->sample_aspect_ratio.num) { + outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h * inlink0->w, outlink->w * inlink0->h}, inlink0->sample_aspect_ratio); + } else { + outlink->sample_aspect_ratio = inlink0->sample_aspect_ratio; + + } + + av_log(ctx, AV_LOG_VERBOSE, + "w:%d h:%d fmt:%s sar:%d/%d -> w:%d h:%d fmt:%s sar:%d/%d\n", + inlink->w, inlink->h, av_get_pix_fmt_name(inlink->format), + inlink->sample_aspect_ratio.num, inlink->sample_aspect_ratio.den, + outlink->w, outlink->h, av_get_pix_fmt_name(outlink->format), + outlink->sample_aspect_ratio.num, outlink->sample_aspect_ratio.den); + + drawbox->out_frames_ref = av_hwframe_ctx_alloc(in_frames_ctx->device_ref); + if (!drawbox->out_frames_ref) + return AVERROR(ENOMEM); + + out_frames_ctx = (AVHWFramesContext *)drawbox->out_frames_ref->data; + + out_frames_ctx->format = AV_PIX_FMT_NI_QUAD; + out_frames_ctx->width = outlink->w; + out_frames_ctx->height = outlink->h; + out_frames_ctx->sw_format = drawbox->out_format; + out_frames_ctx->initial_pool_size = + NI_DRAWBOX_ID; // Repurposed as identity code + + av_hwframe_ctx_init(drawbox->out_frames_ref); + + FilterLink *lo = ff_filter_link(ctx->outputs[0]); + av_buffer_unref(&lo->hw_frames_ctx); + lo->hw_frames_ctx = av_buffer_ref(drawbox->out_frames_ref); + + if (!lo->hw_frames_ctx) + return AVERROR(ENOMEM); + + return 0; + +fail: + return ret; +} + +/* Process a received frame */ +static int filter_frame(AVFilterLink *link, AVFrame *in) +{ + NetIntDrawBoxContext *drawbox = link->dst->priv; + AVFilterLink *outlink = link->dst->outputs[0]; + AVFrame *out = NULL; + niFrameSurface1_t* frame_surface,*new_frame_surface; + AVHWFramesContext *pAVHFWCtx,*out_frames_ctx; + AVNIDeviceContext *pAVNIDevCtx; + AVNIFramesContext *out_ni_ctx; + ni_retcode_t retcode; + int drawbox_format, cardno; + uint16_t tempFID; + 
double var_values[VARS_NB], res; + char *expr; + int ret; + int i; + uint32_t box_count = 0; + const AVPixFmtDescriptor *desc; + + frame_surface = (niFrameSurface1_t *) in->data[3]; + if (frame_surface == NULL) { + return AVERROR(EINVAL); + } + + pAVHFWCtx = (AVHWFramesContext *) in->hw_frames_ctx->data; + pAVNIDevCtx = (AVNIDeviceContext *)pAVHFWCtx->device_ctx->hwctx; + cardno = ni_get_cardno(in); + + if (!drawbox->initialized) { + retcode = ni_device_session_context_init(&drawbox->api_ctx); + if (retcode < 0) { + av_log(link->dst, AV_LOG_ERROR, + "ni drawbox filter session context init failure\n"); + goto fail; + } + + drawbox->api_ctx.device_handle = pAVNIDevCtx->cards[cardno]; + drawbox->api_ctx.blk_io_handle = pAVNIDevCtx->cards[cardno]; + + drawbox->api_ctx.hw_id = cardno; + drawbox->api_ctx.device_type = NI_DEVICE_TYPE_SCALER; + drawbox->api_ctx.scaler_operation = NI_SCALER_OPCODE_DRAWBOX; + drawbox->api_ctx.keep_alive_timeout = drawbox->keep_alive_timeout; + + av_log(link->dst, AV_LOG_INFO, + "Open drawbox session to card %d, hdl %d, blk_hdl %d\n", cardno, + drawbox->api_ctx.device_handle, drawbox->api_ctx.blk_io_handle); + + retcode = + ni_device_session_open(&drawbox->api_ctx, NI_DEVICE_TYPE_SCALER); + if (retcode != NI_RETCODE_SUCCESS) { + av_log(link->dst, AV_LOG_ERROR, + "Can't open device session on card %d\n", cardno); + + /* Close operation will free the device frames */ + ni_device_session_close(&drawbox->api_ctx, 1, NI_DEVICE_TYPE_SCALER); + ni_device_session_context_clear(&drawbox->api_ctx); + goto fail; + } + + drawbox->session_opened = 1; + + if (drawbox->params.filterblit) { + retcode = ni_scaler_set_params(&drawbox->api_ctx, &(drawbox->params)); + if (retcode < 0) + goto fail; + } + + if (!((av_strstart(outlink->dst->filter->name, "ni_quadra", NULL)) || (av_strstart(outlink->dst->filter->name, "hwdownload", NULL)))) { + link->dst->extra_hw_frames = (DEFAULT_FRAME_THREAD_QUEUE_SIZE > 1) ? 
DEFAULT_FRAME_THREAD_QUEUE_SIZE : 0; + } + retcode = init_out_pool(link->dst); + + if (retcode < 0) { + av_log(link->dst, AV_LOG_ERROR, + "Internal output allocation failed rc = %d\n", retcode); + goto fail; + } + + out_frames_ctx = (AVHWFramesContext *)drawbox->out_frames_ref->data; + out_ni_ctx = (AVNIFramesContext *)out_frames_ctx->hwctx; + ni_cpy_hwframe_ctx(pAVHFWCtx, out_frames_ctx); + ni_device_session_copy(&drawbox->api_ctx, &out_ni_ctx->api_ctx); + + desc = av_pix_fmt_desc_get(pAVHFWCtx->sw_format); + + if ((in->color_range == AVCOL_RANGE_JPEG) && !(desc->flags & AV_PIX_FMT_FLAG_RGB)) { + av_log(link->dst, AV_LOG_WARNING, + "WARNING: Full color range input, limited color range output\n"); + } + + drawbox->initialized = 1; + } + + drawbox_format = ff_ni_ffmpeg_to_gc620_pix_fmt(pAVHFWCtx->sw_format); + + retcode = ni_frame_buffer_alloc_hwenc(&drawbox->api_dst_frame.data.frame, + outlink->w, + outlink->h, + 0); + + if (retcode != NI_RETCODE_SUCCESS) { + retcode = AVERROR(ENOMEM); + goto fail; + } + + var_values[VAR_IN_H] = var_values[VAR_IH] = link->h; + var_values[VAR_IN_W] = var_values[VAR_IW] = link->w; + var_values[VAR_X] = NAN; + var_values[VAR_Y] = NAN; + var_values[VAR_H] = NAN; + var_values[VAR_W] = NAN; + + memset(&drawbox->scaler_drawbox_paras, 0, sizeof(drawbox->scaler_drawbox_paras)); +
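/* + * Editor's note (descriptive comment, not in the original patch): each + * box's x/y/w/h option is an expression evaluated against the input + * dimensions; x/y are clamped into the frame, and a negative width or + * height is treated as "extend to the frame edge". + */ +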
for (i = 0; i < NI_MAX_SUPPORT_DRAWBOX_NUM; i++) { + /* evaluate the box expressions */ + var_values[VAR_MAX] = link->w; + if ((ret = av_expr_parse_and_eval(&res, (expr = drawbox->box_x_expr[i]), + var_names, var_values, + NULL, NULL, NULL, NULL, NULL, 0, link->dst)) < 0) { + retcode = ret; + goto fail; + } + drawbox->box_x[i] = var_values[VAR_X] = ((res < var_values[VAR_MAX]) ? ((res < 0) ? 0 : res) : (var_values[VAR_MAX] - 1)); + + var_values[VAR_MAX] = link->h; + if ((ret = av_expr_parse_and_eval(&res, (expr = drawbox->box_y_expr[i]), + var_names, var_values, + NULL, NULL, NULL, NULL, NULL, 0, link->dst)) < 0) { + retcode = ret; + goto fail; + } + drawbox->box_y[i] = var_values[VAR_Y] = ((res < var_values[VAR_MAX]) ? ((res < 0) ? 0 : res) : (var_values[VAR_MAX] - 1)); + + var_values[VAR_MAX] = link->w - drawbox->box_x[i]; + if ((ret = av_expr_parse_and_eval(&res, (expr = drawbox->box_w_expr[i]), + var_names, var_values, + NULL, NULL, NULL, NULL, NULL, 0, link->dst)) < 0) { + retcode = ret; + goto fail; + } + drawbox->box_w[i] = var_values[VAR_W] = ((res < var_values[VAR_MAX]) ? res : var_values[VAR_MAX]); + drawbox->box_w[i] = (drawbox->box_w[i] >= 0) ? drawbox->box_w[i] : var_values[VAR_MAX]; + + var_values[VAR_MAX] = link->h - drawbox->box_y[i]; + if ((ret = av_expr_parse_and_eval(&res, (expr = drawbox->box_h_expr[i]), + var_names, var_values, + NULL, NULL, NULL, NULL, NULL, 0, link->dst)) < 0) { + retcode = ret; + goto fail; + } + drawbox->box_h[i] = var_values[VAR_H] = ((res < var_values[VAR_MAX]) ? res : var_values[VAR_MAX]); + drawbox->box_h[i] = (drawbox->box_h[i] >= 0) ? drawbox->box_h[i] : var_values[VAR_MAX]; + + /* sanity check width and height */ + if (drawbox->box_w[i] < 0 || drawbox->box_h[i] < 0) { + av_log(link->dst, AV_LOG_ERROR, "Size values less than 0 are not acceptable.\n"); + retcode = AVERROR(EINVAL); + goto fail; + } + + // the draw parameters are passed to the device through drawbox->scaler_drawbox_paras + av_log(link->dst, AV_LOG_DEBUG, "%d: x %d, y %d, w %d, h %d, color %x\n", + i, drawbox->box_x[i], drawbox->box_y[i], drawbox->box_w[i], drawbox->box_h[i], + drawbox->box_rgba_color[i][0] + (drawbox->box_rgba_color[i][1] << 8) + (drawbox->box_rgba_color[i][2] << 16) + (drawbox->box_rgba_color[i][3] << 24)); + + if ((drawbox->box_w[i] > 0) && (drawbox->box_h[i] > 0)) { + drawbox->scaler_drawbox_paras.multi_drawbox_params[box_count].start_x = drawbox->box_x[i]; + drawbox->scaler_drawbox_paras.multi_drawbox_params[box_count].start_y = drawbox->box_y[i]; + drawbox->scaler_drawbox_paras.multi_drawbox_params[box_count].end_x = drawbox->box_x[i] + drawbox->box_w[i] - 1; + drawbox->scaler_drawbox_paras.multi_drawbox_params[box_count].end_y = drawbox->box_y[i] + drawbox->box_h[i] - 1; + drawbox->scaler_drawbox_paras.multi_drawbox_params[box_count].rgba_c = drawbox->box_rgba_color[0][B] + (drawbox->box_rgba_color[0][G] << 8) + (drawbox->box_rgba_color[0][R] << 16) + (drawbox->box_rgba_color[0][A] << 24); + box_count++; + } + } + +#ifdef NI_MEASURE_LATENCY + ff_ni_update_benchmark(NULL); +#endif + + retcode = ni_scaler_set_drawbox_params(&drawbox->api_ctx, + &drawbox->scaler_drawbox_paras.multi_drawbox_params[0]); + if (retcode != NI_RETCODE_SUCCESS) { + retcode = AVERROR(ENOMEM); + goto fail; + } + + drawbox->frame_in.picture_width = FFALIGN(in->width, 2); + drawbox->frame_in.picture_height = FFALIGN(in->height, 2); + drawbox->frame_in.picture_format = drawbox_format; + drawbox->frame_in.session_id = frame_surface->ui16session_ID; + drawbox->frame_in.output_index = frame_surface->output_idx; + drawbox->frame_in.frame_index = frame_surface->ui16FrameIdx; + + /* + * Configure device input frame parameters + */ + retcode = ni_device_config_frame(&drawbox->api_ctx, &drawbox->frame_in); + + if (retcode != NI_RETCODE_SUCCESS) { + av_log(link->dst, AV_LOG_DEBUG, + "Can't configure device input frame %d\n", retcode); + retcode = AVERROR(ENOMEM); + goto fail; + } + + drawbox_format = ff_ni_ffmpeg_to_gc620_pix_fmt(drawbox->out_format); + + drawbox->frame_out.picture_width = outlink->w; + drawbox->frame_out.picture_height = outlink->h; + drawbox->frame_out.picture_format = drawbox_format; + +
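/* + * Editor's note (descriptive comment, not in the original patch): with + * inplace=1 the input surface's own frame index is handed to the + * allocator below, so the 2D engine draws the boxes directly into the + * source frame; otherwise -1 requests a fresh surface from the pool. + */ +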
/* Allocate hardware device destination frame. This acquires a frame + * from the pool + */ + retcode = ni_device_alloc_frame(&drawbox->api_ctx, + FFALIGN(outlink->w, 2), + FFALIGN(outlink->h, 2), + drawbox_format, + NI_SCALER_FLAG_IO, + 0, + 0, + 0, + 0, + 0, + drawbox->inplace ? frame_surface->ui16FrameIdx : -1, + NI_DEVICE_TYPE_SCALER); + + if (retcode != NI_RETCODE_SUCCESS) { + av_log(link->dst, AV_LOG_DEBUG, + "Can't allocate device output frame %d\n", retcode); + retcode = AVERROR(ENOMEM); + goto fail; + } + + /* Set the new frame index */ + retcode = ni_device_session_read_hwdesc(&drawbox->api_ctx, &drawbox->api_dst_frame, + NI_DEVICE_TYPE_SCALER); + + if (retcode != NI_RETCODE_SUCCESS) { + av_log(link->dst, AV_LOG_ERROR, + "Can't acquire output frame %d\n", retcode); + retcode = AVERROR(ENOMEM); + goto fail; + } + +#ifdef NI_MEASURE_LATENCY + ff_ni_update_benchmark("ni_quadra_drawbox"); +#endif + + /* + * For an in-place drawbox, we have modified the input + * frame so just pass it along to the downstream. + */ + if (drawbox->inplace) { + av_log(link->dst, AV_LOG_DEBUG, + "vf_drawbox_ni.c:IN trace ui16FrameIdx = [%d] --> out [%d]\n", + frame_surface->ui16FrameIdx, frame_surface->ui16FrameIdx); + return ff_filter_frame(link->dst->outputs[0], in); + } + + out = av_frame_alloc(); + if (!out) { + retcode = AVERROR(ENOMEM); + goto fail; + } + + av_frame_copy_props(out, in); + + out->width = outlink->w; + out->height = outlink->h; + + out->format = AV_PIX_FMT_NI_QUAD; + + /* Quadra 2D engine always outputs limited color range */ + out->color_range = AVCOL_RANGE_MPEG; + + /* Reference the new hw frames context */ + out->hw_frames_ctx = av_buffer_ref(drawbox->out_frames_ref); + if (!out->hw_frames_ctx) { + retcode = AVERROR(ENOMEM); + goto fail; + } + + out->data[3] = av_malloc(sizeof(niFrameSurface1_t)); + + if (!out->data[3]) { + retcode = AVERROR(ENOMEM); + goto fail; + } + + /* Copy the frame surface from the incoming frame */ + memcpy(out->data[3], in->data[3], sizeof(niFrameSurface1_t)); + + tempFID = frame_surface->ui16FrameIdx; + frame_surface = (niFrameSurface1_t *)out->data[3]; + new_frame_surface = (niFrameSurface1_t *)drawbox->api_dst_frame.data.frame.p_data[3]; + frame_surface->ui16FrameIdx = new_frame_surface->ui16FrameIdx; + frame_surface->ui16session_ID = new_frame_surface->ui16session_ID; + frame_surface->device_handle = new_frame_surface->device_handle; + frame_surface->output_idx = new_frame_surface->output_idx; + frame_surface->src_cpu = new_frame_surface->src_cpu; + frame_surface->dma_buf_fd = 0; + + ff_ni_set_bit_depth_and_encoding_type(&frame_surface->bit_depth, + &frame_surface->encoding_type, + pAVHFWCtx->sw_format); + + /* Remove ni-split specific assets */ + frame_surface->ui32nodeAddress = 0; + frame_surface->ui16width = out->width; + frame_surface->ui16height = out->height; + + av_log(link->dst, AV_LOG_DEBUG, + "vf_drawbox_ni.c:IN trace ui16FrameIdx = [%d] --> out [%d]\n", + tempFID, frame_surface->ui16FrameIdx); + + out->buf[0] = av_buffer_create(out->data[3], sizeof(niFrameSurface1_t), + ff_ni_frame_free, NULL, 0); + + av_frame_free(&in); + + return ff_filter_frame(link->dst->outputs[0], out); + +fail: + av_frame_free(&in); + av_frame_free(&out); + return retcode; +} + +
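/* + * Illustrative usage (editor's sketch, not part of the patch): with + * frames already in Quadra HW memory (e.g. uploaded through this patch + * set's ni_quadra_hwupload filter), a red 300x200 box at (64,48) could + * be drawn with something like: + * ffmpeg ... -vf "ni_quadra_drawbox=x=64:y=48:w=300:h=200:color=red" ... + */ +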
static int process_command(AVFilterContext *ctx, const char *cmd, const char *args, char *res, int res_len, int flags) +{ + AVFilterLink *inlink = ctx->inputs[0]; + NetIntDrawBoxContext *s = ctx->priv; + int old_x = s->box_x[0]; + int old_y = s->box_y[0]; + int old_w = s->box_w[0]; + int old_h = s->box_h[0]; + char *old_color = av_strdup(s->box_color_str[0]); + int ret; + + ret = ff_filter_process_command(ctx, cmd, args, res, res_len, flags); + if (ret < 0) { + av_log(ctx, AV_LOG_ERROR, "Bad command/arguments (%d)\n", ret); + av_free(old_color); + return ret; + } + + ret = init(ctx); + if (ret < 0) + goto end; + ret = config_input(inlink); +end: + if (ret < 0) { + s->box_x[0] = old_x; + s->box_y[0] = old_y; + s->box_w[0] = old_w; + s->box_h[0] = old_h; + /* hand the saved copy back rather than memcpy'ing into a buffer + * that may be shorter than the old string */ + av_freep(&s->box_color_str[0]); + s->box_color_str[0] = old_color; + old_color = NULL; + } + + av_free(old_color); + return ret; +} + +static int activate(AVFilterContext *ctx) +{ + AVFilterLink *inlink = ctx->inputs[0]; + AVFilterLink *outlink = ctx->outputs[0]; + AVFrame *frame = NULL; + int ret = 0; + NetIntDrawBoxContext *s = inlink->dst->priv; + + // Forward the status on the output link to the input link; if the status is set, discard all queued frames + FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink); + + if (ff_inlink_check_available_frame(inlink)) { + if (s->initialized && !s->inplace) { + ret = ni_device_session_query_buffer_avail(&s->api_ctx, NI_DEVICE_TYPE_SCALER); + } + + if (ret == NI_RETCODE_ERROR_UNSUPPORTED_FW_VERSION) { + av_log(ctx, AV_LOG_WARNING, "No backpressure support in FW\n"); + } else if (ret < 0) { + av_log(ctx, AV_LOG_WARNING, "%s: query ret %d, ready %u inlink framequeue %zu available_frame %d outlink framequeue %zu frame_wanted %d - return NOT READY\n", + __func__, ret, ctx->ready, ff_inlink_queued_frames(inlink), ff_inlink_check_available_frame(inlink), ff_inlink_queued_frames(outlink), ff_outlink_frame_wanted(outlink)); + return FFERROR_NOT_READY; + } + + ret = ff_inlink_consume_frame(inlink, &frame); + if (ret < 0) + return ret; + + ret = filter_frame(inlink, frame); + if (ret >= 0) { + ff_filter_set_ready(ctx, 300); + } + return ret; + } + + // We did not get a frame from the input link, check its status + FF_FILTER_FORWARD_STATUS(inlink, outlink); + + // We have no frames yet from the input link and no EOF, so request some. + FF_FILTER_FORWARD_WANTED(outlink, inlink); + + return FFERROR_NOT_READY; +} + +
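/* + * Editor's note (descriptive comment, not in the original patch): the + * options below marked RFLAGS carry AV_OPT_FLAG_RUNTIME_PARAM, so they + * can be changed on a running graph (e.g. via the sendcmd filter); + * process_command() above re-runs init()/config_input() and rolls back + * to the previous box on failure. + */ +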
#define OFFSET(x) offsetof(NetIntDrawBoxContext, x) +#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_FILTERING_PARAM) +#define RFLAGS (FLAGS | AV_OPT_FLAG_RUNTIME_PARAM) + +static const AVOption ni_drawbox_options[] = { + { "x", "set horizontal position of the left box edge", OFFSET(box_x_expr[0]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, RFLAGS }, + { "y", "set vertical position of the top box edge", OFFSET(box_y_expr[0]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, RFLAGS }, + { "width", "set width of the box", OFFSET(box_w_expr[0]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, RFLAGS }, + { "w", "set width of the box", OFFSET(box_w_expr[0]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, RFLAGS }, + { "height", "set height of the box", OFFSET(box_h_expr[0]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, RFLAGS }, + { "h", "set height of the box", OFFSET(box_h_expr[0]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, RFLAGS }, + { "color", "set color of the box", OFFSET(box_color_str[0]), AV_OPT_TYPE_STRING, {.str="black"}, 0, 0, RFLAGS }, + { "c", "set color of the box", OFFSET(box_color_str[0]), AV_OPT_TYPE_STRING, {.str="black"}, 0, 0, RFLAGS }, + { "x1", "set horizontal position of box 1", OFFSET(box_x_expr[1]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, RFLAGS }, + { "y1", "set vertical position of box 1", OFFSET(box_y_expr[1]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, RFLAGS }, + { "w1", "set width of box 1", OFFSET(box_w_expr[1]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, RFLAGS }, + { "h1", "set height of box 1", OFFSET(box_h_expr[1]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, RFLAGS }, + { "x2", "set horizontal position of box 2", OFFSET(box_x_expr[2]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, RFLAGS }, + { "y2", "set vertical position of box 2", OFFSET(box_y_expr[2]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, RFLAGS }, + { "w2", "set width of box 2", OFFSET(box_w_expr[2]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, RFLAGS }, + { "h2", "set height of box 2", OFFSET(box_h_expr[2]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, RFLAGS }, + { "x3", "set horizontal position of box 3", OFFSET(box_x_expr[3]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, RFLAGS }, + { "y3", "set vertical position of box 3", OFFSET(box_y_expr[3]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, RFLAGS }, + { "w3", "set width of box 3", OFFSET(box_w_expr[3]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, RFLAGS }, + { "h3", "set height of box 3", OFFSET(box_h_expr[3]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, RFLAGS }, + { "x4", "set horizontal position of box 4", OFFSET(box_x_expr[4]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, RFLAGS }, + { "y4", "set vertical position of box 4", OFFSET(box_y_expr[4]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, RFLAGS }, + { "w4", "set width of box 4", OFFSET(box_w_expr[4]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, RFLAGS }, + { "h4", "set height of box 4", OFFSET(box_h_expr[4]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, RFLAGS }, + { "filterblit", "enable filterblit", OFFSET(params.filterblit), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS }, + { "inplace", "draw boxes in-place", OFFSET(inplace), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS }, + NI_FILT_OPTION_KEEPALIVE, + NI_FILT_OPTION_BUFFER_LIMIT, + { NULL } +}; + +AVFILTER_DEFINE_CLASS(ni_drawbox); + +static const AVFilterPad inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = config_input, + }, +}; + +static const AVFilterPad outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = config_props, + }, +}; + +FFFilter ff_vf_drawbox_ni_quadra = { + .p.name = "ni_quadra_drawbox", + .p.description = NULL_IF_CONFIG_SMALL( + "NETINT Quadra video drawbox v" NI_XCODER_REVISION), + .p.priv_class = &ni_drawbox_class, + .priv_size = sizeof(NetIntDrawBoxContext), + .init = init, + .uninit = uninit, + .activate = activate, + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, + FILTER_INPUTS(inputs), + FILTER_OUTPUTS(outputs), + FILTER_QUERY_FUNC(query_formats), + .process_command = process_command, +}; diff --git a/libavfilter/vf_drawtext_ni.c b/libavfilter/vf_drawtext_ni.c new file mode 100644 index 0000000000..c7f56c9bcd --- /dev/null +++ b/libavfilter/vf_drawtext_ni.c @@ -0,0 +1,3401 @@ +/* + * Copyright (c) 2011 Stefano Sabatini + * Copyright (c) 2010 S.N. Hemanth Meenakshisundaram + * Copyright (c) 2003 Gustavo Sverzut Barbieri <gsbarbieri@yahoo.com.br> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * NETINT drawtext filter, based on the original vf_drawtext.c + * + */ + +#include "config.h" + +#if HAVE_SYS_TIME_H +#include <sys/time.h> +#endif +#include <sys/types.h> +#include <sys/stat.h> +#include <time.h> +#if HAVE_UNISTD_H +#include <unistd.h> +#endif +#include <fenv.h> + +#if CONFIG_LIBFONTCONFIG +#include <fontconfig/fontconfig.h> +#endif + +#include "libavutil/avstring.h" +#include "libavutil/bprint.h" +#include "libavutil/common.h" +#include "libavutil/file.h" +#include "libavutil/eval.h" +#include "libavutil/opt.h" +#include "libavutil/random_seed.h" +#include "libavutil/parseutils.h" +#include "libavutil/timecode.h" +#include "libavutil/time_internal.h" +#include "libavutil/tree.h" +#include "libavutil/lfg.h" +#include "libavutil/version.h" +#include "nifilter.h" +#include "filters.h" +#include "drawutils.h" +#include "formats.h" +#include "libavutil/mem.h" + +#include "video.h" + +#if CONFIG_LIBFRIBIDI +#include <fribidi.h> +#endif + +#include <ft2build.h> +#include FT_FREETYPE_H +#include FT_GLYPH_H +#include FT_STROKER_H + +#define MAX_TEXT_NUM 32 + +static const char *const var_names[] = { + "dar", + "hsub", "vsub", + "line_h", "lh", ///< line height, same as max_glyph_h + "main_h", "h", "H", ///< height of the input video + "main_w", "w", "W", ///< width of the input video + "max_glyph_a", "ascent", ///< max glyph ascent + "max_glyph_d", "descent", ///< min glyph descent + "max_glyph_h", ///< max glyph height + "max_glyph_w", ///< max glyph width + "n", ///< number of frame + "sar", + "t", ///< timestamp expressed in seconds + "text_h", "th", ///< height of the rendered text + "text_w", "tw", ///< width of the rendered text + "x", + "y", + "pict_type", +#if FF_API_FRAME_PKT + "pkt_pos", + "pkt_size", +#endif + "pkt_duration", + NULL +}; + +static const char *const fun2_names[] = { + "rand" +}; + +static double drand(void *opaque, double min, double max) +{ + return min + (max-min) / UINT_MAX * av_lfg_get(opaque); +} + +typedef double (*eval_func2)(void *, double a, double b); + +static const eval_func2 fun2[] = { + drand, + NULL +}; + +enum var_name { + VAR_DAR, + VAR_HSUB, VAR_VSUB, + VAR_LINE_H, VAR_LH, + VAR_MAIN_H, VAR_h, VAR_H, + VAR_MAIN_W, VAR_w, VAR_W, + VAR_MAX_GLYPH_A, VAR_ASCENT, + VAR_MAX_GLYPH_D, VAR_DESCENT, + VAR_MAX_GLYPH_H, + VAR_MAX_GLYPH_W, + VAR_N, + VAR_SAR, + VAR_T, + VAR_TEXT_H, VAR_TH, + VAR_TEXT_W, VAR_TW, + VAR_X, + VAR_Y, + VAR_PICT_TYPE, +#if FF_API_FRAME_PKT + VAR_PKT_POS, + VAR_PKT_SIZE, +#endif + VAR_PKT_DURATION, + VAR_VARS_NB +}; + +enum expansion_mode { + EXP_NONE, + EXP_NORMAL, + EXP_STRFTIME, +}; + +typedef struct NetIntDrawTextContext { + const AVClass *class; + int exp_mode; ///< expansion mode to use for the text + int reinit; ///< tells if the filter is being reinited + int text_num; ///< number of the text +#if CONFIG_LIBFONTCONFIG + uint8_t *font[MAX_TEXT_NUM]; ///< font to be used +#endif + uint8_t *fontfile[MAX_TEXT_NUM];///< font to be used + uint8_t *text[MAX_TEXT_NUM]; ///< text to be drawn + uint8_t *text_last_updated[MAX_TEXT_NUM]; + AVBPrint expanded_text; ///< used to contain the expanded text + uint8_t *fontcolor_expr[MAX_TEXT_NUM]; ///< fontcolor expression to evaluate + AVBPrint expanded_fontcolor; ///< used to contain the expanded fontcolor spec + int ft_load_flags; ///< 
flags used for loading fonts, see FT_LOAD_* + FT_Vector *positions; ///< positions for each element in the text + size_t nb_positions; ///< number of elements of positions array + char *textfile; ///< file with text to be drawn + int x[MAX_TEXT_NUM]; ///< x position to start drawing one text + int y[MAX_TEXT_NUM]; ///< y position to start drawing one text + int x_bak[MAX_TEXT_NUM]; ///< x position of last uploaded overlay frame + int y_bak[MAX_TEXT_NUM]; ///< y position of last uploaded overlay frame + int x_start; ///< x position for text canvas start in one frame + int y_start; ///< y position for text canvas start in one frame + int x_end; ///< x position for text canvas end in one frame + int y_end; ///< y position for text canvas end in one frame + int max_glyph_w; ///< max glyph width + int max_glyph_h; ///< max glyph height + int shadowx, shadowy; + int borderw; ///< border width + char *fontsize_expr[MAX_TEXT_NUM]; ///< expression for fontsize + AVExpr *fontsize_pexpr[MAX_TEXT_NUM]; ///< parsed expressions for fontsize + unsigned int fontsize[MAX_TEXT_NUM]; ///< font size to use + unsigned int default_fontsize; ///< default font size to use + + int line_spacing; ///< line spacing in pixels + short int draw_box; ///< draw box around text - true or false + char *boxborderw[MAX_TEXT_NUM]; ///< box border width (padding) + /// allowed formats: "all", "vert|horiz", "top|right|bottom|left" + int bb_top[MAX_TEXT_NUM]; ///< the size of the top box border + int bb_right[MAX_TEXT_NUM]; ///< the size of the right box border + int bb_bottom[MAX_TEXT_NUM]; ///< the size of the bottom box border + int bb_left[MAX_TEXT_NUM]; ///< the size of the left box border + int use_kerning[MAX_TEXT_NUM]; ///< font kerning is used - true/false + int tabsize[MAX_TEXT_NUM]; ///< tab size + int fix_bounds; ///< do we let it go out of frame bounds - t/f + int optimize_upload; + FFDrawContext dc; + FFDrawColor fontcolor[MAX_TEXT_NUM]; ///< foreground color + FFDrawColor shadowcolor; ///< shadow color + FFDrawColor bordercolor; ///< border color + FFDrawColor boxcolor[MAX_TEXT_NUM]; ///< background color + + FT_Library library; ///< freetype font library handle + FT_Face face[MAX_TEXT_NUM]; ///< freetype font face handle + FT_Stroker stroker; ///< freetype stroker handle + struct AVTreeNode *glyphs; ///< rendered glyphs, stored using the UTF-32 char code + char *x_expr[MAX_TEXT_NUM]; ///< expression for x position + char *y_expr[MAX_TEXT_NUM]; ///< expression for y position + AVExpr *x_pexpr[MAX_TEXT_NUM]; ///< parsed expressions for x + AVExpr *y_pexpr[MAX_TEXT_NUM]; ///< parsed expressions for y + int64_t basetime; ///< base pts time in the real world for display + double var_values[VAR_VARS_NB]; + char *a_expr; + AVExpr *a_pexpr; + int alpha; + AVLFG prng; ///< random number generator + char *tc_opt_string; ///< specified timecode option string + AVRational tc_rate; ///< frame rate for timecode + AVTimecode tc; ///< timecode context + int tc24hmax; ///< 1 if timecode is wrapped to 24 hours, 0 otherwise + int reload; ///< reload text file for each frame + int start_number; ///< starting frame number for n/frame_num var +#if CONFIG_LIBFRIBIDI + int text_shaping; ///< 1 to shape the text before drawing it +#endif + AVDictionary *metadata; + +
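/* + * Editor's note (descriptive comment, not in the original patch): the + * fields below are the NETINT-specific state: the rendered text canvas + * (txt_frame) is uploaded to the device and blended through the + * scaler's multi-watermark interface, while dl_frame holds data + * downloaded from the input HW frame when a host-side copy is needed. + */ +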
// NI overlay related + ni_session_context_t api_ctx; + ni_session_data_io_t api_dst_frame; + int session_opened; + + // NI HW frame upload related + AVBufferRef *hwdevice; + AVBufferRef *hwframe; + AVBufferRef *hw_frames_ctx; + + // NI watermark related + ni_scaler_multi_watermark_params_t scaler_watermark_paras; + int watermark_width0; + int watermark_width1; + int watermark_height0; + int watermark_height1; + + // NI overlay inplace crop related + ni_session_context_t crop_api_ctx; + ni_session_data_io_t crop_api_dst_frame; + uint16_t ui16CropFrameIdx; + int crop_session_opened; + + int keep_alive_timeout; /* keep alive timeout setting */ + int buffer_limit; + + int initialized; + int main_has_alpha; + int use_watermark; + AVBufferRef *out_frames_ref; + + // contains data downloaded from the input HW frame + ni_session_data_io_t dl_frame; + // contains text portion of overlaying frame + ni_session_data_io_t txt_frame; + + AVFrame *up_frame; + AVFrame *keep_overlay; + int upload_drawtext_frame; + int filtered_frame_count; + int framerate; + int initiated_upload_width; + int initiated_upload_height; +} NetIntDrawTextContext; + +static const enum AVPixelFormat alpha_pix_fmts[] = { + AV_PIX_FMT_RGBA, AV_PIX_FMT_ARGB, AV_PIX_FMT_ABGR, + AV_PIX_FMT_BGRA, AV_PIX_FMT_NONE +}; + +#define OFFSET(x) offsetof(NetIntDrawTextContext, x) +#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM + +static const AVOption ni_drawtext_options[] = { + { "fontfile", "set font file", OFFSET(fontfile[0]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "ff0", "set font file", OFFSET(fontfile[0]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "ff1", "set font file", OFFSET(fontfile[1]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "ff2", "set font file", OFFSET(fontfile[2]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "ff3", "set font file", OFFSET(fontfile[3]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "ff4", "set font file", OFFSET(fontfile[4]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "ff5", "set font file", OFFSET(fontfile[5]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "ff6", "set font file", OFFSET(fontfile[6]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "ff7", "set font file", OFFSET(fontfile[7]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "ff8", "set font file", OFFSET(fontfile[8]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "ff9", "set font file", OFFSET(fontfile[9]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "ff10", "set font file", OFFSET(fontfile[10]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "ff11", "set font file", OFFSET(fontfile[11]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "ff12", "set font file", OFFSET(fontfile[12]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "ff13", "set font file", OFFSET(fontfile[13]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "ff14", "set font file", OFFSET(fontfile[14]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "ff15", "set font file", OFFSET(fontfile[15]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "ff16", "set font file", OFFSET(fontfile[16]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "ff17", "set font file", OFFSET(fontfile[17]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "ff18", "set font file", OFFSET(fontfile[18]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "ff19", "set font file", OFFSET(fontfile[19]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "ff20", "set font file", OFFSET(fontfile[20]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "ff21", "set font file", OFFSET(fontfile[21]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "ff22", "set font file", OFFSET(fontfile[22]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "ff23", "set font 
file", OFFSET(fontfile[23]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "ff24", "set font file", OFFSET(fontfile[24]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "ff25", "set font file", OFFSET(fontfile[25]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "ff26", "set font file", OFFSET(fontfile[26]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "ff27", "set font file", OFFSET(fontfile[27]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "ff28", "set font file", OFFSET(fontfile[28]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "ff29", "set font file", OFFSET(fontfile[29]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "ff30", "set font file", OFFSET(fontfile[30]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "ff31", "set font file", OFFSET(fontfile[31]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "text", "set text", OFFSET(text[0]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "t0", "set text", OFFSET(text[0]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "t1", "set text", OFFSET(text[1]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "t2", "set text", OFFSET(text[2]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "t3", "set text", OFFSET(text[3]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "t4", "set text", OFFSET(text[4]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "t5", "set text", OFFSET(text[5]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "t6", "set text", OFFSET(text[6]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "t7", "set text", OFFSET(text[7]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "t8", "set text", OFFSET(text[8]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "t9", "set text", OFFSET(text[9]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "t10", "set text", OFFSET(text[10]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "t11", "set text", OFFSET(text[11]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "t12", "set text", OFFSET(text[12]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "t13", "set text", OFFSET(text[13]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "t14", "set text", OFFSET(text[14]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "t15", "set text", OFFSET(text[15]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "t16", "set text", OFFSET(text[16]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "t17", "set text", OFFSET(text[17]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "t18", "set text", OFFSET(text[18]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "t19", "set text", OFFSET(text[19]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "t20", "set text", OFFSET(text[20]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "t21", "set text", OFFSET(text[21]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "t22", "set text", OFFSET(text[22]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "t23", "set text", OFFSET(text[23]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "t24", "set text", OFFSET(text[24]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "t25", "set text", OFFSET(text[25]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "t26", "set text", OFFSET(text[26]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "t27", "set text", OFFSET(text[27]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "t28", "set text", OFFSET(text[28]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "t29", "set text", 
OFFSET(text[29]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "t30", "set text", OFFSET(text[30]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "t31", "set text", OFFSET(text[31]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "textfile", "set text file", OFFSET(textfile), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "fontcolor", "set foreground color", OFFSET(fontcolor[0].rgba), AV_OPT_TYPE_COLOR, {.str="black"}, 0, 0, FLAGS }, + { "fc0", "set foreground color", OFFSET(fontcolor[0].rgba), AV_OPT_TYPE_COLOR, {.str="black"}, 0, 0, FLAGS }, + { "fc1", "set foreground color", OFFSET(fontcolor[1].rgba), AV_OPT_TYPE_COLOR, {.str="black"}, 0, 0, FLAGS }, + { "fc2", "set foreground color", OFFSET(fontcolor[2].rgba), AV_OPT_TYPE_COLOR, {.str="black"}, 0, 0, FLAGS }, + { "fc3", "set foreground color", OFFSET(fontcolor[3].rgba), AV_OPT_TYPE_COLOR, {.str="black"}, 0, 0, FLAGS }, + { "fc4", "set foreground color", OFFSET(fontcolor[4].rgba), AV_OPT_TYPE_COLOR, {.str="black"}, 0, 0, FLAGS }, + { "fc5", "set foreground color", OFFSET(fontcolor[5].rgba), AV_OPT_TYPE_COLOR, {.str="black"}, 0, 0, FLAGS }, + { "fc6", "set foreground color", OFFSET(fontcolor[6].rgba), AV_OPT_TYPE_COLOR, {.str="black"}, 0, 0, FLAGS }, + { "fc7", "set foreground color", OFFSET(fontcolor[7].rgba), AV_OPT_TYPE_COLOR, {.str="black"}, 0, 0, FLAGS }, + { "fc8", "set foreground color", OFFSET(fontcolor[8].rgba), AV_OPT_TYPE_COLOR, {.str="black"}, 0, 0, FLAGS }, + { "fc9", "set foreground color", OFFSET(fontcolor[9].rgba), AV_OPT_TYPE_COLOR, {.str="black"}, 0, 0, FLAGS }, + { "fc10", "set foreground color", OFFSET(fontcolor[10].rgba), AV_OPT_TYPE_COLOR, {.str="black"}, 0, 0, FLAGS }, + { "fc11", "set foreground color", OFFSET(fontcolor[11].rgba), AV_OPT_TYPE_COLOR, {.str="black"}, 0, 0, FLAGS }, + { "fc12", "set foreground color", OFFSET(fontcolor[12].rgba), AV_OPT_TYPE_COLOR, {.str="black"}, 0, 0, FLAGS }, + { "fc13", "set foreground color", OFFSET(fontcolor[13].rgba), AV_OPT_TYPE_COLOR, {.str="black"}, 0, 0, FLAGS }, + { "fc14", "set foreground color", OFFSET(fontcolor[14].rgba), AV_OPT_TYPE_COLOR, {.str="black"}, 0, 0, FLAGS }, + { "fc15", "set foreground color", OFFSET(fontcolor[15].rgba), AV_OPT_TYPE_COLOR, {.str="black"}, 0, 0, FLAGS }, + { "fc16", "set foreground color", OFFSET(fontcolor[16].rgba), AV_OPT_TYPE_COLOR, {.str="black"}, 0, 0, FLAGS }, + { "fc17", "set foreground color", OFFSET(fontcolor[17].rgba), AV_OPT_TYPE_COLOR, {.str="black"}, 0, 0, FLAGS }, + { "fc18", "set foreground color", OFFSET(fontcolor[18].rgba), AV_OPT_TYPE_COLOR, {.str="black"}, 0, 0, FLAGS }, + { "fc19", "set foreground color", OFFSET(fontcolor[19].rgba), AV_OPT_TYPE_COLOR, {.str="black"}, 0, 0, FLAGS }, + { "fc20", "set foreground color", OFFSET(fontcolor[20].rgba), AV_OPT_TYPE_COLOR, {.str="black"}, 0, 0, FLAGS }, + { "fc21", "set foreground color", OFFSET(fontcolor[21].rgba), AV_OPT_TYPE_COLOR, {.str="black"}, 0, 0, FLAGS }, + { "fc22", "set foreground color", OFFSET(fontcolor[22].rgba), AV_OPT_TYPE_COLOR, {.str="black"}, 0, 0, FLAGS }, + { "fc23", "set foreground color", OFFSET(fontcolor[23].rgba), AV_OPT_TYPE_COLOR, {.str="black"}, 0, 0, FLAGS }, + { "fc24", "set foreground color", OFFSET(fontcolor[24].rgba), AV_OPT_TYPE_COLOR, {.str="black"}, 0, 0, FLAGS }, + { "fc25", "set foreground color", OFFSET(fontcolor[25].rgba), AV_OPT_TYPE_COLOR, {.str="black"}, 0, 0, FLAGS }, + { "fc26", "set foreground color", OFFSET(fontcolor[26].rgba), AV_OPT_TYPE_COLOR, {.str="black"}, 0, 0, FLAGS }, + { "fc27", "set foreground color", 
OFFSET(fontcolor[27].rgba), AV_OPT_TYPE_COLOR, {.str="black"}, 0, 0, FLAGS }, + { "fc28", "set foreground color", OFFSET(fontcolor[28].rgba), AV_OPT_TYPE_COLOR, {.str="black"}, 0, 0, FLAGS }, + { "fc29", "set foreground color", OFFSET(fontcolor[29].rgba), AV_OPT_TYPE_COLOR, {.str="black"}, 0, 0, FLAGS }, + { "fc30", "set foreground color", OFFSET(fontcolor[30].rgba), AV_OPT_TYPE_COLOR, {.str="black"}, 0, 0, FLAGS }, + { "fc31", "set foreground color", OFFSET(fontcolor[31].rgba), AV_OPT_TYPE_COLOR, {.str="black"}, 0, 0, FLAGS }, + { "fontcolor_expr", "set foreground color expression", OFFSET(fontcolor_expr[0]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "fc_expr0", "set foreground color expression", OFFSET(fontcolor_expr[0]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "fc_expr1", "set foreground color expression", OFFSET(fontcolor_expr[1]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "fc_expr2", "set foreground color expression", OFFSET(fontcolor_expr[2]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "fc_expr3", "set foreground color expression", OFFSET(fontcolor_expr[3]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "fc_expr4", "set foreground color expression", OFFSET(fontcolor_expr[4]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "fc_expr5", "set foreground color expression", OFFSET(fontcolor_expr[5]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "fc_expr6", "set foreground color expression", OFFSET(fontcolor_expr[6]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "fc_expr7", "set foreground color expression", OFFSET(fontcolor_expr[7]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "fc_expr8", "set foreground color expression", OFFSET(fontcolor_expr[8]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "fc_expr9", "set foreground color expression", OFFSET(fontcolor_expr[9]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "fc_expr10", "set foreground color expression", OFFSET(fontcolor_expr[10]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "fc_expr11", "set foreground color expression", OFFSET(fontcolor_expr[11]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "fc_expr12", "set foreground color expression", OFFSET(fontcolor_expr[12]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "fc_expr13", "set foreground color expression", OFFSET(fontcolor_expr[13]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "fc_expr14", "set foreground color expression", OFFSET(fontcolor_expr[14]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "fc_expr15", "set foreground color expression", OFFSET(fontcolor_expr[15]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "fc_expr16", "set foreground color expression", OFFSET(fontcolor_expr[16]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "fc_expr17", "set foreground color expression", OFFSET(fontcolor_expr[17]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "fc_expr18", "set foreground color expression", OFFSET(fontcolor_expr[18]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "fc_expr19", "set foreground color expression", OFFSET(fontcolor_expr[19]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "fc_expr20", "set foreground color expression", OFFSET(fontcolor_expr[20]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "fc_expr21", "set foreground color expression", OFFSET(fontcolor_expr[21]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "fc_expr22", "set foreground color expression", OFFSET(fontcolor_expr[22]), 
AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "fc_expr23", "set foreground color expression", OFFSET(fontcolor_expr[23]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "fc_expr24", "set foreground color expression", OFFSET(fontcolor_expr[24]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "fc_expr25", "set foreground color expression", OFFSET(fontcolor_expr[25]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "fc_expr26", "set foreground color expression", OFFSET(fontcolor_expr[26]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "fc_expr27", "set foreground color expression", OFFSET(fontcolor_expr[27]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "fc_expr28", "set foreground color expression", OFFSET(fontcolor_expr[28]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "fc_expr29", "set foreground color expression", OFFSET(fontcolor_expr[29]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "fc_expr30", "set foreground color expression", OFFSET(fontcolor_expr[30]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "fc_expr31", "set foreground color expression", OFFSET(fontcolor_expr[31]), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "boxcolor", "set box color", OFFSET(boxcolor[0].rgba), AV_OPT_TYPE_COLOR, {.str="white"}, 0, 0, FLAGS }, + { "bc0", "set box color", OFFSET(boxcolor[0].rgba), AV_OPT_TYPE_COLOR, {.str="white"}, 0, 0, FLAGS }, + { "bc1", "set box color", OFFSET(boxcolor[1].rgba), AV_OPT_TYPE_COLOR, {.str="white"}, 0, 0, FLAGS }, + { "bc2", "set box color", OFFSET(boxcolor[2].rgba), AV_OPT_TYPE_COLOR, {.str="white"}, 0, 0, FLAGS }, + { "bc3", "set box color", OFFSET(boxcolor[3].rgba), AV_OPT_TYPE_COLOR, {.str="white"}, 0, 0, FLAGS }, + { "bc4", "set box color", OFFSET(boxcolor[4].rgba), AV_OPT_TYPE_COLOR, {.str="white"}, 0, 0, FLAGS }, + { "bc5", "set box color", OFFSET(boxcolor[5].rgba), AV_OPT_TYPE_COLOR, {.str="white"}, 0, 0, FLAGS }, + { "bc6", "set box color", OFFSET(boxcolor[6].rgba), AV_OPT_TYPE_COLOR, {.str="white"}, 0, 0, FLAGS }, + { "bc7", "set box color", OFFSET(boxcolor[7].rgba), AV_OPT_TYPE_COLOR, {.str="white"}, 0, 0, FLAGS }, + { "bc8", "set box color", OFFSET(boxcolor[8].rgba), AV_OPT_TYPE_COLOR, {.str="white"}, 0, 0, FLAGS }, + { "bc9", "set box color", OFFSET(boxcolor[9].rgba), AV_OPT_TYPE_COLOR, {.str="white"}, 0, 0, FLAGS }, + { "bc10", "set box color", OFFSET(boxcolor[10].rgba), AV_OPT_TYPE_COLOR, {.str="white"}, 0, 0, FLAGS }, + { "bc11", "set box color", OFFSET(boxcolor[11].rgba), AV_OPT_TYPE_COLOR, {.str="white"}, 0, 0, FLAGS }, + { "bc12", "set box color", OFFSET(boxcolor[12].rgba), AV_OPT_TYPE_COLOR, {.str="white"}, 0, 0, FLAGS }, + { "bc13", "set box color", OFFSET(boxcolor[13].rgba), AV_OPT_TYPE_COLOR, {.str="white"}, 0, 0, FLAGS }, + { "bc14", "set box color", OFFSET(boxcolor[14].rgba), AV_OPT_TYPE_COLOR, {.str="white"}, 0, 0, FLAGS }, + { "bc15", "set box color", OFFSET(boxcolor[15].rgba), AV_OPT_TYPE_COLOR, {.str="white"}, 0, 0, FLAGS }, + { "bc16", "set box color", OFFSET(boxcolor[16].rgba), AV_OPT_TYPE_COLOR, {.str="white"}, 0, 0, FLAGS }, + { "bc17", "set box color", OFFSET(boxcolor[17].rgba), AV_OPT_TYPE_COLOR, {.str="white"}, 0, 0, FLAGS }, + { "bc18", "set box color", OFFSET(boxcolor[18].rgba), AV_OPT_TYPE_COLOR, {.str="white"}, 0, 0, FLAGS }, + { "bc19", "set box color", OFFSET(boxcolor[19].rgba), AV_OPT_TYPE_COLOR, {.str="white"}, 0, 0, FLAGS }, + { "bc20", "set box color", OFFSET(boxcolor[20].rgba), AV_OPT_TYPE_COLOR, {.str="white"}, 0, 0, FLAGS }, + { "bc21", "set box color", 
OFFSET(boxcolor[21].rgba), AV_OPT_TYPE_COLOR, {.str="white"}, 0, 0, FLAGS }, + { "bc22", "set box color", OFFSET(boxcolor[22].rgba), AV_OPT_TYPE_COLOR, {.str="white"}, 0, 0, FLAGS }, + { "bc23", "set box color", OFFSET(boxcolor[23].rgba), AV_OPT_TYPE_COLOR, {.str="white"}, 0, 0, FLAGS }, + { "bc24", "set box color", OFFSET(boxcolor[24].rgba), AV_OPT_TYPE_COLOR, {.str="white"}, 0, 0, FLAGS }, + { "bc25", "set box color", OFFSET(boxcolor[25].rgba), AV_OPT_TYPE_COLOR, {.str="white"}, 0, 0, FLAGS }, + { "bc26", "set box color", OFFSET(boxcolor[26].rgba), AV_OPT_TYPE_COLOR, {.str="white"}, 0, 0, FLAGS }, + { "bc27", "set box color", OFFSET(boxcolor[27].rgba), AV_OPT_TYPE_COLOR, {.str="white"}, 0, 0, FLAGS }, + { "bc28", "set box color", OFFSET(boxcolor[28].rgba), AV_OPT_TYPE_COLOR, {.str="white"}, 0, 0, FLAGS }, + { "bc29", "set box color", OFFSET(boxcolor[29].rgba), AV_OPT_TYPE_COLOR, {.str="white"}, 0, 0, FLAGS }, + { "bc30", "set box color", OFFSET(boxcolor[30].rgba), AV_OPT_TYPE_COLOR, {.str="white"}, 0, 0, FLAGS }, + { "bc31", "set box color", OFFSET(boxcolor[31].rgba), AV_OPT_TYPE_COLOR, {.str="white"}, 0, 0, FLAGS }, + { "bordercolor", "set border color", OFFSET(bordercolor.rgba), AV_OPT_TYPE_COLOR, {.str="black"}, 0, 0, FLAGS }, + { "shadowcolor", "set shadow color", OFFSET(shadowcolor.rgba), AV_OPT_TYPE_COLOR, {.str="black"}, 0, 0, FLAGS }, + { "box", "set box", OFFSET(draw_box), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1 , FLAGS }, + { "boxborderw", "set box borders width", OFFSET(boxborderw[0]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "bb0", "set box borders width", OFFSET(boxborderw[0]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "bb1", "set box borders width", OFFSET(boxborderw[1]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "bb2", "set box borders width", OFFSET(boxborderw[2]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "bb3", "set box borders width", OFFSET(boxborderw[3]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "bb4", "set box borders width", OFFSET(boxborderw[4]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "bb5", "set box borders width", OFFSET(boxborderw[5]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "bb6", "set box borders width", OFFSET(boxborderw[6]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "bb7", "set box borders width", OFFSET(boxborderw[7]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "bb8", "set box borders width", OFFSET(boxborderw[8]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "bb9", "set box borders width", OFFSET(boxborderw[9]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "bb10", "set box borders width", OFFSET(boxborderw[10]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "bb11", "set box borders width", OFFSET(boxborderw[11]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "bb12", "set box borders width", OFFSET(boxborderw[12]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "bb13", "set box borders width", OFFSET(boxborderw[13]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "bb14", "set box borders width", OFFSET(boxborderw[14]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "bb15", "set box borders width", OFFSET(boxborderw[15]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "bb16", "set box borders width", OFFSET(boxborderw[16]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "bb17", "set box borders width", OFFSET(boxborderw[17]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "bb18", "set box borders width", OFFSET(boxborderw[18]), 
AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "bb19", "set box borders width", OFFSET(boxborderw[19]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "bb20", "set box borders width", OFFSET(boxborderw[20]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "bb21", "set box borders width", OFFSET(boxborderw[21]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "bb22", "set box borders width", OFFSET(boxborderw[22]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "bb23", "set box borders width", OFFSET(boxborderw[23]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "bb24", "set box borders width", OFFSET(boxborderw[24]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "bb25", "set box borders width", OFFSET(boxborderw[25]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "bb26", "set box borders width", OFFSET(boxborderw[26]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "bb27", "set box borders width", OFFSET(boxborderw[27]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "bb28", "set box borders width", OFFSET(boxborderw[28]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "bb29", "set box borders width", OFFSET(boxborderw[29]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "bb30", "set box borders width", OFFSET(boxborderw[30]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "bb31", "set box borders width", OFFSET(boxborderw[31]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "line_spacing", "set line spacing in pixels", OFFSET(line_spacing), AV_OPT_TYPE_INT, {.i64=0}, INT_MIN, INT_MAX,FLAGS }, + { "fontsize", "set font size", OFFSET(fontsize_expr[0]), AV_OPT_TYPE_STRING, {.str="36"}, 0, 0 , FLAGS }, + { "fs0", "set font size", OFFSET(fontsize_expr[0]), AV_OPT_TYPE_STRING, {.str="36"}, 0, 0 , FLAGS }, + { "fs1", "set font size", OFFSET(fontsize_expr[1]), AV_OPT_TYPE_STRING, {.str="36"}, 0, 0 , FLAGS }, + { "fs2", "set font size", OFFSET(fontsize_expr[2]), AV_OPT_TYPE_STRING, {.str="36"}, 0, 0 , FLAGS }, + { "fs3", "set font size", OFFSET(fontsize_expr[3]), AV_OPT_TYPE_STRING, {.str="36"}, 0, 0 , FLAGS }, + { "fs4", "set font size", OFFSET(fontsize_expr[4]), AV_OPT_TYPE_STRING, {.str="36"}, 0, 0 , FLAGS }, + { "fs5", "set font size", OFFSET(fontsize_expr[5]), AV_OPT_TYPE_STRING, {.str="36"}, 0, 0 , FLAGS }, + { "fs6", "set font size", OFFSET(fontsize_expr[6]), AV_OPT_TYPE_STRING, {.str="36"}, 0, 0 , FLAGS }, + { "fs7", "set font size", OFFSET(fontsize_expr[7]), AV_OPT_TYPE_STRING, {.str="36"}, 0, 0 , FLAGS }, + { "fs8", "set font size", OFFSET(fontsize_expr[8]), AV_OPT_TYPE_STRING, {.str="36"}, 0, 0 , FLAGS }, + { "fs9", "set font size", OFFSET(fontsize_expr[9]), AV_OPT_TYPE_STRING, {.str="36"}, 0, 0 , FLAGS }, + { "fs10", "set font size", OFFSET(fontsize_expr[10]), AV_OPT_TYPE_STRING, {.str="36"}, 0, 0 , FLAGS }, + { "fs11", "set font size", OFFSET(fontsize_expr[11]), AV_OPT_TYPE_STRING, {.str="36"}, 0, 0 , FLAGS }, + { "fs12", "set font size", OFFSET(fontsize_expr[12]), AV_OPT_TYPE_STRING, {.str="36"}, 0, 0 , FLAGS }, + { "fs13", "set font size", OFFSET(fontsize_expr[13]), AV_OPT_TYPE_STRING, {.str="36"}, 0, 0 , FLAGS }, + { "fs14", "set font size", OFFSET(fontsize_expr[14]), AV_OPT_TYPE_STRING, {.str="36"}, 0, 0 , FLAGS }, + { "fs15", "set font size", OFFSET(fontsize_expr[15]), AV_OPT_TYPE_STRING, {.str="36"}, 0, 0 , FLAGS }, + { "fs16", "set font size", OFFSET(fontsize_expr[16]), AV_OPT_TYPE_STRING, {.str="36"}, 0, 0 , FLAGS }, + { "fs17", "set font size", OFFSET(fontsize_expr[17]), AV_OPT_TYPE_STRING, {.str="36"}, 0, 0 , FLAGS }, 
+ { "fs18", "set font size", OFFSET(fontsize_expr[18]), AV_OPT_TYPE_STRING, {.str="36"}, 0, 0 , FLAGS }, + { "fs19", "set font size", OFFSET(fontsize_expr[19]), AV_OPT_TYPE_STRING, {.str="36"}, 0, 0 , FLAGS }, + { "fs20", "set font size", OFFSET(fontsize_expr[20]), AV_OPT_TYPE_STRING, {.str="36"}, 0, 0 , FLAGS }, + { "fs21", "set font size", OFFSET(fontsize_expr[21]), AV_OPT_TYPE_STRING, {.str="36"}, 0, 0 , FLAGS }, + { "fs22", "set font size", OFFSET(fontsize_expr[22]), AV_OPT_TYPE_STRING, {.str="36"}, 0, 0 , FLAGS }, + { "fs23", "set font size", OFFSET(fontsize_expr[23]), AV_OPT_TYPE_STRING, {.str="36"}, 0, 0 , FLAGS }, + { "fs24", "set font size", OFFSET(fontsize_expr[24]), AV_OPT_TYPE_STRING, {.str="36"}, 0, 0 , FLAGS }, + { "fs25", "set font size", OFFSET(fontsize_expr[25]), AV_OPT_TYPE_STRING, {.str="36"}, 0, 0 , FLAGS }, + { "fs26", "set font size", OFFSET(fontsize_expr[26]), AV_OPT_TYPE_STRING, {.str="36"}, 0, 0 , FLAGS }, + { "fs27", "set font size", OFFSET(fontsize_expr[27]), AV_OPT_TYPE_STRING, {.str="36"}, 0, 0 , FLAGS }, + { "fs28", "set font size", OFFSET(fontsize_expr[28]), AV_OPT_TYPE_STRING, {.str="36"}, 0, 0 , FLAGS }, + { "fs29", "set font size", OFFSET(fontsize_expr[29]), AV_OPT_TYPE_STRING, {.str="36"}, 0, 0 , FLAGS }, + { "fs30", "set font size", OFFSET(fontsize_expr[30]), AV_OPT_TYPE_STRING, {.str="36"}, 0, 0 , FLAGS }, + { "fs31", "set font size", OFFSET(fontsize_expr[31]), AV_OPT_TYPE_STRING, {.str="36"}, 0, 0 , FLAGS }, + { "x", "set x expression", OFFSET(x_expr[0]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "y", "set y expression", OFFSET(y_expr[0]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "x0", "set x expression", OFFSET(x_expr[0]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "y0", "set y expression", OFFSET(y_expr[0]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "x1", "set x expression", OFFSET(x_expr[1]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "y1", "set y expression", OFFSET(y_expr[1]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "x2", "set x expression", OFFSET(x_expr[2]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "y2", "set y expression", OFFSET(y_expr[2]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "x3", "set x expression", OFFSET(x_expr[3]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "y3", "set y expression", OFFSET(y_expr[3]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "x4", "set x expression", OFFSET(x_expr[4]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "y4", "set y expression", OFFSET(y_expr[4]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "x5", "set x expression", OFFSET(x_expr[5]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "y5", "set y expression", OFFSET(y_expr[5]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "x6", "set x expression", OFFSET(x_expr[6]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "y6", "set y expression", OFFSET(y_expr[6]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "x7", "set x expression", OFFSET(x_expr[7]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "y7", "set y expression", OFFSET(y_expr[7]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "x8", "set x expression", OFFSET(x_expr[8]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "y8", "set y expression", OFFSET(y_expr[8]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "x9", "set x expression", OFFSET(x_expr[9]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "y9", "set y expression", OFFSET(y_expr[9]), AV_OPT_TYPE_STRING, 
{.str="0"}, 0, 0, FLAGS }, + { "x10", "set x expression", OFFSET(x_expr[10]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "y10", "set y expression", OFFSET(y_expr[10]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "x11", "set x expression", OFFSET(x_expr[11]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "y11", "set y expression", OFFSET(y_expr[11]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "x12", "set x expression", OFFSET(x_expr[12]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "y12", "set y expression", OFFSET(y_expr[12]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "x13", "set x expression", OFFSET(x_expr[13]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "y13", "set y expression", OFFSET(y_expr[13]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "x14", "set x expression", OFFSET(x_expr[14]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "y14", "set y expression", OFFSET(y_expr[14]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "x15", "set x expression", OFFSET(x_expr[15]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "y15", "set y expression", OFFSET(y_expr[15]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "x16", "set x expression", OFFSET(x_expr[16]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "y16", "set y expression", OFFSET(y_expr[16]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "x17", "set x expression", OFFSET(x_expr[17]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "y17", "set y expression", OFFSET(y_expr[17]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "x18", "set x expression", OFFSET(x_expr[18]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "y18", "set y expression", OFFSET(y_expr[18]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "x19", "set x expression", OFFSET(x_expr[19]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "y19", "set y expression", OFFSET(y_expr[19]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "x20", "set x expression", OFFSET(x_expr[20]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "y20", "set y expression", OFFSET(y_expr[20]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "x21", "set x expression", OFFSET(x_expr[21]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "y21", "set y expression", OFFSET(y_expr[21]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "x22", "set x expression", OFFSET(x_expr[22]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "y22", "set y expression", OFFSET(y_expr[22]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "x23", "set x expression", OFFSET(x_expr[23]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "y23", "set y expression", OFFSET(y_expr[23]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "x24", "set x expression", OFFSET(x_expr[24]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "y24", "set y expression", OFFSET(y_expr[24]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "x25", "set x expression", OFFSET(x_expr[25]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "y25", "set y expression", OFFSET(y_expr[25]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "x26", "set x expression", OFFSET(x_expr[26]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "y26", "set y expression", OFFSET(y_expr[26]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "x27", "set x expression", OFFSET(x_expr[27]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "y27", "set y expression", OFFSET(y_expr[27]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, 
+ { "x28", "set x expression", OFFSET(x_expr[28]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "y28", "set y expression", OFFSET(y_expr[28]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "x29", "set x expression", OFFSET(x_expr[29]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "y29", "set y expression", OFFSET(y_expr[29]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "x30", "set x expression", OFFSET(x_expr[30]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "y30", "set y expression", OFFSET(y_expr[30]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "x31", "set x expression", OFFSET(x_expr[31]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "y31", "set y expression", OFFSET(y_expr[31]), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "shadowx", "set shadow x offset", OFFSET(shadowx), AV_OPT_TYPE_INT, {.i64=0}, INT_MIN, INT_MAX , FLAGS }, + { "shadowy", "set shadow y offset", OFFSET(shadowy), AV_OPT_TYPE_INT, {.i64=0}, INT_MIN, INT_MAX , FLAGS }, + { "borderw", "set border width", OFFSET(borderw), AV_OPT_TYPE_INT, {.i64=0}, INT_MIN, INT_MAX , FLAGS }, + { "tabsize", "set tab size", OFFSET(tabsize[0]), AV_OPT_TYPE_INT, {.i64=4}, 0, INT_MAX , FLAGS }, + { "basetime", "set base time", OFFSET(basetime), AV_OPT_TYPE_INT64, {.i64=AV_NOPTS_VALUE}, INT64_MIN, INT64_MAX , FLAGS }, +#if CONFIG_LIBFONTCONFIG + { "font", "Font name", OFFSET(font[0]), AV_OPT_TYPE_STRING, { .str = "Sans" }, .flags = FLAGS }, + { "f0", "Font name", OFFSET(font[0]), AV_OPT_TYPE_STRING, { .str = "Sans" }, .flags = FLAGS }, + { "f1", "Font name", OFFSET(font[1]), AV_OPT_TYPE_STRING, { .str = "Sans" }, .flags = FLAGS }, + { "f2", "Font name", OFFSET(font[2]), AV_OPT_TYPE_STRING, { .str = "Sans" }, .flags = FLAGS }, + { "f3", "Font name", OFFSET(font[3]), AV_OPT_TYPE_STRING, { .str = "Sans" }, .flags = FLAGS }, + { "f4", "Font name", OFFSET(font[4]), AV_OPT_TYPE_STRING, { .str = "Sans" }, .flags = FLAGS }, + { "f5", "Font name", OFFSET(font[5]), AV_OPT_TYPE_STRING, { .str = "Sans" }, .flags = FLAGS }, + { "f6", "Font name", OFFSET(font[6]), AV_OPT_TYPE_STRING, { .str = "Sans" }, .flags = FLAGS }, + { "f7", "Font name", OFFSET(font[7]), AV_OPT_TYPE_STRING, { .str = "Sans" }, .flags = FLAGS }, + { "f8", "Font name", OFFSET(font[8]), AV_OPT_TYPE_STRING, { .str = "Sans" }, .flags = FLAGS }, + { "f9", "Font name", OFFSET(font[9]), AV_OPT_TYPE_STRING, { .str = "Sans" }, .flags = FLAGS }, + { "f10", "Font name", OFFSET(font[10]), AV_OPT_TYPE_STRING, { .str = "Sans" }, .flags = FLAGS }, + { "f11", "Font name", OFFSET(font[11]), AV_OPT_TYPE_STRING, { .str = "Sans" }, .flags = FLAGS }, + { "f12", "Font name", OFFSET(font[12]), AV_OPT_TYPE_STRING, { .str = "Sans" }, .flags = FLAGS }, + { "f13", "Font name", OFFSET(font[13]), AV_OPT_TYPE_STRING, { .str = "Sans" }, .flags = FLAGS }, + { "f14", "Font name", OFFSET(font[14]), AV_OPT_TYPE_STRING, { .str = "Sans" }, .flags = FLAGS }, + { "f15", "Font name", OFFSET(font[15]), AV_OPT_TYPE_STRING, { .str = "Sans" }, .flags = FLAGS }, + { "f16", "Font name", OFFSET(font[16]), AV_OPT_TYPE_STRING, { .str = "Sans" }, .flags = FLAGS }, + { "f17", "Font name", OFFSET(font[17]), AV_OPT_TYPE_STRING, { .str = "Sans" }, .flags = FLAGS }, + { "f18", "Font name", OFFSET(font[18]), AV_OPT_TYPE_STRING, { .str = "Sans" }, .flags = FLAGS }, + { "f19", "Font name", OFFSET(font[19]), AV_OPT_TYPE_STRING, { .str = "Sans" }, .flags = FLAGS }, + { "f20", "Font name", OFFSET(font[20]), AV_OPT_TYPE_STRING, { .str = "Sans" }, .flags = FLAGS }, + { "f21", "Font name", 
OFFSET(font[21]), AV_OPT_TYPE_STRING, { .str = "Sans" }, .flags = FLAGS }, + { "f22", "Font name", OFFSET(font[22]), AV_OPT_TYPE_STRING, { .str = "Sans" }, .flags = FLAGS }, + { "f23", "Font name", OFFSET(font[23]), AV_OPT_TYPE_STRING, { .str = "Sans" }, .flags = FLAGS }, + { "f24", "Font name", OFFSET(font[24]), AV_OPT_TYPE_STRING, { .str = "Sans" }, .flags = FLAGS }, + { "f25", "Font name", OFFSET(font[25]), AV_OPT_TYPE_STRING, { .str = "Sans" }, .flags = FLAGS }, + { "f26", "Font name", OFFSET(font[26]), AV_OPT_TYPE_STRING, { .str = "Sans" }, .flags = FLAGS }, + { "f27", "Font name", OFFSET(font[27]), AV_OPT_TYPE_STRING, { .str = "Sans" }, .flags = FLAGS }, + { "f28", "Font name", OFFSET(font[28]), AV_OPT_TYPE_STRING, { .str = "Sans" }, .flags = FLAGS }, + { "f29", "Font name", OFFSET(font[29]), AV_OPT_TYPE_STRING, { .str = "Sans" }, .flags = FLAGS }, + { "f30", "Font name", OFFSET(font[30]), AV_OPT_TYPE_STRING, { .str = "Sans" }, .flags = FLAGS }, + { "f31", "Font name", OFFSET(font[31]), AV_OPT_TYPE_STRING, { .str = "Sans" }, .flags = FLAGS }, +#endif + { "expansion", "set the expansion mode", OFFSET(exp_mode), AV_OPT_TYPE_INT, {.i64=EXP_NORMAL}, 0, 2, FLAGS, "expansion" }, + { "none", "set no expansion", OFFSET(exp_mode), AV_OPT_TYPE_CONST, {.i64=EXP_NONE}, 0, 0, FLAGS, "expansion" }, + { "normal", "set normal expansion", OFFSET(exp_mode), AV_OPT_TYPE_CONST, {.i64=EXP_NORMAL}, 0, 0, FLAGS, "expansion" }, + { "strftime", "set strftime expansion (deprecated)", OFFSET(exp_mode), AV_OPT_TYPE_CONST, {.i64=EXP_STRFTIME}, 0, 0, FLAGS, "expansion" }, + { "timecode", "set initial timecode", OFFSET(tc_opt_string), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, + { "tc24hmax", "set 24 hours max (timecode only)", OFFSET(tc24hmax), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS }, + { "timecode_rate", "set rate (timecode only)", OFFSET(tc_rate), AV_OPT_TYPE_RATIONAL, {.dbl=0}, 0, INT_MAX, FLAGS }, + { "r", "set rate (timecode only)", OFFSET(tc_rate), AV_OPT_TYPE_RATIONAL, {.dbl=0}, 0, INT_MAX, FLAGS }, + { "rate", "set rate (timecode only)", OFFSET(tc_rate), AV_OPT_TYPE_RATIONAL, {.dbl=0}, 0, INT_MAX, FLAGS }, + { "reload", "reload text file for each frame", OFFSET(reload), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS }, + { "alpha", "apply alpha while rendering", OFFSET(a_expr), AV_OPT_TYPE_STRING, { .str = "1" }, .flags = FLAGS }, + { "fix_bounds", "check and fix text coords to avoid clipping", OFFSET(fix_bounds), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS }, + { "start_number", "start frame number for n/frame_num variable", OFFSET(start_number), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS }, + +#if CONFIG_LIBFRIBIDI + { "text_shaping", "attempt to shape text before drawing", OFFSET(text_shaping), AV_OPT_TYPE_BOOL, {.i64=1}, 0, 1, FLAGS }, +#endif + + /* FT_LOAD_* flags */ + { "ft_load_flags", "set font loading flags for libfreetype", OFFSET(ft_load_flags), AV_OPT_TYPE_FLAGS, { .i64 = FT_LOAD_DEFAULT }, 0, INT_MAX, FLAGS, "ft_load_flags" }, + { "default", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FT_LOAD_DEFAULT }, .flags = FLAGS, .unit = "ft_load_flags" }, + { "no_scale", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FT_LOAD_NO_SCALE }, .flags = FLAGS, .unit = "ft_load_flags" }, + { "no_hinting", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FT_LOAD_NO_HINTING }, .flags = FLAGS, .unit = "ft_load_flags" }, + { "render", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FT_LOAD_RENDER }, .flags = FLAGS, .unit = "ft_load_flags" }, + { "no_bitmap", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FT_LOAD_NO_BITMAP }, .flags = FLAGS, .unit = "ft_load_flags" 
}, + { "vertical_layout", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FT_LOAD_VERTICAL_LAYOUT }, .flags = FLAGS, .unit = "ft_load_flags" }, + { "force_autohint", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FT_LOAD_FORCE_AUTOHINT }, .flags = FLAGS, .unit = "ft_load_flags" }, + { "crop_bitmap", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FT_LOAD_CROP_BITMAP }, .flags = FLAGS, .unit = "ft_load_flags" }, + { "pedantic", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FT_LOAD_PEDANTIC }, .flags = FLAGS, .unit = "ft_load_flags" }, + { "ignore_global_advance_width", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FT_LOAD_IGNORE_GLOBAL_ADVANCE_WIDTH }, .flags = FLAGS, .unit = "ft_load_flags" }, + { "no_recurse", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FT_LOAD_NO_RECURSE }, .flags = FLAGS, .unit = "ft_load_flags" }, + { "ignore_transform", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FT_LOAD_IGNORE_TRANSFORM }, .flags = FLAGS, .unit = "ft_load_flags" }, + { "monochrome", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FT_LOAD_MONOCHROME }, .flags = FLAGS, .unit = "ft_load_flags" }, + { "linear_design", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FT_LOAD_LINEAR_DESIGN }, .flags = FLAGS, .unit = "ft_load_flags" }, + { "no_autohint", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FT_LOAD_NO_AUTOHINT }, .flags = FLAGS, .unit = "ft_load_flags" }, + { "optimize_upload", "Decrease the drawtext frame uploading frequency", OFFSET(optimize_upload), AV_OPT_TYPE_BOOL, {.i64=1}, 0, 1, FLAGS}, + { "use_watermark", "Use performance optimizations", OFFSET(use_watermark), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, FLAGS }, // Deprecated language + { "perf_optimization", "Use performance optimizations", OFFSET(use_watermark), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, FLAGS }, + NI_FILT_OPTION_KEEPALIVE, + NI_FILT_OPTION_BUFFER_LIMIT, + { NULL } +}; + +AVFILTER_DEFINE_CLASS(ni_drawtext); + +#undef __FTERRORS_H__ +#define FT_ERROR_START_LIST { +#define FT_ERRORDEF(e, v, s) { (e), (s) }, +#define FT_ERROR_END_LIST { 0, NULL } }; + +static const struct ft_error { + int err; + const char *err_msg; +} ft_errors[] = +#include FT_ERRORS_H + +#define FT_ERRMSG(e) ft_errors[e].err_msg + +typedef struct Glyph { + FT_Glyph glyph; + FT_Glyph border_glyph; + uint32_t code; + unsigned int fontsize; + FT_Bitmap bitmap; ///< array holding bitmaps of font + FT_Bitmap border_bitmap; ///< array holding bitmaps of font border + FT_BBox bbox; + int advance; + int bitmap_left; + int bitmap_top; +} Glyph; + +static int glyph_cmp(const void *key, const void *b) +{ + const Glyph *a = key, *bb = b; + int64_t diff = (int64_t)a->code - (int64_t)bb->code; + + if (diff != 0) + return diff > 0 ? 1 : -1; + else + return FFDIFFSIGN((int64_t)a->fontsize, (int64_t)bb->fontsize); +} + +/** + * Load glyphs corresponding to the UTF-32 codepoint code. 
+
+/**
+ * Load glyphs corresponding to the UTF-32 codepoint code.
+ */
+static int load_glyph(AVFilterContext *ctx, Glyph **glyph_ptr, uint32_t code, int index)
+{
+    NetIntDrawTextContext *s = ctx->priv;
+    FT_BitmapGlyph bitmapglyph;
+    Glyph *glyph, dummy = { 0 };
+    struct AVTreeNode *node = NULL;
+    int ret;
+
+    /* load glyph into s->face->glyph */
+    if (FT_Load_Char(s->face[index], code, s->ft_load_flags))
+        return AVERROR(EINVAL);
+
+    /* if the glyph has already been inserted into s->glyphs, return it directly */
+    dummy.code = code;
+    dummy.fontsize = s->fontsize[index];
+    glyph = av_tree_find(s->glyphs, &dummy, glyph_cmp, NULL);
+    if (glyph) {
+        if (glyph_ptr)
+            *glyph_ptr = glyph;
+        return 0;
+    }
+
+    glyph = av_mallocz(sizeof(*glyph));
+    if (!glyph) {
+        ret = AVERROR(ENOMEM);
+        goto error;
+    }
+    glyph->code = code;
+    glyph->fontsize = s->fontsize[index];
+
+    if (FT_Get_Glyph(s->face[index]->glyph, &glyph->glyph)) {
+        ret = AVERROR(EINVAL);
+        goto error;
+    }
+    if (s->borderw) {
+        glyph->border_glyph = glyph->glyph;
+        if (FT_Glyph_StrokeBorder(&glyph->border_glyph, s->stroker, 0, 0) ||
+            FT_Glyph_To_Bitmap(&glyph->border_glyph, FT_RENDER_MODE_NORMAL, 0, 1)) {
+            ret = AVERROR_EXTERNAL;
+            goto error;
+        }
+        bitmapglyph = (FT_BitmapGlyph) glyph->border_glyph;
+        glyph->border_bitmap = bitmapglyph->bitmap;
+    }
+    if (FT_Glyph_To_Bitmap(&glyph->glyph, FT_RENDER_MODE_NORMAL, 0, 1)) {
+        ret = AVERROR_EXTERNAL;
+        goto error;
+    }
+    bitmapglyph = (FT_BitmapGlyph) glyph->glyph;
+
+    glyph->bitmap = bitmapglyph->bitmap;
+    glyph->bitmap_left = bitmapglyph->left;
+    glyph->bitmap_top = bitmapglyph->top;
+    glyph->advance = s->face[index]->glyph->advance.x >> 6;
+
+    /* measure text height to calculate text_height (or the maximum text height) */
+    FT_Glyph_Get_CBox(glyph->glyph, ft_glyph_bbox_pixels, &glyph->bbox);
+
+    /* cache the newly created glyph */
+    if (!(node = av_tree_node_alloc())) {
+        ret = AVERROR(ENOMEM);
+        goto error;
+    }
+    av_tree_insert(&s->glyphs, glyph, glyph_cmp, &node);
+
+    if (glyph_ptr)
+        *glyph_ptr = glyph;
+    return 0;
+
+error:
+    if (glyph)
+        av_freep(&glyph->glyph);
+
+    av_freep(&glyph);
+    av_freep(&node);
+    return ret;
+}
+
+// Convert a string formatted as "n1|n2|...|nN" into an integer array
+static int string_to_array(const char *source, int *result, int result_size)
+{
+    int counter = 0, size = strlen(source) + 1;
+    char *saveptr, *curval, *dup = av_malloc(size);
+    if (!dup)
+        return 0;
+    av_strlcpy(dup, source, size);
+    if (result_size > 0 && (curval = av_strtok(dup, "|", &saveptr))) {
+        do {
+            result[counter++] = atoi(curval);
+        } while ((curval = av_strtok(NULL, "|", &saveptr)) && counter < result_size);
+    }
+    av_free(dup);
+    return counter;
+}
+
+// convert FFmpeg AV_PIX_FMT_ to NI_PIX_FMT_
+static int ff_to_ni_pix_fmt(int ff_av_pix_fmt)
+{
+    int pixel_format;
+
+    switch (ff_av_pix_fmt) {
+    case AV_PIX_FMT_YUV420P:
+        pixel_format = NI_PIX_FMT_YUV420P;
+        break;
+    case AV_PIX_FMT_YUV420P10LE:
+        pixel_format = NI_PIX_FMT_YUV420P10LE;
+        break;
+    case AV_PIX_FMT_NV12:
+        pixel_format = NI_PIX_FMT_NV12;
+        break;
+    case AV_PIX_FMT_NV16:
+        pixel_format = NI_PIX_FMT_NV16;
+        break;
+    case AV_PIX_FMT_YUYV422:
+        pixel_format = NI_PIX_FMT_YUYV422;
+        break;
+    case AV_PIX_FMT_UYVY422:
+        pixel_format = NI_PIX_FMT_UYVY422;
+        break;
+    case AV_PIX_FMT_P010LE:
+        pixel_format = NI_PIX_FMT_P010LE;
+        break;
+    case AV_PIX_FMT_RGBA:
+        pixel_format = NI_PIX_FMT_RGBA;
+        break;
+    case AV_PIX_FMT_BGRA:
+        pixel_format = NI_PIX_FMT_BGRA;
+        break;
+    case AV_PIX_FMT_ABGR:
+        pixel_format = NI_PIX_FMT_ABGR;
+        break;
+    case AV_PIX_FMT_ARGB:
+        pixel_format = NI_PIX_FMT_ARGB;
+        break;
+    case AV_PIX_FMT_BGR0:
+        pixel_format = NI_PIX_FMT_BGR0;
+        break;
+    default:
+        av_log(NULL, AV_LOG_ERROR, "Pixel format %d not supported.\n",
+               ff_av_pix_fmt);
+        return AVERROR(EINVAL);
+    }
+    return pixel_format;
+}
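+
+/*
+ * The per-text font sizes are expression strings: parse_fontsize() compiles
+ * them once and update_fontsize() re-evaluates them against the filter's
+ * variables, so the effective pixel size may change from frame to frame.
+ */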
+
+static av_cold int set_fontsize(AVFilterContext *ctx, unsigned int fontsize, int index)
+{
+    int err;
+    NetIntDrawTextContext *s = ctx->priv;
+
+    if ((err = FT_Set_Pixel_Sizes(s->face[index], 0, fontsize))) {
+        av_log(ctx, AV_LOG_ERROR, "Could not set font size to %d pixels: %s\n",
+               fontsize, FT_ERRMSG(err));
+        return AVERROR(EINVAL);
+    }
+
+    s->fontsize[index] = fontsize;
+
+    return 0;
+}
+
+static av_cold int parse_fontsize(AVFilterContext *ctx, int index)
+{
+    NetIntDrawTextContext *s = ctx->priv;
+    int err;
+
+    if (s->fontsize_pexpr[index])
+        return 0;
+
+    if (s->fontsize_expr[index] == NULL)
+        return AVERROR(EINVAL);
+
+    if ((err = av_expr_parse(&s->fontsize_pexpr[index], s->fontsize_expr[index], var_names,
+                             NULL, NULL, fun2_names, fun2, 0, ctx)) < 0)
+        return err;
+
+    return 0;
+}
+
+static av_cold int update_fontsize(AVFilterContext *ctx, int index)
+{
+    NetIntDrawTextContext *s = ctx->priv;
+    unsigned int fontsize = s->default_fontsize;
+    int err;
+    double size, roundedsize;
+
+    // if no fontsize specified use the default
+    if (s->fontsize_expr[index] != NULL) {
+        if ((err = parse_fontsize(ctx, index)) < 0)
+            return err;
+
+        size = av_expr_eval(s->fontsize_pexpr[index], s->var_values, &s->prng);
+
+        if (!isnan(size)) {
+            roundedsize = round(size);
+            // test for overflow before cast
+            if (!(roundedsize > INT_MIN && roundedsize < INT_MAX)) {
+                av_log(ctx, AV_LOG_ERROR, "fontsize overflow\n");
+                return AVERROR(EINVAL);
+            }
+
+            fontsize = (int)roundedsize;
+        }
+    }
+
+    if (fontsize == 0)
+        fontsize = 1;
+
+    // no change
+    if (fontsize == s->fontsize[index])
+        return 0;
+
+    return set_fontsize(ctx, fontsize, index);
+}
+
+static int load_font_file(AVFilterContext *ctx, const char *path, int index, int text_index)
+{
+    NetIntDrawTextContext *s = ctx->priv;
+    int err;
+
+    err = FT_New_Face(s->library, path, index, &s->face[text_index]);
+    if (err) {
+#if !CONFIG_LIBFONTCONFIG
+        av_log(ctx, AV_LOG_ERROR, "Could not load font \"%s\": %s\n",
+               s->fontfile[text_index], FT_ERRMSG(err));
+#endif
+        /* with libfontconfig available the failure is silent so that
+         * load_font() can fall back to a fontconfig lookup */
+        return AVERROR(EINVAL);
+    }
+    return 0;
+}
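+
+/*
+ * With libfontconfig enabled, a font family name given through the
+ * "font"/"fN" options is resolved to a concrete font file below; an
+ * explicit fontfile path always takes precedence (see load_font()).
+ */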
+
+#if CONFIG_LIBFONTCONFIG
+static int load_font_fontconfig(AVFilterContext *ctx, int text_index)
+{
+    NetIntDrawTextContext *s = ctx->priv;
+    FcConfig *fontconfig;
+    FcPattern *pat, *best;
+    FcResult result = FcResultMatch;
+    FcChar8 *filename;
+    int index;
+    double size;
+    int err = AVERROR(ENOENT);
+    int parse_err;
+
+    fontconfig = FcInitLoadConfigAndFonts();
+    if (!fontconfig) {
+        av_log(ctx, AV_LOG_ERROR, "could not init fontconfig\n");
+        return AVERROR_UNKNOWN;
+    }
+    pat = FcNameParse(s->fontfile[text_index] ? s->fontfile[text_index] :
+                      (uint8_t *)(intptr_t)"default");
+    if (!pat) {
+        av_log(ctx, AV_LOG_ERROR, "could not parse fontconfig pattern\n");
+        FcConfigDestroy(fontconfig);
+        return AVERROR(EINVAL);
+    }
+
+    FcPatternAddString(pat, FC_FAMILY, s->font[text_index]);
+
+    parse_err = parse_fontsize(ctx, text_index);
+    if (!parse_err) {
+        double size = av_expr_eval(s->fontsize_pexpr[text_index], s->var_values, &s->prng);
+
+        if (isnan(size)) {
+            av_log(ctx, AV_LOG_ERROR, "impossible to find font information\n");
+            FcPatternDestroy(pat);
+            FcConfigDestroy(fontconfig);
+            return AVERROR(EINVAL);
+        }
+
+        FcPatternAddDouble(pat, FC_SIZE, size);
+    }
+
+    FcDefaultSubstitute(pat);
+
+    if (!FcConfigSubstitute(fontconfig, pat, FcMatchPattern)) {
+        av_log(ctx, AV_LOG_ERROR, "could not substitute fontconfig options\n"); /* very unlikely */
+        FcPatternDestroy(pat);
+        FcConfigDestroy(fontconfig);
+        return AVERROR(ENOMEM);
+    }
+
+    best = FcFontMatch(fontconfig, pat, &result);
+    FcPatternDestroy(pat);
+
+    if (!best || result != FcResultMatch) {
+        av_log(ctx, AV_LOG_ERROR,
+               "Cannot find a valid font for the family %s\n",
+               s->font[text_index]);
+        goto fail;
+    }
+
+    if (FcPatternGetInteger(best, FC_INDEX, 0, &index) != FcResultMatch ||
+        FcPatternGetDouble(best, FC_SIZE, 0, &size) != FcResultMatch) {
+        av_log(ctx, AV_LOG_ERROR, "impossible to find font information\n");
+        err = AVERROR(EINVAL);
+        goto fail;
+    }
+
+    if (FcPatternGetString(best, FC_FILE, 0, &filename) != FcResultMatch) {
+        av_log(ctx, AV_LOG_ERROR, "No file path for %s\n",
+               s->font[text_index]);
+        goto fail;
+    }
+
+    av_log(ctx, AV_LOG_INFO, "Using \"%s\"\n", filename);
+    if (parse_err)
+        s->default_fontsize = size + 0.5;
+
+    err = load_font_file(ctx, filename, index, text_index);
+fail:
+    if (best)
+        FcPatternDestroy(best);
+    FcConfigDestroy(fontconfig);
+    return err;
+}
+#endif
+
+static int load_font(AVFilterContext *ctx, int index)
+{
+    NetIntDrawTextContext *s = ctx->priv;
+    int err;
+
+    /* load the face, and set up the encoding, which is by default UTF-8 */
+    if (s->fontfile[index]) {
+        err = load_font_file(ctx, s->fontfile[index], 0, index);
+        if (!err)
+            return 0;
+    }
+#if CONFIG_LIBFONTCONFIG
+    err = load_font_fontconfig(ctx, index);
+    if (!err)
+        return 0;
+#endif
+    return err;
+}
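+
+/*
+ * The text file is mapped and copied into s->text[0]; with the "reload"
+ * option it is re-read in filter_frame() so the rendered string can be
+ * updated externally while the filter runs.
+ */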
+
+static int load_textfile(AVFilterContext *ctx)
+{
+    NetIntDrawTextContext *s = ctx->priv;
+    int err;
+    uint8_t *textbuf;
+    uint8_t *tmp;
+    size_t textbuf_size;
+
+    if ((err = av_file_map(s->textfile, &textbuf, &textbuf_size, 0, ctx)) < 0) {
+        av_log(ctx, AV_LOG_ERROR,
+               "The text file '%s' could not be read or is empty\n",
+               s->textfile);
+        return err;
+    }
+
+    if (textbuf_size > SIZE_MAX - 1 || !(tmp = av_realloc(s->text[0], textbuf_size + 1))) {
+        av_file_unmap(textbuf, textbuf_size);
+        return AVERROR(ENOMEM);
+    }
+    s->text[0] = tmp;
+    memcpy(s->text[0], textbuf, textbuf_size);
+    s->text[0][textbuf_size] = 0;
+    av_file_unmap(textbuf, textbuf_size);
+
+    return 0;
+}
+
+static inline int is_newline(uint32_t c)
+{
+    return c == '\n' || c == '\r' || c == '\f' || c == '\v';
+}
+
+#if CONFIG_LIBFRIBIDI
+static int shape_text(AVFilterContext *ctx)
+{
+    NetIntDrawTextContext *s = ctx->priv;
+    uint8_t *tmp;
+    int ret = AVERROR(ENOMEM);
+    static const FriBidiFlags flags = FRIBIDI_FLAGS_DEFAULT |
+                                      FRIBIDI_FLAGS_ARABIC;
+    FriBidiChar *unicodestr = NULL;
+    FriBidiStrIndex len;
+    FriBidiParType direction = FRIBIDI_PAR_LTR;
+    FriBidiStrIndex line_start = 0;
+    FriBidiStrIndex line_end = 0;
+    FriBidiLevel *embedding_levels = NULL;
+    FriBidiArabicProp *ar_props = NULL;
+    FriBidiCharType *bidi_types = NULL;
+    FriBidiStrIndex i, j;
+
+    len = strlen(s->text[0]);
+    if (!(unicodestr = av_malloc_array(len, sizeof(*unicodestr)))) {
+        goto out;
+    }
+    len = fribidi_charset_to_unicode(FRIBIDI_CHAR_SET_UTF8,
+                                     s->text[0], len, unicodestr);
+
+    bidi_types = av_malloc_array(len, sizeof(*bidi_types));
+    if (!bidi_types) {
+        goto out;
+    }
+
+    fribidi_get_bidi_types(unicodestr, len, bidi_types);
+
+    embedding_levels = av_malloc_array(len, sizeof(*embedding_levels));
+    if (!embedding_levels) {
+        goto out;
+    }
+
+    if (!fribidi_get_par_embedding_levels(bidi_types, len, &direction,
+                                          embedding_levels)) {
+        goto out;
+    }
+
+    ar_props = av_malloc_array(len, sizeof(*ar_props));
+    if (!ar_props) {
+        goto out;
+    }
+
+    fribidi_get_joining_types(unicodestr, len, ar_props);
+    fribidi_join_arabic(bidi_types, len, embedding_levels, ar_props);
+    fribidi_shape(flags, embedding_levels, len, ar_props, unicodestr);
+
+    for (line_end = 0, line_start = 0; line_end < len; line_end++) {
+        if (is_newline(unicodestr[line_end]) || line_end == len - 1) {
+            if (!fribidi_reorder_line(flags, bidi_types,
+                                      line_end - line_start + 1, line_start,
+                                      direction, embedding_levels, unicodestr,
+                                      NULL)) {
+                goto out;
+            }
+            line_start = line_end + 1;
+        }
+    }
+
+    /* Remove zero-width fill chars put in by libfribidi */
+    for (i = 0, j = 0; i < len; i++)
+        if (unicodestr[i] != FRIBIDI_CHAR_FILL)
+            unicodestr[j++] = unicodestr[i];
+    len = j;
+
+    if (!(tmp = av_realloc(s->text[0], (len * 4 + 1) * sizeof(*s->text[0])))) {
+        /* Use len * 4, as a unicode character can be up to 4 bytes in UTF-8 */
+        goto out;
+    }
+
+    s->text[0] = tmp;
+    len = fribidi_unicode_to_charset(FRIBIDI_CHAR_SET_UTF8,
+                                     unicodestr, len, s->text[0]);
+    ret = 0;
+
+out:
+    av_free(unicodestr);
+    av_free(embedding_levels);
+    av_free(ar_props);
+    av_free(bidi_types);
+    return ret;
+}
+#endif
+
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVPixelFormat pix_fmts[] =
+        {AV_PIX_FMT_NI_QUAD, AV_PIX_FMT_NONE};
+    AVFilterFormats *formats;
+
+    formats = ff_make_format_list(pix_fmts);
+
+    if (!formats)
+        return AVERROR(ENOMEM);
+
+    return ff_set_common_formats(ctx, formats);
+}
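+
+/*
+ * Only AV_PIX_FMT_NI_QUAD hardware frames are accepted; software frames
+ * must be uploaded to the device first (e.g. with ni_quadra_hwupload).
+ */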
+
+static int config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    AVFilterLink *inlink = ctx->inputs[0];
+    NetIntDrawTextContext *s = ctx->priv;
+
+    AVHWFramesContext *in_frames_ctx;
+    AVHWFramesContext *out_frames_ctx;
+    int ni_pix_fmt;
+    int ret;
+
+    av_log(ctx, AV_LOG_DEBUG, "%s inlink wxh %dx%d\n", __func__,
+           inlink->w, inlink->h);
+
+    outlink->w = inlink->w;
+    outlink->h = inlink->h;
+
+    FilterLink *li = ff_filter_link(inlink);
+    if (li->hw_frames_ctx == NULL) {
+        av_log(ctx, AV_LOG_ERROR, "No hw context provided on input\n");
+        return AVERROR(EINVAL);
+    }
+    in_frames_ctx = (AVHWFramesContext *)li->hw_frames_ctx->data;
+
+    av_log(ctx, AV_LOG_INFO, "vf_drawtext_ni.c %s in_frames_ctx->sw_format: %d "
+           "%s\n", __func__, in_frames_ctx->sw_format,
+           av_get_pix_fmt_name(in_frames_ctx->sw_format));
+    if ((ni_pix_fmt = ff_to_ni_pix_fmt(in_frames_ctx->sw_format)) < 0) {
+        return AVERROR(EINVAL);
+    }
+
+    s->out_frames_ref = av_hwframe_ctx_alloc(in_frames_ctx->device_ref);
+    if (!s->out_frames_ref)
+        return AVERROR(ENOMEM);
+
+    out_frames_ctx = (AVHWFramesContext *)s->out_frames_ref->data;
+    out_frames_ctx->format = AV_PIX_FMT_NI_QUAD;
+    out_frames_ctx->width = outlink->w;
+    out_frames_ctx->height = outlink->h;
+    out_frames_ctx->sw_format = in_frames_ctx->sw_format;
+    out_frames_ctx->initial_pool_size = NI_DRAWTEXT_ID;
+
+    ret = av_hwframe_ctx_init(s->out_frames_ref);
+    if (ret < 0)
+        return ret;
+    FilterLink *lo = ff_filter_link(ctx->outputs[0]);
+    av_buffer_unref(&lo->hw_frames_ctx);
+    lo->hw_frames_ctx = av_buffer_ref(s->out_frames_ref);
+    if (!lo->hw_frames_ctx)
+        return AVERROR(ENOMEM);
+
+    // The upload will be done per frame if the frame rate is not specified/determined
+    if (li->frame_rate.den)
+        s->framerate = (li->frame_rate.num + li->frame_rate.den - 1) / li->frame_rate.den;
+
+    if (s->framerate == 0)
+        s->framerate = 1;
+    av_log(ctx, AV_LOG_INFO, "drawtext frame upload frequency %d\n", s->framerate);
+
+    return 0;
+}
+
+static int glyph_enu_free(void *opaque, void *elem)
+{
+    Glyph *glyph = elem;
+
+    FT_Done_Glyph(glyph->glyph);
+    FT_Done_Glyph(glyph->border_glyph);
+    av_free(elem);
+    return 0;
+}
+
+static av_cold int init(AVFilterContext *ctx)
+{
+    int i, err;
+    NetIntDrawTextContext *s = ctx->priv;
+    Glyph *glyph;
+
+    for (i = 0; i < s->text_num; i++) {
+        av_expr_free(s->fontsize_pexpr[i]);
+        s->fontsize_pexpr[i] = NULL;
+
+        s->fontsize[i] = 0;
+    }
+    for (i = 0; i < MAX_TEXT_NUM; i++) {
+        s->text_last_updated[i] = NULL;
+        s->x_bak[i] = 0;
+        s->y_bak[i] = 0;
+    }
+    s->default_fontsize = 16;
+    s->upload_drawtext_frame = 1;
+    s->keep_overlay = NULL;
+    s->filtered_frame_count = 0;
+    s->framerate = 0;
+
+    if (!s->fontfile[0] && !CONFIG_LIBFONTCONFIG) {
+        av_log(ctx, AV_LOG_ERROR, "No font filename provided\n");
+        return AVERROR(EINVAL);
+    }
+
+    if (s->textfile) {
+        if (s->text[0]) {
+            av_log(ctx, AV_LOG_ERROR,
+                   "Both text and text file provided. Please provide only one\n");
+            return AVERROR(EINVAL);
+        }
+        if ((err = load_textfile(ctx)) < 0)
+            return err;
+    }
+
+    s->text_num = 0;
+    for (i = 0; i < MAX_TEXT_NUM; i++) {
+        if (!s->text[i]) {
+            break;
+        }
+        s->text_num++;
+    }
+
+    if (s->reload && !s->textfile)
+        av_log(ctx, AV_LOG_WARNING, "No file to reload\n");
+
+    if (s->tc_opt_string) {
+        int ret = av_timecode_init_from_string(&s->tc, s->tc_rate,
+                                               s->tc_opt_string, ctx);
+        if (ret < 0)
+            return ret;
+        if (s->tc24hmax)
+            s->tc.flags |= AV_TIMECODE_FLAG_24HOURSMAX;
+        if (!s->text[0])
+            s->text[0] = av_strdup("");
+    }
+
+    if (!s->text_num) {
+        av_log(ctx, AV_LOG_ERROR,
+               "Either text, a valid file or a timecode must be provided\n");
+        return AVERROR(EINVAL);
+    }
+
+#if CONFIG_LIBFRIBIDI
+    if (s->text_shaping)
+        if ((err = shape_text(ctx)) < 0)
+            return err;
+#endif
+
+    if ((err = FT_Init_FreeType(&(s->library)))) {
+        av_log(ctx, AV_LOG_ERROR,
+               "Could not load FreeType: %s\n", FT_ERRMSG(err));
+        return AVERROR(EINVAL);
+    }
+
+    for (i = 0; i < s->text_num; i++) {
+        if ((err = load_font(ctx, i)) < 0)
+            return err;
+
+        if ((err = update_fontsize(ctx, i)) < 0)
+            return err;
+    }
+
+    if (s->borderw) {
+        if (FT_Stroker_New(s->library, &s->stroker)) {
+            av_log(ctx, AV_LOG_ERROR, "Could not init FT stroker\n");
+            return AVERROR_EXTERNAL;
+        }
+        FT_Stroker_Set(s->stroker, s->borderw << 6, FT_STROKER_LINECAP_ROUND,
+                       FT_STROKER_LINEJOIN_ROUND, 0);
+    }
+
+    for (i = 0; i < s->text_num; i++) {
+        s->use_kerning[i] = FT_HAS_KERNING(s->face[i]);
+
+        /* load the fallback glyph with code 0 */
+        load_glyph(ctx, NULL, 0, i);
+
+        /* set the tabsize in pixels */
+        if ((err = load_glyph(ctx, &glyph, ' ', i)) < 0) {
+            av_log(ctx, AV_LOG_ERROR, "Could not set tabsize.\n");
+            return err;
+        }
+        if (i > 0) {
+            s->tabsize[i] = s->tabsize[0];
+        }
+        s->tabsize[i] *= glyph->advance;
+
+        if (s->exp_mode == EXP_STRFTIME &&
+            (strchr(s->text[i], '%') || strchr(s->text[i], '\\')))
+            av_log(ctx, AV_LOG_WARNING, "expansion=strftime is deprecated.\n");
+    }
+
+    av_bprint_init(&s->expanded_text, 0, AV_BPRINT_SIZE_UNLIMITED);
+    av_bprint_init(&s->expanded_fontcolor, 0, AV_BPRINT_SIZE_UNLIMITED);
+
+    return 0;
+}
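+
+/*
+ * Upload optimization: s->upload_drawtext_frame is raised by draw_text()
+ * only when the expanded text or its position changes, so with
+ * optimize_upload enabled the RGBA text canvas is not re-uploaded to the
+ * device for every output frame.
+ */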
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    NetIntDrawTextContext *s = ctx->priv;
+    int i;
+    // NI HW frame related uninit
+    av_frame_free(&s->keep_overlay);
+    ni_frame_buffer_free(&s->dl_frame.data.frame);
+    ni_frame_buffer_free(&s->txt_frame.data.frame);
+    av_frame_free(&s->up_frame);
+
+    if (s->api_dst_frame.data.frame.p_buffer) {
+        ni_frame_buffer_free(&s->api_dst_frame.data.frame);
+    }
+
+    if (s->crop_api_dst_frame.data.frame.p_buffer) {
+        ni_frame_buffer_free(&s->crop_api_dst_frame.data.frame);
+    }
+
+    if (s->session_opened) {
+        ni_device_session_close(&s->api_ctx, 1, NI_DEVICE_TYPE_SCALER);
+        ni_device_session_context_clear(&s->api_ctx);
+    }
+
+    if (s->crop_session_opened) {
+        ni_device_session_close(&s->crop_api_ctx, 1, NI_DEVICE_TYPE_SCALER);
+        ni_device_session_context_clear(&s->crop_api_ctx);
+    }
+
+    av_buffer_unref(&s->hwframe);
+    av_buffer_unref(&s->hwdevice);
+    av_buffer_unref(&s->hw_frames_ctx);
+
+    av_buffer_unref(&s->out_frames_ref);
+
+    for (i = 0; i < s->text_num; i++) {
+        av_expr_free(s->x_pexpr[i]);
+        av_expr_free(s->y_pexpr[i]);
+        av_expr_free(s->fontsize_pexpr[i]);
+        av_free(s->text_last_updated[i]);
+        s->text_last_updated[i] = NULL;
+
+        s->x_pexpr[i] = s->y_pexpr[i] = s->fontsize_pexpr[i] = NULL;
+    }
+    av_expr_free(s->a_pexpr);
+    s->a_pexpr = NULL;
+
+    av_freep(&s->positions);
+    s->nb_positions = 0;
+
+    av_tree_enumerate(s->glyphs, NULL, NULL, glyph_enu_free);
+    av_tree_destroy(s->glyphs);
+    s->glyphs = NULL;
+
+    for (i = 0; i < s->text_num; i++) {
+        FT_Done_Face(s->face[i]);
+    }
+    FT_Stroker_Done(s->stroker);
+    FT_Done_FreeType(s->library);
+
+    av_bprint_finalize(&s->expanded_text, NULL);
+    av_bprint_finalize(&s->expanded_fontcolor, NULL);
+}
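+
+/*
+ * Text is rasterized with FFDraw into an RGBA staging surface and then
+ * composited onto the video by the Quadra scaler (in-place overlay or
+ * watermark opcode), so the draw context is always initialized for
+ * AV_PIX_FMT_RGBA regardless of the input surface format.
+ */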
+
+static int config_input(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx = inlink->dst;
+    NetIntDrawTextContext *s = ctx->priv;
+    char *expr;
+    int i, ret, flags;
+    AVHWFramesContext *in_frames_ctx;
+
+    FilterLink *li = ff_filter_link(ctx->inputs[0]);
+    if (li->hw_frames_ctx == NULL) {
+        av_log(ctx, AV_LOG_ERROR, "No hw context provided on input\n");
+        return AVERROR(EINVAL);
+    }
+    in_frames_ctx = (AVHWFramesContext *)li->hw_frames_ctx->data;
+
+    av_log(ctx, AV_LOG_INFO, "vf_drawtext_ni.c: inlink->format %d "
+           "in_frames_ctx->sw_format %d %s\n",
+           inlink->format, in_frames_ctx->sw_format,
+           av_get_pix_fmt_name(in_frames_ctx->sw_format));
+
+    switch (in_frames_ctx->sw_format) {
+    case AV_PIX_FMT_NI_QUAD_8_TILE_4X4:
+    case AV_PIX_FMT_NI_QUAD_10_TILE_4X4:
+        av_log(ctx, AV_LOG_ERROR, "vf_drawtext_ni.c: tiled frame pixel "
+               "format not supported\n");
+        return AVERROR(EINVAL);
+    default:
+        break;
+    }
+
+    s->main_has_alpha = ff_fmt_is_in(in_frames_ctx->sw_format, alpha_pix_fmts);
+
+    flags = FF_DRAW_PROCESS_ALPHA;
+    if (ff_draw_init(&s->dc, AV_PIX_FMT_RGBA, flags) < 0) {
+        av_log(ctx, AV_LOG_ERROR, "vf_drawtext_ni.c: could not init draw "
+               "context for RGBA\n");
+        return AVERROR(EINVAL);
+    } else {
+        av_log(ctx, AV_LOG_INFO, "%s ff_draw_init success main_has_alpha: %d\n",
+               __func__, s->main_has_alpha);
+    }
+
+    for (i = 0; i < s->text_num; i++) {
+        ff_draw_color(&s->dc, &s->fontcolor[i], s->fontcolor[i].rgba);
+        ff_draw_color(&s->dc, &s->boxcolor[i], s->boxcolor[i].rgba);
+    }
+    ff_draw_color(&s->dc, &s->shadowcolor, s->shadowcolor.rgba);
+    ff_draw_color(&s->dc, &s->bordercolor, s->bordercolor.rgba);
+
+    s->var_values[VAR_w] = s->var_values[VAR_W] = s->var_values[VAR_MAIN_W] = inlink->w;
+    s->var_values[VAR_h] = s->var_values[VAR_H] = s->var_values[VAR_MAIN_H] = inlink->h;
+    s->var_values[VAR_SAR] = inlink->sample_aspect_ratio.num ? av_q2d(inlink->sample_aspect_ratio) : 1;
+    s->var_values[VAR_DAR] = (double)inlink->w / inlink->h * s->var_values[VAR_SAR];
+    s->var_values[VAR_HSUB] = 1 << s->dc.hsub_max;
+    s->var_values[VAR_VSUB] = 1 << s->dc.vsub_max;
+    s->var_values[VAR_X] = NAN;
+    s->var_values[VAR_Y] = NAN;
+    s->var_values[VAR_T] = NAN;
+
+    av_lfg_init(&s->prng, av_get_random_seed());
+
+    for (i = 0; i < s->text_num; i++) {
+        av_expr_free(s->x_pexpr[i]);
+        av_expr_free(s->y_pexpr[i]);
+
+        s->x_pexpr[i] = s->y_pexpr[i] = NULL;
+
+        if ((ret = av_expr_parse(&s->x_pexpr[i], expr = s->x_expr[i], var_names,
+                                 NULL, NULL, fun2_names, fun2, 0, ctx)) < 0 ||
+            (ret = av_expr_parse(&s->y_pexpr[i], expr = s->y_expr[i], var_names,
+                                 NULL, NULL, fun2_names, fun2, 0, ctx)) < 0) {
+            av_log(ctx, AV_LOG_ERROR, "Failed to parse expression: %s\n", expr);
+            return AVERROR(EINVAL);
+        }
+    }
+
+    av_expr_free(s->a_pexpr);
+    s->a_pexpr = NULL;
+
+    if ((ret = av_expr_parse(&s->a_pexpr, expr = s->a_expr, var_names,
+                             NULL, NULL, fun2_names, fun2, 0, ctx)) < 0) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to parse expression: %s\n", expr);
+        return AVERROR(EINVAL);
+    }
+
+    // prep download/upload buffer
+    if (ni_frame_buffer_alloc_dl(&(s->dl_frame.data.frame),
+                                 inlink->w, inlink->h, NI_PIX_FMT_RGBA)) {
+        return AVERROR(ENOMEM);
+    }
+    s->up_frame = av_frame_alloc();
+    if (!s->up_frame)
+        return AVERROR(ENOMEM);
+
+    return 0;
+}
+
+static int command(AVFilterContext *ctx, const char *cmd, const char *arg, char *res, int res_len, int flags)
+{
+    NetIntDrawTextContext *old = ctx->priv;
+    NetIntDrawTextContext *new = NULL;
+    int ret;
+
+    if (!strcmp(cmd, "reinit")) {
+        new = av_mallocz(sizeof(NetIntDrawTextContext));
+        if (!new)
+            return AVERROR(ENOMEM);
+
+        new->class = &ni_drawtext_class;
+        ret = av_opt_copy(new, old);
+        if (ret < 0)
+            goto fail;
+
+        ctx->priv = new;
+        ret = av_set_options_string(ctx, arg, "=", ":");
+        if (ret < 0) {
+            ctx->priv = old;
+            goto fail;
+        }
+
+        ret = init(ctx);
+        if (ret < 0) {
+            uninit(ctx);
+            ctx->priv = old;
+            goto fail;
+        }
+
+        new->reinit = 1;
+        new->initialized = 0;
+        new->out_frames_ref = av_buffer_ref(old->out_frames_ref);
+
+        ctx->priv = old;
+        // NETINT patch: free the old context's options before discarding
+        // it, otherwise every "reinit" command leaks the old option values
+        av_opt_free(old);
+        uninit(ctx);
+        av_freep(&old);
+
+        ctx->priv = new;
+        return config_input(ctx->inputs[0]);
+    } else {
+        return AVERROR(ENOSYS);
+    }
+
+fail:
+    av_log(ctx, AV_LOG_ERROR, "Failed to process command. Continuing with existing parameters.\n");
+    av_freep(&new);
+    return ret;
+}
+
+static int func_pict_type(AVFilterContext *ctx, AVBPrint *bp,
+                          char *fct, unsigned argc, char **argv, int tag)
+{
+    NetIntDrawTextContext *s = ctx->priv;
+
+    av_bprintf(bp, "%c", av_get_picture_type_char(s->var_values[VAR_PICT_TYPE]));
+    return 0;
+}
+
+static int func_pts(AVFilterContext *ctx, AVBPrint *bp,
+                    char *fct, unsigned argc, char **argv, int tag)
+{
+    NetIntDrawTextContext *s = ctx->priv;
+    const char *fmt;
+    double pts = s->var_values[VAR_T];
+    int ret;
+
+    fmt = argc >= 1 ? 
argv[0] : "flt"; + if (argc >= 2) { + int64_t delta; + if ((ret = av_parse_time(&delta, argv[1], 1)) < 0) { + av_log(ctx, AV_LOG_ERROR, "Invalid delta '%s'\n", argv[1]); + return ret; + } + pts += (double)delta / AV_TIME_BASE; + } + if (!strcmp(fmt, "flt")) { + av_bprintf(bp, "%.6f", pts); + } else if (!strcmp(fmt, "hms")) { + if (isnan(pts)) { + av_bprintf(bp, " ??:??:??.???"); + } else { + int64_t ms = llrint(pts * 1000); + char sign = ' '; + if (ms < 0) { + sign = '-'; + ms = -ms; + } + if (argc >= 3) { + if (!strcmp(argv[2], "24HH")) { + ms %= 24 * 60 * 60 * 1000; + } else { + av_log(ctx, AV_LOG_ERROR, "Invalid argument '%s'\n", argv[2]); + return AVERROR(EINVAL); + } + } + av_bprintf(bp, "%c%02d:%02d:%02d.%03d", sign, + (int)(ms / (60 * 60 * 1000)), + (int)(ms / (60 * 1000)) % 60, + (int)(ms / 1000) % 60, + (int)(ms % 1000)); + } + } else if (!strcmp(fmt, "localtime") || + !strcmp(fmt, "gmtime")) { + struct tm tm; + time_t ms = (time_t)pts; + const char *timefmt = argc >= 3 ? argv[2] : "%Y-%m-%d %H:%M:%S"; + if (!strcmp(fmt, "localtime")) + localtime_r(&ms, &tm); + else + gmtime_r(&ms, &tm); + av_bprint_strftime(bp, timefmt, &tm); + } else { + av_log(ctx, AV_LOG_ERROR, "Invalid format '%s'\n", fmt); + return AVERROR(EINVAL); + } + return 0; +} + +static int func_frame_num(AVFilterContext *ctx, AVBPrint *bp, + char *fct, unsigned argc, char **argv, int tag) +{ + NetIntDrawTextContext *s = ctx->priv; + + av_bprintf(bp, "%d", (int)s->var_values[VAR_N]); + return 0; +} + +static int func_metadata(AVFilterContext *ctx, AVBPrint *bp, + char *fct, unsigned argc, char **argv, int tag) +{ + NetIntDrawTextContext *s = ctx->priv; + AVDictionaryEntry *e = av_dict_get(s->metadata, argv[0], NULL, 0); + + if (e && e->value) + av_bprintf(bp, "%s", e->value); + else if (argc >= 2) + av_bprintf(bp, "%s", argv[1]); + return 0; +} + +static int func_strftime(AVFilterContext *ctx, AVBPrint *bp, + char *fct, unsigned argc, char **argv, int tag) +{ + const char *fmt = argc ? 
argv[0] : "%Y-%m-%d %H:%M:%S"; + time_t now; + struct tm tm; + + time(&now); + if (tag == 'L') + localtime_r(&now, &tm); + else + tm = *gmtime_r(&now, &tm); + av_bprint_strftime(bp, fmt, &tm); + return 0; +} + +static int func_eval_expr(AVFilterContext *ctx, AVBPrint *bp, + char *fct, unsigned argc, char **argv, int tag) +{ + NetIntDrawTextContext *s = ctx->priv; + double res; + int ret; + + ret = av_expr_parse_and_eval(&res, argv[0], var_names, s->var_values, + NULL, NULL, fun2_names, fun2, + &s->prng, 0, ctx); + if (ret < 0) + av_log(ctx, AV_LOG_ERROR, + "Expression '%s' for the expr text expansion function is not valid\n", + argv[0]); + else + av_bprintf(bp, "%f", res); + + return ret; +} + +static int func_eval_expr_int_format(AVFilterContext *ctx, AVBPrint *bp, + char *fct, unsigned argc, char **argv, int tag) +{ + NetIntDrawTextContext *s = ctx->priv; + double res; + int intval; + int ret; + unsigned int positions = 0; + char fmt_str[30] = "%"; + + /* + * argv[0] expression to be converted to `int` + * argv[1] format: 'x', 'X', 'd' or 'u' + * argv[2] positions printed (optional) + */ + + ret = av_expr_parse_and_eval(&res, argv[0], var_names, s->var_values, + NULL, NULL, fun2_names, fun2, + &s->prng, 0, ctx); + if (ret < 0) { + av_log(ctx, AV_LOG_ERROR, + "Expression '%s' for the expr text expansion function is not valid\n", + argv[0]); + return ret; + } + + if (!strchr("xXdu", argv[1][0])) { + av_log(ctx, AV_LOG_ERROR, "Invalid format '%c' specified," + " allowed values: 'x', 'X', 'd', 'u'\n", argv[1][0]); + return AVERROR(EINVAL); + } + + if (argc == 3) { + ret = sscanf(argv[2], "%u", &positions); + if (ret != 1) { + av_log(ctx, AV_LOG_ERROR, "expr_int_format(): Invalid number of positions" + " to print: '%s'\n", argv[2]); + return AVERROR(EINVAL); + } + } + + feclearexcept(FE_ALL_EXCEPT); + intval = res; +#if defined(FE_INVALID) && defined(FE_OVERFLOW) && defined(FE_UNDERFLOW) + if ((ret = fetestexcept(FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW))) { + av_log(ctx, AV_LOG_ERROR, "Conversion of floating-point result to int failed. Control register: 0x%08x. 
Conversion result: %d\n", ret, intval); + return AVERROR(EINVAL); + } +#endif + + if (argc == 3) + av_strlcatf(fmt_str, sizeof(fmt_str), "0%u", positions); + av_strlcatf(fmt_str, sizeof(fmt_str), "%c", argv[1][0]); + + av_log(ctx, AV_LOG_DEBUG, "Formatting value %f (expr '%s') with spec '%s'\n", + res, argv[0], fmt_str); + + av_bprintf(bp, fmt_str, intval); + + return 0; +} + +static const struct drawtext_function { + const char *name; + unsigned argc_min, argc_max; + int tag; /**< opaque argument to func */ + int (*func)(AVFilterContext *, AVBPrint *, char *, unsigned, char **, int); +} functions[] = { + { "expr", 1, 1, 0, func_eval_expr }, + { "e", 1, 1, 0, func_eval_expr }, + { "expr_int_format", 2, 3, 0, func_eval_expr_int_format }, + { "eif", 2, 3, 0, func_eval_expr_int_format }, + { "pict_type", 0, 0, 0, func_pict_type }, + { "pts", 0, 3, 0, func_pts }, + { "gmtime", 0, 1, 'G', func_strftime }, + { "localtime", 0, 1, 'L', func_strftime }, + { "frame_num", 0, 0, 0, func_frame_num }, + { "n", 0, 0, 0, func_frame_num }, + { "metadata", 1, 2, 0, func_metadata }, +}; + +static int eval_function(AVFilterContext *ctx, AVBPrint *bp, char *fct, + unsigned argc, char **argv) +{ + unsigned i; + + for (i = 0; i < FF_ARRAY_ELEMS(functions); i++) { + if (strcmp(fct, functions[i].name)) + continue; + if (argc < functions[i].argc_min) { + av_log(ctx, AV_LOG_ERROR, "%%{%s} requires at least %d arguments\n", + fct, functions[i].argc_min); + return AVERROR(EINVAL); + } + if (argc > functions[i].argc_max) { + av_log(ctx, AV_LOG_ERROR, "%%{%s} requires at most %d arguments\n", + fct, functions[i].argc_max); + return AVERROR(EINVAL); + } + break; + } + if (i >= FF_ARRAY_ELEMS(functions)) { + av_log(ctx, AV_LOG_ERROR, "%%{%s} is not known\n", fct); + return AVERROR(EINVAL); + } + return functions[i].func(ctx, bp, fct, argc, argv, functions[i].tag); +} + +static int expand_function(AVFilterContext *ctx, AVBPrint *bp, char **rtext) +{ + const char *text = *rtext; + char *argv[16] = { NULL }; + unsigned argc = 0, i; + int ret; + + if (*text != '{') { + av_log(ctx, AV_LOG_ERROR, "Stray %% near '%s'\n", text); + return AVERROR(EINVAL); + } + text++; + while (1) { + if (!(argv[argc++] = av_get_token(&text, ":}"))) { + ret = AVERROR(ENOMEM); + goto end; + } + if (!*text) { + av_log(ctx, AV_LOG_ERROR, "Unterminated %%{} near '%s'\n", *rtext); + ret = AVERROR(EINVAL); + goto end; + } + if (argc == FF_ARRAY_ELEMS(argv)) + av_freep(&argv[--argc]); /* error will be caught later */ + if (*text == '}') + break; + text++; + } + + if ((ret = eval_function(ctx, bp, argv[0], argc - 1, argv + 1)) < 0) + goto end; + ret = 0; + *rtext = (char *)text + 1; + +end: + for (i = 0; i < argc; i++) + av_freep(&argv[i]); + return ret; +} + +static int expand_text(AVFilterContext *ctx, char *text, AVBPrint *bp) +{ + int ret; + + av_bprint_clear(bp); + while (*text) { + if (*text == '\\' && text[1]) { + av_bprint_chars(bp, text[1], 1); + text += 2; + } else if (*text == '%') { + text++; + if ((ret = expand_function(ctx, bp, &text)) < 0) + return ret; + } else { + av_bprint_chars(bp, *text, 1); + text++; + } + } + if (!av_bprint_is_complete(bp)) + return AVERROR(ENOMEM); + return 0; +} + +static int draw_glyphs(NetIntDrawTextContext *s, ni_frame_t *frame, + int width, int height, + FFDrawColor *color, + int x, int y, int borderw, int index) +{ + char *text = s->expanded_text.str; + uint32_t code = 0; + int i, x1, y1; + uint8_t *p; + Glyph *glyph = NULL; + int dst_linesize[NI_MAX_NUM_DATA_POINTERS] = {0}; + + dst_linesize[0] = 
frame->data_len[0] / height;
+    dst_linesize[1] = dst_linesize[2] = frame->data_len[1] / (height / 2);
+
+    for (i = 0, p = text; *p; i++) {
+        FT_Bitmap bitmap;
+        Glyph dummy = { 0 };
+        GET_UTF8(code, *p ? *p++ : 0, code = 0xfffd; goto continue_on_invalid;);
+continue_on_invalid:
+        /* skip newline and tab characters; they produce no bitmap and the
+         * computed positions already account for them */
+        if (code == '\n' || code == '\r' || code == '\t')
+            continue;
+
+        dummy.code = code;
+        dummy.fontsize = s->fontsize[index];
+        glyph = av_tree_find(s->glyphs, &dummy, glyph_cmp, NULL);
+        /* all glyphs were cached by draw_text(); fail cleanly on a miss */
+        if (!glyph)
+            return AVERROR(EINVAL);
+
+        bitmap = borderw ? glyph->border_bitmap : glyph->bitmap;
+
+        if (glyph->bitmap.pixel_mode != FT_PIXEL_MODE_MONO &&
+            glyph->bitmap.pixel_mode != FT_PIXEL_MODE_GRAY)
+            return AVERROR(EINVAL);
+
+        x1 = s->positions[i].x + s->x[index] + x - borderw;
+        y1 = s->positions[i].y + s->y[index] + y - borderw;
+
+        ff_blend_mask(&s->dc, color,
+                      frame->p_data, dst_linesize, width, height,
+                      bitmap.buffer, bitmap.pitch,
+                      bitmap.width, bitmap.rows,
+                      bitmap.pixel_mode == FT_PIXEL_MODE_MONO ? 0 : 3,
+                      0, x1, y1);
+    }
+
+    return 0;
+}
+
+static void update_color_with_alpha(NetIntDrawTextContext *s, FFDrawColor *color, const FFDrawColor incolor)
+{
+    *color = incolor;
+    color->rgba[3] = (color->rgba[3] * s->alpha) / 255;
+    ff_draw_color(&s->dc, color, color->rgba);
+}
+
+static void update_alpha(NetIntDrawTextContext *s)
+{
+    double alpha = av_expr_eval(s->a_pexpr, s->var_values, &s->prng);
+
+    if (isnan(alpha))
+        return;
+
+    if (alpha >= 1.0)
+        s->alpha = 255;
+    else if (alpha <= 0)
+        s->alpha = 0;
+    else
+        s->alpha = 256 * alpha;
+}
+
+static void update_canvas_size(NetIntDrawTextContext *s, int x, int y, int w, int h)
+{
+    if (s->x_start == 0 && s->x_end == -1 &&
+        s->y_start == 0 && s->y_end == -1) {
+        s->x_start = x;
+        s->y_start = y;
+        s->x_end = x + w;
+        s->y_end = y + h;
+        return;
+    }
+    if (x < s->x_start)
+        s->x_start = x;
+    if (y < s->y_start)
+        s->y_start = y;
+    if (x + w > s->x_end)
+        s->x_end = x + w;
+    if (y + h > s->y_end)
+        s->y_end = y + h;
+}
+
+static void update_watermark_internal(ni_scaler_watermark_params_t *multi_watermark_params, int x, int y, int w, int h)
+{
+    if (w == 0 || h == 0) {
+        return;
+    }
+    if (multi_watermark_params->ui32Valid) {
+        uint32_t x_end = multi_watermark_params->ui32StartX + multi_watermark_params->ui32Width;
+        uint32_t y_end = multi_watermark_params->ui32StartY + multi_watermark_params->ui32Height;
+        multi_watermark_params->ui32StartX = FFMIN(multi_watermark_params->ui32StartX, x);
+        multi_watermark_params->ui32StartY = FFMIN(multi_watermark_params->ui32StartY, y);
+        x_end = FFMAX(x_end, x + w);
+        y_end = FFMAX(y_end, y + h);
+        multi_watermark_params->ui32Width = x_end - multi_watermark_params->ui32StartX;
+        multi_watermark_params->ui32Height = y_end - multi_watermark_params->ui32StartY;
+    } else {
+        multi_watermark_params->ui32Valid = 1;
+        multi_watermark_params->ui32StartX = x;
+        multi_watermark_params->ui32StartY = y;
+        multi_watermark_params->ui32Width = w;
+        multi_watermark_params->ui32Height = h;
+    }
+}
+
+static void update_single_watermark(int x0, int y0, int w0, int h0,
+                                    int x1, int y1, int w1, int h1,
+                                    NetIntDrawTextContext *s, int index)
+{
+    int inter_x_start = FFMAX(x0, x1);
+    int inter_y_start = FFMAX(y0, y1);
+    int inter_x_end = FFMIN(x0 + w0, x1 + w1);
+    int inter_y_end = FFMIN(y0 + h0, y1 + h1);
+    if (inter_x_start >= inter_x_end || inter_y_start >= inter_y_end) {
+        return;
+    } else {
+        av_log(s, AV_LOG_DEBUG, "index %d, x0 %d y0 %d w0 %d h0 %d\n", index,
+               x0, y0, w0, h0);
+        av_log(s, AV_LOG_DEBUG, "index %d, xstart %d ystart %d xend %d yend %d\n", index,
+               inter_x_start, inter_y_start, inter_x_end, inter_y_end);
+        update_watermark_internal(&(s->scaler_watermark_paras.multi_watermark_params[index]),
+                                  inter_x_start, inter_y_start,
+                                  inter_x_end - inter_x_start, inter_y_end - inter_y_start);
+    }
+}
+
+/*
+ * The frame is split into a 2x3 grid of up to NI_MAX_SUPPORT_WATERMARK_NUM
+ * watermark regions (see init_watermark()); each text rectangle is clamped
+ * to the frame and intersected with every grid cell.
+ */
+static void update_watermark(NetIntDrawTextContext *s, int x, int y, int w, int h)
+{
+    int frame_width = s->watermark_width0 + s->watermark_width1;
+    int frame_height = (s->watermark_height0 * 2) + s->watermark_height1;
+    if (x < 0) {
+        w = FFMAX(w + x, 0);
+        x = 0;
+    }
+    if (y < 0) {
+        h = FFMAX(h + y, 0);
+        y = 0;
+    }
+    if (x + w > frame_width) {
+        x = FFMIN(x, frame_width);
+        w = frame_width - x;
+    }
+    if (y + h > frame_height) {
+        y = FFMIN(y, frame_height);
+        h = frame_height - y;
+    }
+
+    for (int watermark_idx = 0; watermark_idx < NI_MAX_SUPPORT_WATERMARK_NUM; watermark_idx++) {
+        update_single_watermark(x, y, w, h,
+                                s->watermark_width0 * (watermark_idx % 2),
+                                s->watermark_height0 * (watermark_idx / 2),
+                                watermark_idx % 2 ? s->watermark_width1 : s->watermark_width0,
+                                watermark_idx > 3 ? s->watermark_height1 : s->watermark_height0,
+                                s, watermark_idx);
+    }
+}
+
+static void check_and_expand_canvas_size(NetIntDrawTextContext *s, int min_filter_width, int min_filter_height)
+{
+    int x_distance = s->x_end - s->x_start;
+    int y_distance = s->y_end - s->y_start;
+
+    if (x_distance < min_filter_width) {
+        if (s->x_start - 0 >= min_filter_width - x_distance) {
+            s->x_start -= min_filter_width - x_distance;
+        } else {
+            s->x_end += min_filter_width - x_distance;
+        }
+    }
+
+    if (y_distance < min_filter_height) {
+        if (s->y_start - 0 >= min_filter_height - y_distance) {
+            s->y_start -= min_filter_height - y_distance;
+        } else {
+            s->y_end += min_filter_height - y_distance;
+        }
+    }
+}
+
+static int draw_text(AVFilterContext *ctx, ni_frame_t *frame,
+                     int width, int height, int64_t pts)
+{
+    NetIntDrawTextContext *s = ctx->priv;
+    AVFilterLink *inlink = ctx->inputs[0];
+
+    uint32_t code = 0, prev_code = 0;
+    int x = 0, y = 0, i = 0, j = 0, ret;
+    int max_text_line_w = 0, len;
+    int box_w, box_h;
+    char *text;
+    uint8_t *p;
+    int y_min = 32000, y_max = -32000;
+    int x_min = 32000, x_max = -32000;
+    FT_Vector delta;
+    Glyph *glyph = NULL, *prev_glyph = NULL;
+    Glyph dummy = { 0 };
+
+    time_t now = time(0);
+    struct tm ltime;
+    AVBPrint *bp = &s->expanded_text;
+
+    FFDrawColor fontcolor;
+    FFDrawColor shadowcolor;
+    FFDrawColor bordercolor;
+    FFDrawColor boxcolor;
+    int dst_linesize[NI_MAX_NUM_DATA_POINTERS] = {0};
+    dst_linesize[0] = frame->data_len[0] / height;
+    /* chroma planes have height/2 rows; keep consistent with draw_glyphs() */
+    dst_linesize[1] = dst_linesize[2] = frame->data_len[1] / (height / 2);
+
+    av_bprint_clear(bp);
+
+    if (s->basetime != AV_NOPTS_VALUE)
+        now = pts * av_q2d(ctx->inputs[0]->time_base) + s->basetime / 1000000;
+
+    s->upload_drawtext_frame = 0;
+
+    for (i = 0; i < s->text_num; i++) {
+        switch (s->exp_mode) {
+        case EXP_NONE:
+            av_bprintf(bp, "%s", s->text[i]);
+            break;
+        case EXP_NORMAL:
+            if ((ret = expand_text(ctx, s->text[i], &s->expanded_text)) < 0)
+                return ret;
+            break;
+        case EXP_STRFTIME:
+            localtime_r(&now, &ltime);
+            av_bprint_strftime(bp, s->text[i], &ltime);
+            break;
+        }
+        if (s->text_last_updated[i] == NULL) {
+            s->upload_drawtext_frame = 1;
+        } else {
+            if (strcmp(s->text_last_updated[i], bp->str))
+                s->upload_drawtext_frame = 1;
+        }
+        s->text_last_updated[i] = av_realloc(s->text_last_updated[i], bp->len + 1);
+        if (!s->text_last_updated[i])
+            return AVERROR(ENOMEM);
+        strcpy(s->text_last_updated[i], bp->str);
+
+        if (s->tc_opt_string) {
+            char tcbuf[AV_TIMECODE_STR_SIZE];
+            FilterLink *li = 
ff_filter_link(inlink); + av_timecode_make_string(&s->tc, tcbuf, li->frame_count_out); + av_bprint_clear(bp); + av_bprintf(bp, "%s%s", s->text[i], tcbuf); + } + + if (!av_bprint_is_complete(bp)) + return AVERROR(ENOMEM); + text = s->expanded_text.str; + if ((len = s->expanded_text.len) > s->nb_positions) { + if (!(s->positions = + av_realloc(s->positions, len*sizeof(*s->positions)))) + return AVERROR(ENOMEM); + s->nb_positions = len; + } + + if (s->fontcolor_expr[i]) { + /* If expression is set, evaluate and replace the static value */ + av_bprint_clear(&s->expanded_fontcolor); + if ((ret = expand_text(ctx, s->fontcolor_expr[i], &s->expanded_fontcolor)) < 0) + return ret; + if (!av_bprint_is_complete(&s->expanded_fontcolor)) + return AVERROR(ENOMEM); + av_log(s, AV_LOG_DEBUG, "Evaluated fontcolor is '%s'\n", s->expanded_fontcolor.str); + ret = av_parse_color(s->fontcolor[i].rgba, s->expanded_fontcolor.str, -1, s); + if (ret) + return ret; + ff_draw_color(&s->dc, &s->fontcolor[i], s->fontcolor[i].rgba); + } + + x = 0; + y = 0; + max_text_line_w = 0; + + if ((ret = update_fontsize(ctx, i)) < 0) + return ret; + + /* load and cache glyphs */ + for (j = 0, p = text; *p; j++) { + GET_UTF8(code, *p ? *p++ : 0, code = 0xfffd; goto continue_on_invalid;); +continue_on_invalid: + /* get glyph */ + dummy.code = code; + dummy.fontsize = s->fontsize[i]; + glyph = av_tree_find(s->glyphs, &dummy, glyph_cmp, NULL); + if (!glyph) { + ret = load_glyph(ctx, &glyph, code, i); + if (ret < 0) + return ret; + } + + y_min = FFMIN(glyph->bbox.yMin, y_min); + y_max = FFMAX(glyph->bbox.yMax, y_max); + x_min = FFMIN(glyph->bbox.xMin, x_min); + x_max = FFMAX(glyph->bbox.xMax, x_max); + } + s->max_glyph_h = y_max - y_min; + s->max_glyph_w = x_max - x_min; + + /* compute and save position for each glyph */ + glyph = NULL; + for (j = 0, p = text; *p; j++) { + GET_UTF8(code, *p ? 
*p++ : 0, code = 0xfffd; goto continue_on_invalid2;); +continue_on_invalid2: + /* skip the \n in the sequence \r\n */ + if (prev_code == '\r' && code == '\n') + continue; + + prev_code = code; + if (is_newline(code)) { + + max_text_line_w = FFMAX(max_text_line_w, x); + y += s->max_glyph_h + s->line_spacing; + x = 0; + continue; + } + + /* get glyph */ + prev_glyph = glyph; + dummy.code = code; + dummy.fontsize = s->fontsize[i]; + glyph = av_tree_find(s->glyphs, &dummy, glyph_cmp, NULL); + + /* kerning */ + if (s->use_kerning[i] && prev_glyph && glyph->code) { + FT_Get_Kerning(s->face[i], prev_glyph->code, glyph->code, + ft_kerning_default, &delta); + x += delta.x >> 6; + } + + /* save position */ + s->positions[j].x = x + glyph->bitmap_left; + s->positions[j].y = y - glyph->bitmap_top + y_max; + if (code == '\t') + x = (x / s->tabsize[i] + 1)*s->tabsize[i]; + else + x += glyph->advance; + } + + max_text_line_w = FFMAX(x, max_text_line_w); + + s->var_values[VAR_TW] = s->var_values[VAR_TEXT_W] = max_text_line_w; + s->var_values[VAR_TH] = s->var_values[VAR_TEXT_H] = y + s->max_glyph_h; + + s->var_values[VAR_MAX_GLYPH_W] = s->max_glyph_w; + s->var_values[VAR_MAX_GLYPH_H] = s->max_glyph_h; + s->var_values[VAR_MAX_GLYPH_A] = s->var_values[VAR_ASCENT ] = y_max; + s->var_values[VAR_MAX_GLYPH_D] = s->var_values[VAR_DESCENT] = y_min; + + s->var_values[VAR_LINE_H] = s->var_values[VAR_LH] = s->max_glyph_h; + + s->x[i] = s->var_values[VAR_X] = av_expr_eval(s->x_pexpr[i], s->var_values, &s->prng); + s->y[i] = s->var_values[VAR_Y] = av_expr_eval(s->y_pexpr[i], s->var_values, &s->prng); + /* It is necessary if x is expressed from y */ + s->x[i] = s->var_values[VAR_X] = av_expr_eval(s->x_pexpr[i], s->var_values, &s->prng); + + update_alpha(s); + update_color_with_alpha(s, &fontcolor , s->fontcolor[i]); + update_color_with_alpha(s, &shadowcolor, s->shadowcolor); + update_color_with_alpha(s, &bordercolor, s->bordercolor); + update_color_with_alpha(s, &boxcolor , s->boxcolor[i]); + + box_w = max_text_line_w; + box_h = y + s->max_glyph_h; + + if (s->draw_box && s->boxborderw[i]) { + int bbsize[4]; + int count; + count = string_to_array(s->boxborderw[i], bbsize, 4); + if (count == 1) { + s->bb_top[i] = s->bb_right[i] = s->bb_bottom[i] = s->bb_left[i] = bbsize[0]; + } else if (count == 2) { + s->bb_top[i] = s->bb_bottom[i] = bbsize[0]; + s->bb_right[i] = s->bb_left[i] = bbsize[1]; + } else if (count == 3) { + s->bb_top[i] = bbsize[0]; + s->bb_right[i] = s->bb_left[i] = bbsize[1]; + s->bb_bottom[i] = bbsize[2]; + } else if (count == 4) { + s->bb_top[i] = bbsize[0]; + s->bb_right[i] = bbsize[1]; + s->bb_bottom[i] = bbsize[2]; + s->bb_left[i] = bbsize[3]; + } + } else { + s->bb_top[i] = s->bb_right[i] = s->bb_bottom[i] = s->bb_left[i] = 0; + } + + if (s->fix_bounds) { + + /* calculate footprint of text effects */ + int borderoffset = s->borderw ? FFMAX(s->borderw, 0) : 0; + + int offsetleft = FFMAX3(FFMAX(s->bb_left[i], 0), borderoffset, + (s->shadowx < 0 ? FFABS(s->shadowx) : 0)); + int offsettop = FFMAX3(FFMAX(s->bb_top[i], 0), borderoffset, + (s->shadowy < 0 ? FFABS(s->shadowy) : 0)); + int offsetright = FFMAX3(FFMAX(s->bb_right[i], 0), borderoffset, + (s->shadowx > 0 ? s->shadowx : 0)); + int offsetbottom = FFMAX3(FFMAX(s->bb_bottom[i], 0), borderoffset, + (s->shadowy > 0 ? 
s->shadowy : 0)); + + + if (s->x[i] - offsetleft < 0) s->x[i] = offsetleft; + if (s->y[i] - offsettop < 0) s->y[i] = offsettop; + + if (s->x[i] + box_w + offsetright > width) + s->x[i] = FFMAX(width - box_w - offsetright, 0); + if (s->y[i] + box_h + offsetbottom > height) + s->y[i] = FFMAX(height - box_h - offsetbottom, 0); + } + if (s->x[i] != s->x_bak[i] || s->y[i] != s->y_bak[i]) { + s->x_bak[i] = s->x[i]; + s->y_bak[i] = s->y[i]; + s->upload_drawtext_frame = 1; + } + /* draw box */ + if (s->draw_box) + ff_blend_rectangle(&s->dc, &boxcolor, + frame->p_data, dst_linesize, width, height, + s->x[i] - s->bb_left[i], s->y[i] - s->bb_top[i], + box_w + s->bb_left[i] + s->bb_right[i], box_h + s->bb_top[i] + s->bb_bottom[i]); + + if (s->shadowx || s->shadowy) { + if ((ret = draw_glyphs(s, frame, width, height, + &shadowcolor, s->shadowx, s->shadowy, 0, i)) < 0) + return ret; + } + + if (s->borderw) { + if ((ret = draw_glyphs(s, frame, width, height, + &bordercolor, 0, 0, s->borderw, i)) < 0) + return ret; + } + if ((ret = draw_glyphs(s, frame, width, height, + &fontcolor, 0, 0, 0, i)) < 0) + return ret; + + update_canvas_size(s, s->x[i] - s->bb_left[i], s->y[i] - s->bb_top[i], + box_w + s->bb_left[i] + s->bb_right[i], box_h + s->bb_top[i] + s->bb_bottom[i]); + update_watermark(s, s->x[i] - s->bb_left[i], s->y[i] - s->bb_top[i], + box_w + s->bb_left[i] + s->bb_right[i], box_h + s->bb_top[i] + s->bb_bottom[i]); + } + return 0; +} + +static int init_hwframe_uploader(AVFilterContext *ctx, NetIntDrawTextContext *s, + AVFrame *frame, int txt_w, int txt_h) +{ + int ret; + AVHWFramesContext *hwframe_ctx; + AVHWFramesContext *out_frames_ctx; + AVHWFramesContext *main_frame_ctx; + AVNIDeviceContext *pAVNIDevCtx; + AVNIFramesContext *f_hwctx, *f_hwctx_output; + int cardno = ni_get_cardno(frame); + char buf[64] = {0}; + + main_frame_ctx = (AVHWFramesContext *)frame->hw_frames_ctx->data; + + out_frames_ctx = (AVHWFramesContext *)s->out_frames_ref->data; + + av_log(ctx, AV_LOG_INFO, "%s out_frames_ctx->sw_format %d %s txt %dx%d\n", + __func__, out_frames_ctx->sw_format, + av_get_pix_fmt_name(out_frames_ctx->sw_format), txt_w, txt_h); + + snprintf(buf, sizeof(buf), "%d", cardno); + + ret = av_hwdevice_ctx_create(&s->hwdevice, AV_HWDEVICE_TYPE_NI_QUADRA, buf, + NULL, 0); + if (ret < 0) { + av_log(ctx, AV_LOG_ERROR, "failed to create AV HW device ctx\n"); + return ret; + } + + s->hwframe = av_hwframe_ctx_alloc(s->hwdevice); + if (!s->hwframe) + return AVERROR(ENOMEM); + + hwframe_ctx = (AVHWFramesContext *)s->hwframe->data; + hwframe_ctx->format = AV_PIX_FMT_NI_QUAD; + hwframe_ctx->sw_format = AV_PIX_FMT_RGBA; + hwframe_ctx->width = txt_w; + hwframe_ctx->height = txt_h; + + ret = av_hwframe_ctx_init(s->hwframe); + if (ret < 0) { + av_log(ctx, AV_LOG_ERROR, "failed to init AV HW device ctx\n"); + return ret; + } + + // Work around a hwdownload session start timestamp issue + f_hwctx = (AVNIFramesContext*) hwframe_ctx->hwctx; + f_hwctx_output = (AVNIFramesContext*) out_frames_ctx->hwctx; + f_hwctx_output->api_ctx.session_timestamp = + f_hwctx->api_ctx.session_timestamp; + + s->hw_frames_ctx = av_buffer_ref(s->hwframe); + if (!s->hw_frames_ctx) + return AVERROR(ENOMEM); + + // set up a scaler session for the in-place overlay + ret = ni_device_session_context_init(&s->api_ctx); + if (ret < 0) { + av_log(ctx, AV_LOG_ERROR, + "ni overlay filter session context init failure\n"); + return ret; + } + + pAVNIDevCtx = (AVNIDeviceContext *)main_frame_ctx->device_ctx->hwctx; + s->api_ctx.device_handle = 
pAVNIDevCtx->cards[cardno]; + s->api_ctx.blk_io_handle = pAVNIDevCtx->cards[cardno]; + + s->api_ctx.hw_id = cardno; + s->api_ctx.device_type = NI_DEVICE_TYPE_SCALER; + s->api_ctx.scaler_operation = s->use_watermark ? + NI_SCALER_OPCODE_WATERMARK : NI_SCALER_OPCODE_IPOVLY; + s->api_ctx.keep_alive_timeout = s->keep_alive_timeout; + + av_log(ctx, AV_LOG_DEBUG, "%s open overlay session\n", __func__); + ret = ni_device_session_open(&s->api_ctx, NI_DEVICE_TYPE_SCALER); + if (ret != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Can't open scaler session on card %d\n", + cardno); + + ni_device_session_close(&s->api_ctx, 1, NI_DEVICE_TYPE_SCALER); + ni_device_session_context_clear(&s->api_ctx); + return ret; + } + + s->session_opened = 1; + + ni_cpy_hwframe_ctx(main_frame_ctx, out_frames_ctx); + ni_device_session_copy(&s->api_ctx, &f_hwctx_output->api_ctx); + s->buffer_limit = 1; + if (s->use_watermark) { + // init the out pool for the overlay session when use watermark + ret = ff_ni_build_frame_pool(&s->api_ctx, frame->width, frame->height, + main_frame_ctx->sw_format, 4, s->buffer_limit); + + if (ret < 0) { + av_log(ctx, AV_LOG_ERROR, + "Internal output allocation failed rc = %d\n", ret); + return ret; + } + } + + // if background frame has no alpha, set up an extra intermediate scaler + // session for the crop operation + if (!s->main_has_alpha && !s->use_watermark) { + ret = ni_device_session_context_init(&s->crop_api_ctx); + if (ret < 0) { + av_log(ctx, AV_LOG_ERROR, + "ni drawtext filter (crop) session context init failure\n"); + return ret; + } + + s->crop_api_ctx.device_handle = pAVNIDevCtx->cards[cardno]; + s->crop_api_ctx.blk_io_handle = pAVNIDevCtx->cards[cardno]; + + s->crop_api_ctx.hw_id = cardno; + s->crop_api_ctx.device_type = NI_DEVICE_TYPE_SCALER; + s->crop_api_ctx.scaler_operation = NI_SCALER_OPCODE_CROP; + s->crop_api_ctx.keep_alive_timeout = s->keep_alive_timeout; + + av_log(ctx, AV_LOG_DEBUG, "%s open crop session\n", __func__); + ret = ni_device_session_open(&s->crop_api_ctx, NI_DEVICE_TYPE_SCALER); + if (ret != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, + "Can't open crop session on card %d\n", cardno); + + ni_device_session_close(&s->crop_api_ctx, 1, NI_DEVICE_TYPE_SCALER); + ni_device_session_context_clear(&s->crop_api_ctx); + return ret; + } + + s->crop_session_opened = 1; + + // init the out pool for the crop session, make it rgba + ret = ff_ni_build_frame_pool(&s->crop_api_ctx, txt_w, txt_h, + AV_PIX_FMT_RGBA, 1, 0); + + if (ret < 0) { + av_log(ctx, AV_LOG_ERROR, + "Internal output allocation failed rc = %d\n", ret); + return ret; + } + } + + return 0; +} + +static int ni_drawtext_config_input(AVFilterContext *ctx, AVFrame *frame, + int txt_w, int txt_h) +{ + NetIntDrawTextContext *s = ctx->priv; + int ret; + + if (s->initialized) + return 0; + + ret = init_hwframe_uploader(ctx, s, frame, txt_w, txt_h); + if (ret < 0) { + av_log(ctx, AV_LOG_ERROR, "failed to initialize uploader session\n"); + return ret; + } + + s->initialized = 1; + return 0; +} + +static int overlay_intersects_background( + const AVFilterContext *ctx, + int overlay_width, + int overlay_height, + const AVFrame *main) +{ + const NetIntDrawTextContext *s = (NetIntDrawTextContext *)ctx->priv; + + if (s->x_start >= main->width) + return 0; + + if (s->y_start >= main->height) + return 0; + + if (s->x_start + overlay_width <= 0) + return 0; + + if (s->y_start + overlay_height <= 0) + return 0; + + return 1; +} + +static void calculate_dst_rectangle( + int *px, + int *py, + int *pw, + int *ph, + 
int bgnd_x, + int bgnd_y, + int bgnd_w, + int bgnd_h, + int ovly_x, + int ovly_y, + int ovly_w, + int ovly_h) +{ + *px = FFMAX(0, ovly_x); + *py = FFMAX(0, ovly_y); + + if (ovly_x > 0) { + *pw = FFMIN(bgnd_w - ovly_x, ovly_w); + } else { + *pw = FFMIN(ovly_w + ovly_x, bgnd_w); + } + + if (ovly_y > 0) { + *ph = FFMIN(bgnd_h - ovly_y, ovly_h); + } else { + *ph = FFMIN(ovly_h + ovly_y, bgnd_h); + } +} + +static void init_watermark(NetIntDrawTextContext *s, int width, int height) +{ + s->watermark_width0 = width / 2; + s->watermark_width1 = width - s->watermark_width0; + s->watermark_height0 = height / 3; + s->watermark_height1 = height - (2 * s->watermark_height0); + for (int watermark_idx = 0; watermark_idx < NI_MAX_SUPPORT_WATERMARK_NUM; watermark_idx++) { + s->scaler_watermark_paras.multi_watermark_params[watermark_idx].ui32StartX = 0; + s->scaler_watermark_paras.multi_watermark_params[watermark_idx].ui32StartY = 0; + s->scaler_watermark_paras.multi_watermark_params[watermark_idx].ui32Width = 0; + s->scaler_watermark_paras.multi_watermark_params[watermark_idx].ui32Height = 0; + s->scaler_watermark_paras.multi_watermark_params[watermark_idx].ui32Valid = 0; + } +} + +static void calculate_src_rectangle( + int *px, + int *py, + int *pw, + int *ph, + int bgnd_x, + int bgnd_y, + int bgnd_w, + int bgnd_h, + int ovly_x, + int ovly_y, + int ovly_w, + int ovly_h) + +{ + *px = (ovly_x > 0) ? 0 : -ovly_x; + *py = (ovly_y > 0) ? 0 : -ovly_y; + + if (ovly_x > 0) { + *pw = FFMIN(bgnd_w - ovly_x, ovly_w); + } else { + *pw = FFMIN(ovly_w + ovly_x, bgnd_w); + } + + if (ovly_y > 0) { + *ph = FFMIN(bgnd_h - ovly_y, ovly_h); + } else { + *ph = FFMIN(ovly_h + ovly_y, bgnd_h); + } +} + +static int do_intermediate_crop_and_overlay( + AVFilterContext *ctx, + AVFrame *overlay, + AVFrame *frame) { + NetIntDrawTextContext *s = (NetIntDrawTextContext *)ctx->priv; + AVHWFramesContext *main_frame_ctx; + niFrameSurface1_t *frame_surface; + ni_retcode_t retcode; + uint16_t ui16FrameIdx; + int main_scaler_format, ovly_scaler_format; + int flags; + int crop_x,crop_y,crop_w,crop_h; + int src_x,src_y,src_w,src_h; + + main_frame_ctx = (AVHWFramesContext *) frame->hw_frames_ctx->data; + main_scaler_format = + ff_ni_ffmpeg_to_gc620_pix_fmt(main_frame_ctx->sw_format); + + ovly_scaler_format = ff_ni_ffmpeg_to_gc620_pix_fmt(AV_PIX_FMT_RGBA); + + // Allocate a ni_frame_t for the intermediate crop operation + retcode = ni_frame_buffer_alloc_hwenc(&s->crop_api_dst_frame.data.frame, + overlay->width, + overlay->height, + 0); + + if (retcode != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Can't allocate interim crop frame\n"); + return AVERROR(ENOMEM); + } + + calculate_dst_rectangle(&crop_x, &crop_y, &crop_w, &crop_h, + 0, 0, frame->width, frame->height, + FFALIGN(s->x_start,2), FFALIGN(s->y_start,2), + overlay->width, overlay->height); + + frame_surface = (niFrameSurface1_t *)frame->data[3]; + + // Assign a device input frame. Send incoming frame index to crop session + retcode = ni_device_alloc_frame( + &s->crop_api_ctx, + FFALIGN(frame->width, 2), + FFALIGN(frame->height, 2), + main_scaler_format, + 0, + crop_w, + crop_h, + crop_x, + crop_y, + 0, + frame_surface->ui16FrameIdx, + NI_DEVICE_TYPE_SCALER); + + if (retcode != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Can't assign input crop frame %d\n", + retcode); + return AVERROR(ENOMEM); + } + + // Allocate destination frame. 
This acquires a frame from the pool + retcode = ni_device_alloc_frame( + &s->crop_api_ctx, + FFALIGN(overlay->width, 2), + FFALIGN(overlay->height, 2), + ff_ni_ffmpeg_to_gc620_pix_fmt(AV_PIX_FMT_RGBA), + NI_SCALER_FLAG_IO, + crop_w, + crop_h, + 0, + 0, + 0, + -1, + NI_DEVICE_TYPE_SCALER); + + if (retcode != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Can't allocate output crop frame %d\n", + retcode); + return AVERROR(ENOMEM); + } + + retcode = ni_device_session_read_hwdesc(&s->crop_api_ctx, + &s->crop_api_dst_frame, + NI_DEVICE_TYPE_SCALER); + + if (retcode != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "No cropped output frame %d\n", retcode); + return AVERROR(ENOMEM); + } + + // Get the acquired frame + frame_surface = (niFrameSurface1_t *) + s->crop_api_dst_frame.data.frame.p_data[3]; + s->ui16CropFrameIdx = frame_surface->ui16FrameIdx; + + av_log(ctx, AV_LOG_DEBUG, "%s interim crop frame idx [%u]\n", + __func__, s->ui16CropFrameIdx); + + // Overlay the text image over the intermediate cropped frame + + // Allocate a ni_frame_t for the intermediate overlay + retcode = ni_frame_buffer_alloc_hwenc(&s->api_dst_frame.data.frame, + overlay->width, + overlay->height, + 0); + + if (retcode < 0) { + av_log(ctx, AV_LOG_ERROR, "Can't allocate interim ovly frame\n"); + return AVERROR(ENOMEM); + } + + frame_surface = (niFrameSurface1_t *)overlay->data[3]; + ui16FrameIdx = frame_surface->ui16FrameIdx; + + calculate_src_rectangle(&src_x, &src_y, &src_w, &src_h, + 0, 0, frame->width, frame->height, + FFALIGN(s->x_start,2), FFALIGN(s->y_start,2), + overlay->width, overlay->height); + + /* Assign input frame to intermediate overlay session */ + retcode = ni_device_alloc_frame( + &s->api_ctx, + FFALIGN(overlay->width, 2), + FFALIGN(overlay->height, 2), + ovly_scaler_format, + 0, + src_w, + src_h, + src_x, + src_y, + 0, + ui16FrameIdx, + NI_DEVICE_TYPE_SCALER); + + if (retcode != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Can't assign input overlay frame %d\n", + retcode); + return AVERROR(ENOMEM); + } + + // In-place overlay frame. 
Send down frame index of background frame + /* Configure output, Premultiply alpha */ + flags = NI_SCALER_FLAG_IO | NI_SCALER_FLAG_PA; + + retcode = ni_device_alloc_frame( + &s->api_ctx, + FFALIGN(overlay->width, 2), + FFALIGN(overlay->height, 2), + ff_ni_ffmpeg_to_gc620_pix_fmt(AV_PIX_FMT_RGBA), + flags, + crop_w, + crop_h, + 0, + 0, + 0, + s->ui16CropFrameIdx, + NI_DEVICE_TYPE_SCALER); + + if (retcode != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Can't overlay frame for output %d\n", + retcode); + return AVERROR(ENOMEM); + } + + retcode = ni_device_session_read_hwdesc(&s->api_ctx, + &s->api_dst_frame, + NI_DEVICE_TYPE_SCALER); + + if (retcode != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Can't acquire intermediate frame %d\n", + retcode); + return AVERROR(ENOMEM); + } + + return NI_RETCODE_SUCCESS; +} + +static int filter_frame(AVFilterLink *inlink, AVFrame *frame) +{ + AVFilterContext *ctx = inlink->dst; + AVFilterLink *outlink = ctx->outputs[0]; + NetIntDrawTextContext *s = ctx->priv; + niFrameSurface1_t *logging_surface, *logging_surface_out; + uint8_t *p_dst, *p_src; + int y, txt_img_width, txt_img_height; + int ret; + + AVHWFramesContext *main_frame_ctx, *ovly_frame_ctx; + niFrameSurface1_t *frame_surface, *new_frame_surface; + AVFrame *overlay = NULL; + AVFrame *out = NULL; + uint16_t main_frame_idx = 0; + uint16_t ovly_frame_idx = 0; + int main_scaler_format, ovly_scaler_format; + int flags; + int src_x, src_y, src_w, src_h; + int dst_x, dst_y, dst_w, dst_h; + int start_row, stop_row, start_col, stop_col; + int dl_frame_linesize0, text_frame_linesize0; + int ovly_width, ovly_height; + + av_log(ctx, AV_LOG_DEBUG, "ni_drawtext %s %dx%d is_hw_frame %d\n", + __func__, frame->width, frame->height, + AV_PIX_FMT_NI_QUAD == frame->format); + + if (s->reload) { + if ((ret = load_textfile(ctx)) < 0) { + av_frame_free(&frame); + return ret; + } +#if CONFIG_LIBFRIBIDI + if (s->text_shaping) + if ((ret = shape_text(ctx)) < 0) { + av_frame_free(&frame); + return ret; + } +#endif + } + + FilterLink *li = ff_filter_link(inlink); + s->var_values[VAR_N] = li->frame_count_out + s->start_number; + s->var_values[VAR_T] = frame->pts == AV_NOPTS_VALUE ? 
+ NAN : frame->pts * av_q2d(inlink->time_base); + + s->var_values[VAR_PICT_TYPE] = frame->pict_type; + s->var_values[VAR_PKT_DURATION] = frame->duration * av_q2d(inlink->time_base); + s->metadata = frame->metadata; + s->x_start = 0; + s->x_end = -1; + s->y_start = 0; + s->y_end = -1; + init_watermark(s, frame->width, frame->height); + + main_frame_ctx = (AVHWFramesContext *)frame->hw_frames_ctx->data; + av_log(ctx, AV_LOG_DEBUG, "%s HW frame, sw_format %d %s, before drawtext " + "var_text_WxH %dx%d\n", + __func__, main_frame_ctx->sw_format, + av_get_pix_fmt_name(main_frame_ctx->sw_format), + (int)s->var_values[VAR_TEXT_W], (int)s->var_values[VAR_TEXT_H]); + + memset(s->dl_frame.data.frame.p_buffer, 0, + s->dl_frame.data.frame.buffer_size); + + draw_text(ctx, &(s->dl_frame.data.frame), frame->width, frame->height, + frame->pts); + check_and_expand_canvas_size(s, NI_MIN_RESOLUTION_WIDTH_SCALER, NI_MIN_RESOLUTION_HEIGHT_SCALER); + + av_log(ctx, AV_LOG_DEBUG, "n:%d t:%f text_w:%d text_h:%d x:%d y:%d " + "shadowx:%d shadowy:%d\n", + (int)s->var_values[VAR_N], s->var_values[VAR_T], + (int)s->var_values[VAR_TEXT_W], (int)s->var_values[VAR_TEXT_H], + s->x_start, s->y_start, s->shadowx, s->shadowy); + + txt_img_width = FFALIGN(s->x_end - s->x_start, 2); + txt_img_height = FFALIGN(s->y_end - s->y_start, 2); + + if (s->use_watermark) { + ovly_width = frame->width; + ovly_height = frame->height; + } else { + ovly_width = txt_img_width; + ovly_height = txt_img_height; + } + // If overlay does not intersect the background, pass + // the frame through the drawtext filter. + if (!overlay_intersects_background(ctx, txt_img_width, txt_img_height, + frame)) { + return ff_filter_frame(outlink, frame); + } + + if (s->use_watermark) { + int frame_count = li->frame_count_out; + for (int watermark_idx = 0; watermark_idx < NI_MAX_SUPPORT_WATERMARK_NUM; watermark_idx++) { + if (s->scaler_watermark_paras.multi_watermark_params[watermark_idx].ui32Valid) { + av_log(ctx, AV_LOG_DEBUG, "frame %d index %d, x %d, y %d, w %d, h %d\n", + frame_count, watermark_idx, + s->scaler_watermark_paras.multi_watermark_params[watermark_idx].ui32StartX, + s->scaler_watermark_paras.multi_watermark_params[watermark_idx].ui32StartY, + s->scaler_watermark_paras.multi_watermark_params[watermark_idx].ui32Width, + s->scaler_watermark_paras.multi_watermark_params[watermark_idx].ui32Height); + } + } + } + + if (!s->initialized) { + if (s->initiated_upload_width == 0) { + s->initiated_upload_width = frame->width > ovly_width ? ovly_width + 4 : frame->width; + s->initiated_upload_height = frame->height > ovly_height ? 
ovly_height + 4 : frame->height; + } + ret = ni_drawtext_config_input(ctx, frame, s->initiated_upload_width, s->initiated_upload_height); + if (ret) { + av_log(ctx, AV_LOG_ERROR, "failed ni_drawtext config input\n"); + goto fail; + } + } + txt_img_width = ovly_width = s->initiated_upload_width; + txt_img_height = ovly_height = s->initiated_upload_height; + + // Clear the contents of up_frame to avoid accumulating old data + av_frame_free(&s->up_frame); + s->up_frame = av_frame_alloc(); + if (!s->up_frame) { + ret = AVERROR(ENOMEM); + goto fail; + } + + if (s->use_watermark) { + if (ni_scaler_set_watermark_params(&s->api_ctx, + &s->scaler_watermark_paras.multi_watermark_params[0])) { + av_log(ctx, AV_LOG_ERROR, "failed ni_drawtext set_watermark_params\n"); + goto fail; + } + // wrap the dl_frame ni_frame into AVFrame up_frame; + // for RGBA only data[0] needs to be copied. In some situations, + // e.g. when linesize[0] == align64(width*4), the upload is + // zero-copy, which requires data[1] and data[2] to stay NULL. + // In watermark mode the whole frame is uploaded + s->up_frame->data[0] = s->dl_frame.data.frame.p_data[0]; + s->up_frame->linesize[0] = FFALIGN(ovly_width, 16) * 4; + } else { + av_log(ctx, AV_LOG_DEBUG, "%s alloc txt_frame %dx%d\n", __func__, + txt_img_width, txt_img_height); + if (ni_frame_buffer_alloc_dl(&(s->txt_frame.data.frame), + txt_img_width, txt_img_height, + NI_PIX_FMT_RGBA)) { + ret = AVERROR(ENOMEM); + goto fail; + } + + p_dst = s->txt_frame.data.frame.p_buffer; + memset(p_dst, 0, s->txt_frame.data.frame.buffer_size); + + start_row = s->y_start; + stop_row = start_row + txt_img_height; + dl_frame_linesize0 = FFALIGN(frame->width, 16); + text_frame_linesize0 = FFALIGN(txt_img_width, 16); + // if the overlay crosses the main frame's top/bottom edges, copy + // only the overlapping portion + if (start_row < 0) { + p_dst += -1 * start_row * text_frame_linesize0 * 4; + start_row = 0; + } + if (stop_row > frame->height) { + stop_row = frame->height; + } + + // if the overlay crosses the main frame's left/right edges, copy + // only the overlapping portion + start_col = s->x_start; + stop_col = start_col + txt_img_width; + if (start_col < 0) { + p_dst += (-4 * start_col); + start_col = 0; + } + if (stop_col > frame->width) { + stop_col = frame->width; + } + + for (y = start_row; y < stop_row; y++) { + p_src = s->dl_frame.data.frame.p_buffer + + (y * dl_frame_linesize0 + start_col) * 4; + + memcpy(p_dst, p_src, (stop_col - start_col) * 4); + p_dst += text_frame_linesize0 * 4; + } + // wrap the txt ni_frame into AVFrame up_frame; + // for RGBA only data[0] needs to be copied. In some situations, + // e.g. when linesize[0] == align64(width*4), the upload is + // zero-copy, which requires data[1] and data[2] to stay NULL. + // For the in-place overlay only the clip containing the text is updated + s->up_frame->data[0] = s->txt_frame.data.frame.p_data[0]; + s->up_frame->linesize[0] = text_frame_linesize0 * 4; + } + + if (s->optimize_upload == 0) // Force uploading the drawtext frame on every frame + s->upload_drawtext_frame = 1; + + s->filtered_frame_count++; + if (s->filtered_frame_count == s->framerate || s->keep_overlay == NULL) { + s->upload_drawtext_frame = 1; + s->filtered_frame_count = 0; + } + + if (s->upload_drawtext_frame) { + av_frame_free(&s->keep_overlay); + s->keep_overlay = overlay = av_frame_alloc(); + if (!overlay) { + ret = AVERROR(ENOMEM); + goto fail; + } + + av_frame_copy_props(overlay, frame); + overlay->width = ovly_width; + overlay->height = ovly_height; + overlay->format = 
AV_PIX_FMT_NI_QUAD; + overlay->color_range = AVCOL_RANGE_MPEG; + overlay->hw_frames_ctx = s->out_frames_ref; + + ret = av_hwframe_get_buffer(s->hw_frames_ctx, overlay, 0); + if (ret < 0) { + av_log(ctx, AV_LOG_ERROR, "failed to get buffer\n"); + av_frame_free(&overlay); + return ret; + } + + av_frame_copy_props(s->up_frame, frame); + s->up_frame->format = AV_PIX_FMT_RGBA; + s->up_frame->width = ovly_width; + s->up_frame->height = ovly_height; + ret = av_hwframe_transfer_data(overlay, // dst src flags + s->up_frame, 0); + if (ret < 0) { + av_log(ctx, AV_LOG_ERROR, "upload failed, ret = %d\n", ret); + return ret; + } + } + else { + overlay = s->keep_overlay; + } + // logging + logging_surface = (niFrameSurface1_t*)frame->data[3]; + logging_surface_out = (niFrameSurface1_t*)overlay->data[3]; + av_log(ctx, AV_LOG_DEBUG, + "vf_drawtext_ni:IN ui16FrameIdx = [%d] uploaded overlay = [%d]\n", + logging_surface->ui16FrameIdx, logging_surface_out->ui16FrameIdx); + + // do the in place overlay + main_scaler_format = + ff_ni_ffmpeg_to_gc620_pix_fmt(main_frame_ctx->sw_format); + + frame_surface = (niFrameSurface1_t *) frame->data[3]; + if (frame_surface == NULL) { + av_frame_free(&overlay); + return AVERROR(EINVAL); + } + + main_frame_idx = frame_surface->ui16FrameIdx; + + frame_surface = (niFrameSurface1_t *) overlay->data[3]; + if (frame_surface == NULL) { + av_frame_free(&overlay); + return AVERROR(EINVAL); + } + + ovly_frame_idx = frame_surface->ui16FrameIdx; + ovly_frame_ctx = (AVHWFramesContext *)overlay->hw_frames_ctx->data; + ovly_scaler_format = + ff_ni_ffmpeg_to_gc620_pix_fmt(ovly_frame_ctx->sw_format); + +#ifdef NI_MEASURE_LATENCY + ff_ni_update_benchmark(NULL); +#endif + + // for rgba over yuv, do an intermediate crop and overlay + if (!s->main_has_alpha && !s->use_watermark) { + ret = do_intermediate_crop_and_overlay(ctx, overlay, frame); + if (ret < 0) { + av_frame_free(&overlay); + return ret; + } + + // Allocate a ni_frame for the overlay output + ret = ni_frame_buffer_alloc_hwenc(&s->api_dst_frame.data.frame, + outlink->w, + outlink->h, + 0); + + if (ret != NI_RETCODE_SUCCESS) { + av_frame_free(&overlay); + av_log(ctx, AV_LOG_ERROR, "Can't allocate inplace overlay frame\n"); + return AVERROR(ENOMEM); + } + + calculate_src_rectangle(&src_x, &src_y, &src_w, &src_h, + 0, 0, frame->width, frame->height, + FFALIGN(s->x_start,2),FFALIGN(s->y_start,2), + overlay->width, overlay->height); + + // Assign an input frame for overlay picture. Send the + // incoming hardware frame index to the scaler manager. + ret = ni_device_alloc_frame( + &s->api_ctx, + overlay->width, // ovly width + overlay->height, // ovly height + ff_ni_ffmpeg_to_gc620_pix_fmt(AV_PIX_FMT_RGBA), // ovly pix fmt + 0, // flags + src_w, // src rect width + src_h, // src rect height + 0, // src rect x + 0, // src rect y + 0, // n/a + s->ui16CropFrameIdx, // ovly frame idx + NI_DEVICE_TYPE_SCALER); + + if (ret != NI_RETCODE_SUCCESS) { + av_frame_free(&overlay); + av_log(ctx, AV_LOG_ERROR, "Can't assign input overlay frame %d\n", + ret); + return AVERROR(ENOMEM); + } + + calculate_dst_rectangle(&dst_x, &dst_y, &dst_w, &dst_h, + 0, 0, frame->width, frame->height, + FFALIGN(s->x_start,2), FFALIGN(s->y_start, 2), + overlay->width, overlay->height); + + // Allocate device output frame from the pool. We also send down the + // frame index of the background frame to the scaler manager. 
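+        // The blend is done in place on the device: the source is the RGBA
+        // result of the intermediate crop+overlay (s->ui16CropFrameIdx) and
+        // the destination is the background frame itself (main_frame_idx),
+        // restricted to the dst rectangle clipped above, so only the text
+        // region of the background is rewritten.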
+ + /* Configure the output, premultiply alpha */ + flags = NI_SCALER_FLAG_IO | NI_SCALER_FLAG_PA; + + ret = ni_device_alloc_frame( + &s->api_ctx, + FFALIGN(frame->width, 2), // main width + FFALIGN(frame->height, 2), // main height + main_scaler_format, // main pix fmt + flags, // flags + dst_w, // dst rect width + dst_h, // dst rect height + dst_x, // dst rect x + dst_y, // dst rect y + 0, // n/a + main_frame_idx, // main frame idx + NI_DEVICE_TYPE_SCALER); + + if (ret != NI_RETCODE_SUCCESS) { + av_frame_free(&overlay); + av_log(ctx, AV_LOG_ERROR, "Can't allocate overlay output %d\n", + ret); + return AVERROR(ENOMEM); + } + + // Set the new frame index + ret = ni_device_session_read_hwdesc(&s->api_ctx, + &s->api_dst_frame, + NI_DEVICE_TYPE_SCALER); + + if (ret != NI_RETCODE_SUCCESS) { + av_frame_free(&overlay); + av_log(ctx, AV_LOG_ERROR, + "Can't acquire output overlay frame %d\n", ret); + return AVERROR(ENOMEM); + } + } else { + // we can perform an in-place overlay immediately for rgba over rgba; + // in watermark mode we overlay rgba over yuv/rgba + + av_log(ctx, AV_LOG_DEBUG, "%s overlay %s main %s\n", __func__, + av_get_pix_fmt_name(ovly_frame_ctx->sw_format), + av_get_pix_fmt_name(main_frame_ctx->sw_format)); + + /* Allocate ni_frame for the overlay output */ + ret = ni_frame_buffer_alloc_hwenc(&s->api_dst_frame.data.frame, + outlink->w, + outlink->h, + 0); + + if (ret != NI_RETCODE_SUCCESS) { + av_frame_free(&overlay); + av_log(ctx, AV_LOG_ERROR, "Cannot allocate in-place frame\n"); + return AVERROR(ENOMEM); + } + + if (!s->use_watermark) { + calculate_src_rectangle(&src_x, &src_y, &src_w, &src_h, + 0, 0, frame->width, frame->height, + FFALIGN(s->x_start,2), FFALIGN(s->y_start,2), + overlay->width, overlay->height); + } + + /* + * Assign input frame for overlay picture. Sends the + * incoming hardware frame index to the scaler manager. + */ + ret = ni_device_alloc_frame( + &s->api_ctx, + overlay->width, // overlay width + overlay->height, // overlay height + ovly_scaler_format, // overlay pix fmt + 0, // flags + s->use_watermark ? ovly_width : src_w, // src rect width + s->use_watermark ? ovly_height : src_h, // src rect height + s->use_watermark ? 0 : src_x, // src rect x + s->use_watermark ? 0 : src_y, // src rect y + 0, // n/a + ovly_frame_idx, // overlay frame idx + NI_DEVICE_TYPE_SCALER); + + if (ret != NI_RETCODE_SUCCESS) { + av_frame_free(&overlay); + av_log(ctx, AV_LOG_ERROR, + "Can't assign frame for overlay input %d\n", ret); + return AVERROR(ENOMEM); + } + + if (!s->use_watermark) { + /* Configure the output, Premultiply alpha */ + flags = NI_SCALER_FLAG_IO | NI_SCALER_FLAG_PA; + + calculate_dst_rectangle(&dst_x, &dst_y, &dst_w, &dst_h, + 0, 0, frame->width, frame->height, + FFALIGN(s->x_start,2), FFALIGN(s->y_start,2), + overlay->width, overlay->height); + } + ret = ni_device_alloc_frame( + &s->api_ctx, + FFALIGN(frame->width, 2), // main width + FFALIGN(frame->height, 2), // main height + main_scaler_format, // main pix fmt + s->use_watermark ? NI_SCALER_FLAG_IO : flags, // flags + s->use_watermark ? frame->width : dst_w, // dst rect width + s->use_watermark ? frame->height : dst_h, // dst rect height + s->use_watermark ? 0 : dst_x, // dst rect x + s->use_watermark ? 
0 : dst_y, // dst rect y + 0, // n/a + main_frame_idx, // main frame idx + NI_DEVICE_TYPE_SCALER); + + if (ret != NI_RETCODE_SUCCESS) { + av_frame_free(&overlay); + av_log(ctx, AV_LOG_ERROR, + "Can't allocate frame for output ovly %d\n", ret); + return AVERROR(ENOMEM); + } + + ret = ni_device_session_read_hwdesc(&s->api_ctx, &s->api_dst_frame, + NI_DEVICE_TYPE_SCALER); + + if (ret != NI_RETCODE_SUCCESS) { + av_frame_free(&overlay); + av_log(ctx, AV_LOG_ERROR, + "Can't acquire output frame of overlay %d\n", ret); + return AVERROR(ENOMEM); + } + } + +#ifdef NI_MEASURE_LATENCY + ff_ni_update_benchmark("ni_quadra_drawtext"); +#endif + + if (s->use_watermark) { + out = av_frame_alloc(); + if (!out) { + return AVERROR(ENOMEM); + } + + av_frame_copy_props(out,frame); + + out->width = outlink->w; + out->height = outlink->h; + out->format = AV_PIX_FMT_NI_QUAD; + + /* Quadra 2D engine always outputs limited color range */ + out->color_range = AVCOL_RANGE_MPEG; + + /* Reference the new hw frames context */ + out->hw_frames_ctx = av_buffer_ref(s->out_frames_ref); + // av_hwframe_get_buffer(s->hw_frames_ctx, out, 0); + out->data[3] = av_malloc(sizeof(niFrameSurface1_t)); + + if (!out->data[3]) { + av_frame_free(&out); + return AVERROR(ENOMEM); + } + + /* Copy the frame surface from the incoming frame */ + memcpy(out->data[3], frame->data[3], sizeof(niFrameSurface1_t)); + frame_surface = (niFrameSurface1_t *) out->data[3]; + new_frame_surface = (niFrameSurface1_t *) s->api_dst_frame.data.frame.p_data[3]; + frame_surface->ui16FrameIdx = new_frame_surface->ui16FrameIdx; + frame_surface->ui16session_ID = new_frame_surface->ui16session_ID; + frame_surface->device_handle = new_frame_surface->device_handle; + frame_surface->output_idx = new_frame_surface->output_idx; + frame_surface->src_cpu = new_frame_surface->src_cpu; + frame_surface->dma_buf_fd = 0; + + ff_ni_set_bit_depth_and_encoding_type(&frame_surface->bit_depth, + &frame_surface->encoding_type, + main_frame_ctx->sw_format); + + /* Remove ni-split specific assets */ + frame_surface->ui32nodeAddress = 0; + + frame_surface->ui16width = out->width; + frame_surface->ui16height = out->height; + + out->buf[0] = av_buffer_create(out->data[3], sizeof(niFrameSurface1_t), ff_ni_frame_free, NULL, 0); + + av_log(ctx, AV_LOG_DEBUG, + "%s:IN trace ui16FrameIdx = [%d] over [%d] --> out [%d]\n", + __func__, ovly_frame_idx, main_frame_idx, frame_surface->ui16FrameIdx); + + av_frame_free(&frame); + return ff_filter_frame(outlink, out); + } else { + frame->color_range = AVCOL_RANGE_MPEG; + + if (!s->main_has_alpha) { + av_log(ctx, AV_LOG_DEBUG, + "%s:IN trace ui16FrameIdx = [%d] and [%d] and [%d] --> out [%d]\n", + __func__, main_frame_idx, ovly_frame_idx, s->ui16CropFrameIdx, + main_frame_idx); + } else { + av_log(ctx, AV_LOG_DEBUG, + "%s:IN trace ui16FrameIdx = [%d] and [%d] --> out [%d]\n", + __func__, main_frame_idx, ovly_frame_idx, main_frame_idx); + } + + if (!s->main_has_alpha) { + ni_hwframe_buffer_recycle((niFrameSurface1_t *) + s->crop_api_dst_frame.data.frame.p_data[3], + (int32_t)s->crop_api_ctx.device_handle); + } + + return ff_filter_frame(outlink, frame); + } +fail: + return ret; +} + +static int activate(AVFilterContext *ctx) +{ + AVFilterLink *inlink = ctx->inputs[0]; + AVFilterLink *outlink = ctx->outputs[0]; + AVFrame *frame = NULL; + int ret = 0; + NetIntDrawTextContext *s = inlink->dst->priv; + + // Forward the status on output link to input link, if the status is set, discard all queued frames + FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink); + + 
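+    // Poll the device for output buffer availability before consuming the
+    // queued frame: firmware without the query gets a warning and proceeds,
+    // while any other negative query result applies backpressure by
+    // returning FFERROR_NOT_READY without consuming input.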
if (ff_inlink_check_available_frame(inlink)) { + if (s->initialized) { + ret = ni_device_session_query_buffer_avail(&s->api_ctx, NI_DEVICE_TYPE_SCALER); + } + + if (ret == NI_RETCODE_ERROR_UNSUPPORTED_FW_VERSION) { + av_log(ctx, AV_LOG_WARNING, "No backpressure support in FW\n"); + } else if (ret < 0) { + av_log(ctx, AV_LOG_WARNING, "%s: query ret %d, ready %u inlink framequeue %zu available_frame %d outlink framequeue %zu frame_wanted %d - return NOT READY\n", + __func__, ret, ctx->ready, ff_inlink_queued_frames(inlink), ff_inlink_check_available_frame(inlink), ff_inlink_queued_frames(outlink), ff_outlink_frame_wanted(outlink)); + return FFERROR_NOT_READY; + } + + ret = ff_inlink_consume_frame(inlink, &frame); + if (ret < 0) + return ret; + + ret = filter_frame(inlink, frame); + if (ret >= 0) { + ff_filter_set_ready(ctx, 300); + } + return ret; + } + + // We did not get a frame from input link, check its status + FF_FILTER_FORWARD_STATUS(inlink, outlink); + + // We have no frames yet from input link and no EOF, so request some. + FF_FILTER_FORWARD_WANTED(outlink, inlink); + + return FFERROR_NOT_READY; +} + +static const AVFilterPad inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .filter_frame = filter_frame, + .config_props = config_input, + }, +}; + +static const AVFilterPad outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = config_output, + }, +}; + +FFFilter ff_vf_drawtext_ni_quadra = { + .p.name = "ni_quadra_drawtext", + .p.description = NULL_IF_CONFIG_SMALL( + "NETINT Quadra draw text on top of video frames using libfreetype library v" NI_XCODER_REVISION), + .p.priv_class = &ni_drawtext_class, + .priv_size = sizeof(NetIntDrawTextContext), + .init = init, + .uninit = uninit, + .activate = activate, + FILTER_QUERY_FUNC(query_formats), + FILTER_INPUTS(inputs), + FILTER_OUTPUTS(outputs), + .process_command = command, + .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC, + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +}; diff --git a/libavfilter/vf_flip_ni.c b/libavfilter/vf_flip_ni.c new file mode 100644 index 0000000000..905c290fb5 --- /dev/null +++ b/libavfilter/vf_flip_ni.c @@ -0,0 +1,469 @@ +/* + * Copyright (c) 2013 Stefano Sabatini + * Copyright (c) 2008 Vitor Sessak + * Copyright (c) 2022 NETINT Technologies Inc. + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * flip filter, based on the FFmpeg flip filter +*/ + +#include <string.h> + +#include "libavutil/opt.h" + +#include "nifilter.h" +#include "filters.h" +#include "formats.h" +#include "libavutil/mem.h" +#include "fftools/ffmpeg_sched.h" +#include "libavutil/avstring.h" + +typedef struct NetIntFlipContext { + const AVClass *class; + + AVBufferRef *out_frames_ref; + + ni_session_context_t api_ctx; + ni_session_data_io_t api_dst_frame; + + int flip_type; + bool initialized; + bool session_opened; + int64_t keep_alive_timeout; + int buffer_limit; +} NetIntFlipContext; + +static int query_formats(AVFilterContext *ctx) +{ + static const enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_NI_QUAD, AV_PIX_FMT_NONE }; + AVFilterFormats *fmts_list = NULL; + + fmts_list = ff_make_format_list(pix_fmts); + if (!fmts_list) { + return AVERROR(ENOMEM); + } + + return ff_set_common_formats(ctx, fmts_list); +} + +static av_cold int init(AVFilterContext *ctx) +{ + return 0; +} + +static av_cold void uninit(AVFilterContext *ctx) +{ + NetIntFlipContext *flip = ctx->priv; + + if (flip->api_dst_frame.data.frame.p_buffer) { + ni_frame_buffer_free(&flip->api_dst_frame.data.frame); + } + + if (flip->session_opened) { + /* Close operation will free the device frames */ + ni_device_session_close(&flip->api_ctx, 1, NI_DEVICE_TYPE_SCALER); + ni_device_session_context_clear(&flip->api_ctx); + } + + av_buffer_unref(&flip->out_frames_ref); +} + +static int config_props(AVFilterLink *outlink) +{ + AVFilterContext *ctx = outlink->src; + NetIntFlipContext *flip = ctx->priv; + AVFilterLink *inlink = ctx->inputs[0]; + AVHWFramesContext *in_frames_ctx, *out_frames_ctx; + + // Quadra 2D engine only supports even pixel widths and heights + outlink->w = FFALIGN(inlink->w, 2); + outlink->h = FFALIGN(inlink->h, 2); + + if (outlink->w > NI_MAX_RESOLUTION_WIDTH || + outlink->h > NI_MAX_RESOLUTION_HEIGHT) { + av_log(ctx, AV_LOG_ERROR, "Resolution %dx%d > %dx%d is not allowed\n", + outlink->w, outlink->h, + NI_MAX_RESOLUTION_WIDTH, NI_MAX_RESOLUTION_HEIGHT); + return AVERROR(EINVAL); + } + + FilterLink *li = ff_filter_link(inlink); + if (li->hw_frames_ctx == NULL) { + av_log(ctx, AV_LOG_ERROR, "No hw context provided on input\n"); + return AVERROR(EINVAL); + } + in_frames_ctx = (AVHWFramesContext *) li->hw_frames_ctx->data; + + if (in_frames_ctx->sw_format == AV_PIX_FMT_NI_QUAD_8_TILE_4X4 || + in_frames_ctx->sw_format == AV_PIX_FMT_NI_QUAD_10_TILE_4X4) { + av_log(ctx, AV_LOG_ERROR, "tile4x4 not supported\n"); + return AVERROR(EINVAL); + } + + av_log(ctx, AV_LOG_VERBOSE, + "w:%d h:%d fmt:%s sar:%d/%d -> w:%d h:%d fmt:%s sar:%d/%d\n", + inlink->w, inlink->h, av_get_pix_fmt_name(inlink->format), + inlink->sample_aspect_ratio.num, inlink->sample_aspect_ratio.den, + outlink->w, outlink->h, av_get_pix_fmt_name(outlink->format), + outlink->sample_aspect_ratio.num, outlink->sample_aspect_ratio.den); + + flip->out_frames_ref = av_hwframe_ctx_alloc(in_frames_ctx->device_ref); + if (!flip->out_frames_ref) { + return AVERROR(ENOMEM); + } + + out_frames_ctx = (AVHWFramesContext *) flip->out_frames_ref->data; + + out_frames_ctx->format = AV_PIX_FMT_NI_QUAD; + out_frames_ctx->width = outlink->w; + out_frames_ctx->height = outlink->h; + out_frames_ctx->sw_format = 
in_frames_ctx->sw_format; + out_frames_ctx->initial_pool_size = NI_FLIP_ID; // Repurposed as identity code + + int ret = av_hwframe_ctx_init(flip->out_frames_ref); + if (ret < 0) + return ret; + + FilterLink *lo = ff_filter_link(ctx->outputs[0]); + av_buffer_unref(&lo->hw_frames_ctx); + lo->hw_frames_ctx = av_buffer_ref(flip->out_frames_ref); + + if (!lo->hw_frames_ctx) { + return AVERROR(ENOMEM); + } + + return 0; +} + +static int init_out_pool(AVFilterContext *ctx) +{ + NetIntFlipContext *flip = ctx->priv; + AVHWFramesContext *out_frames_context; + int pool_size = DEFAULT_NI_FILTER_POOL_SIZE; + + out_frames_context = (AVHWFramesContext*)flip->out_frames_ref->data; + pool_size += ctx->extra_hw_frames > 0 ? ctx->extra_hw_frames : 0; + flip->buffer_limit = 1; + + /* Create frame pool on device */ + return ff_ni_build_frame_pool(&flip->api_ctx, + out_frames_context->width, + out_frames_context->height, + out_frames_context->sw_format, + pool_size, + flip->buffer_limit); +} + +static int filter_frame(AVFilterLink *inlink, AVFrame *in) +{ + AVFilterContext *ctx = inlink->dst; + AVFilterLink *outlink = inlink->dst->outputs[0]; + AVFrame *out = NULL; + NetIntFlipContext *flip = ctx->priv; + AVBufferRef *out_buffer_ref = flip->out_frames_ref; + AVHWFramesContext *in_frames_context = (AVHWFramesContext *) in->hw_frames_ctx->data; + AVNIDeviceContext *av_ni_device_context = (AVNIDeviceContext *) in_frames_context->device_ctx->hwctx; + ni_retcode_t ni_retcode = NI_RETCODE_SUCCESS; + niFrameSurface1_t *frame_surface = (niFrameSurface1_t *) in->data[3], *frame_surface2 = NULL; + ni_frame_config_t input_frame_config = {0}; + uint32_t scaler_format; + int retcode = 0, card_number = ni_get_cardno(in); + + if (!frame_surface) { + av_log(ctx, AV_LOG_ERROR, "ni flip filter frame_surface should not be NULL\n"); + return AVERROR(EINVAL); + } + + if (!flip->initialized) { + ni_retcode = ni_device_session_context_init(&flip->api_ctx); + if (ni_retcode != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "ni flip filter session context init failed with %d\n", ni_retcode); + retcode = AVERROR(EINVAL); + goto FAIL; + } + + flip->api_ctx.device_handle = flip->api_ctx.blk_io_handle = av_ni_device_context->cards[card_number]; + + flip->api_ctx.hw_id = card_number; + flip->api_ctx.device_type = NI_DEVICE_TYPE_SCALER; + flip->api_ctx.scaler_operation = NI_SCALER_OPCODE_FLIP; // Flip operation compatible with crop + flip->api_ctx.keep_alive_timeout = flip->keep_alive_timeout; + + ni_retcode = ni_device_session_open(&flip->api_ctx, NI_DEVICE_TYPE_SCALER); + if (ni_retcode != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "ni flip filter device session open failed with %d\n", ni_retcode); + retcode = ni_retcode; + /* Close operation will free the device frames */ + ni_device_session_close(&flip->api_ctx, 1, NI_DEVICE_TYPE_SCALER); + ni_device_session_context_clear(&flip->api_ctx); + goto FAIL; + } + + flip->session_opened = true; + + if (!((av_strstart(outlink->dst->filter->name, "ni_quadra", NULL)) || (av_strstart(outlink->dst->filter->name, "hwdownload", NULL)))) { + inlink->dst->extra_hw_frames = (DEFAULT_FRAME_THREAD_QUEUE_SIZE > 1) ? 
DEFAULT_FRAME_THREAD_QUEUE_SIZE : 0; + } + ni_retcode = init_out_pool(inlink->dst); + if (ni_retcode != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "ni flip filter init out pool failed with %d\n", ni_retcode); + goto FAIL; + } + + AVHWFramesContext *out_frames_ctx = (AVHWFramesContext *)out_buffer_ref->data; + AVNIFramesContext *out_ni_ctx = (AVNIFramesContext *)out_frames_ctx->hwctx; + ni_cpy_hwframe_ctx(in_frames_context, out_frames_ctx); + ni_device_session_copy(&flip->api_ctx, &out_ni_ctx->api_ctx); + + AVHWFramesContext *pAVHFWCtx = (AVHWFramesContext *) in->hw_frames_ctx->data; + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pAVHFWCtx->sw_format); + + if ((in->color_range == AVCOL_RANGE_JPEG) && !(desc->flags & AV_PIX_FMT_FLAG_RGB)) { + av_log(ctx, AV_LOG_WARNING, "Full color range input, limited color output\n"); + } + + flip->initialized = true; + } + + ni_retcode = ni_frame_buffer_alloc_hwenc(&flip->api_dst_frame.data.frame, + outlink->w, + outlink->h, + 0); + if (ni_retcode != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "ni flip filter frame buffer alloc hwenc failed with %d\n", ni_retcode); + retcode = AVERROR(ENOMEM); + goto FAIL; + } + + // Input. + scaler_format = ff_ni_ffmpeg_to_gc620_pix_fmt(in_frames_context->sw_format); + input_frame_config.picture_format = scaler_format; + + input_frame_config.rgba_color = frame_surface->ui32nodeAddress; + input_frame_config.frame_index = frame_surface->ui16FrameIdx; + + input_frame_config.rectangle_x = 0; + input_frame_config.rectangle_y = 0; + input_frame_config.rectangle_width = input_frame_config.picture_width = in->width; + input_frame_config.rectangle_height = input_frame_config.picture_height = in->height; + + if (flip->flip_type == 0) { + //hflip + input_frame_config.orientation = 4; + } else if (flip->flip_type == 1) { + //vflip + input_frame_config.orientation = 5; + } + +#ifdef NI_MEASURE_LATENCY + ff_ni_update_benchmark(NULL); +#endif + + // use ni_device_config_frame() instead of ni_device_alloc_frame() + // such that input_frame_config's orientation can be configured + ni_retcode = ni_device_config_frame(&flip->api_ctx, &input_frame_config); + if (ni_retcode != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "ni flip filter device config input frame failed with %d\n", ni_retcode); + retcode = AVERROR(ENOMEM); + goto FAIL; + } + + ni_retcode = ni_device_alloc_frame(&flip->api_ctx, + outlink->w, + outlink->h, + scaler_format, + NI_SCALER_FLAG_IO, + 0, + 0, + 0, + 0, + 0, + -1, + NI_DEVICE_TYPE_SCALER); + + if (ni_retcode != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "ni flip filter device alloc output frame failed with %d\n", ni_retcode); + retcode = AVERROR(ENOMEM); + goto FAIL; + } + + out = av_frame_alloc(); + if (!out) { + av_log(ctx, AV_LOG_ERROR, "ni flip filter av_frame_alloc returned NULL\n"); + retcode = AVERROR(ENOMEM); + goto FAIL; + } + + av_frame_copy_props(out, in); + + out->width = outlink->w; + out->height = outlink->h; + out->format = AV_PIX_FMT_NI_QUAD; + out->color_range = AVCOL_RANGE_MPEG; + + out->hw_frames_ctx = av_buffer_ref(out_buffer_ref); + out->data[3] = av_malloc(sizeof(niFrameSurface1_t)); + if (!out->data[3]) { + av_log(ctx, AV_LOG_ERROR, "ni flip filter av_malloc returned NULL\n"); + retcode = AVERROR(ENOMEM); + goto FAIL; + } + memcpy(out->data[3], frame_surface, sizeof(niFrameSurface1_t)); + + ni_retcode = ni_device_session_read_hwdesc(&flip->api_ctx, + &flip->api_dst_frame, + NI_DEVICE_TYPE_SCALER); + if (ni_retcode != NI_RETCODE_SUCCESS) { + av_log(ctx, 
AV_LOG_ERROR, "ni flip filter read hwdesc failed with %d\n", ni_retcode); + retcode = AVERROR(ENOMEM); + goto FAIL; + } + +#ifdef NI_MEASURE_LATENCY + ff_ni_update_benchmark("ni_quadra_flip"); +#endif + + frame_surface2 = (niFrameSurface1_t *) flip->api_dst_frame.data.frame.p_data[3]; + + frame_surface = (niFrameSurface1_t *) out->data[3]; + frame_surface->ui16FrameIdx = frame_surface2->ui16FrameIdx; + frame_surface->ui16session_ID = frame_surface2->ui16session_ID; + frame_surface->device_handle = frame_surface2->device_handle; + frame_surface->output_idx = frame_surface2->output_idx; + frame_surface->src_cpu = frame_surface2->src_cpu; + frame_surface->ui32nodeAddress = 0; + frame_surface->dma_buf_fd = 0; + ff_ni_set_bit_depth_and_encoding_type(&frame_surface->bit_depth, + &frame_surface->encoding_type, + in_frames_context->sw_format); + frame_surface->ui16width = out->width; + frame_surface->ui16height = out->height; + + out->buf[0] = av_buffer_create(out->data[3], + sizeof(niFrameSurface1_t), + ff_ni_frame_free, + NULL, + 0); + if (!out->buf[0]) { + av_log(ctx, AV_LOG_ERROR, "ni flip filter av_buffer_create returned NULL\n"); + retcode = AVERROR(ENOMEM); + goto FAIL; + } + + av_frame_free(&in); + return ff_filter_frame(inlink->dst->outputs[0], out); + +FAIL: + av_frame_free(&in); + if (out) + av_frame_free(&out); + return retcode; +} + +static int activate(AVFilterContext *ctx) +{ + AVFilterLink *inlink = ctx->inputs[0]; + AVFilterLink *outlink = ctx->outputs[0]; + AVFrame *frame = NULL; + int ret = 0; + NetIntFlipContext *s = inlink->dst->priv; + + // Forward the status on output link to input link, if the status is set, discard all queued frames + FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink); + + if (ff_inlink_check_available_frame(inlink)) { + if (s->initialized) { + ret = ni_device_session_query_buffer_avail(&s->api_ctx, NI_DEVICE_TYPE_SCALER); + } + + if (ret == NI_RETCODE_ERROR_UNSUPPORTED_FW_VERSION) { + av_log(ctx, AV_LOG_WARNING, "No backpressure support in FW\n"); + } else if (ret < 0) { + av_log(ctx, AV_LOG_WARNING, "%s: query ret %d, ready %u inlink framequeue %u available_frame %d outlink framequeue %u frame_wanted %d - return NOT READY\n", + __func__, ret, ctx->ready, ff_inlink_queued_frames(inlink), ff_inlink_check_available_frame(inlink), ff_inlink_queued_frames(outlink), ff_outlink_frame_wanted(outlink)); + return FFERROR_NOT_READY; + } + + ret = ff_inlink_consume_frame(inlink, &frame); + if (ret < 0) + return ret; + + ret = filter_frame(inlink, frame); + if (ret >= 0) { + ff_filter_set_ready(ctx, 300); + } + return ret; + } + + // We did not get a frame from input link, check its status + FF_FILTER_FORWARD_STATUS(inlink, outlink); + + // We have no frames yet from input link and no EOF, so request some. 
+ FF_FILTER_FORWARD_WANTED(outlink, inlink); + + return FFERROR_NOT_READY; +} + +#define OFFSET(x) offsetof(NetIntFlipContext, x) +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM) + +static const AVOption ni_flip_options[] = { + { "flip_type", "choose horizontal or vertical flip", OFFSET(flip_type), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, FLAGS, "flip_type" }, + { "horizontal", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 0}, 0, 0, FLAGS, "flip_type" }, + { "h", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 0}, 0, 0, FLAGS, "flip_type" }, + { "vertical", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 1}, 0, 0, FLAGS, "flip_type" }, + { "v", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 1}, 0, 0, FLAGS, "flip_type" }, + NI_FILT_OPTION_KEEPALIVE, + NI_FILT_OPTION_BUFFER_LIMIT, + { NULL } +}; + +AVFILTER_DEFINE_CLASS(ni_flip); + +static const AVFilterPad inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .filter_frame = filter_frame, + }, +}; + +static const AVFilterPad outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = config_props, + }, +}; + +FFFilter ff_vf_flip_ni_quadra = { + .p.name = "ni_quadra_flip", + .p.description = NULL_IF_CONFIG_SMALL( + "NETINT Quadra flip the input video v" NI_XCODER_REVISION), + .p.priv_class = &ni_flip_class, + .priv_size = sizeof(NetIntFlipContext), + .init = init, + .uninit = uninit, + .activate = activate, + FILTER_QUERY_FUNC(query_formats), + FILTER_INPUTS(inputs), + FILTER_OUTPUTS(outputs), + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +}; diff --git a/libavfilter/vf_hvsplus_ni.c b/libavfilter/vf_hvsplus_ni.c new file mode 100644 index 0000000000..438098a896 --- /dev/null +++ b/libavfilter/vf_hvsplus_ni.c @@ -0,0 +1,1792 @@ +/* +* Copyright (c) 2024 NetInt +* +* This file is part of FFmpeg. +* +* FFmpeg is free software; you can redistribute it and/or +* modify it under the terms of the GNU Lesser General Public +* License as published by the Free Software Foundation; either +* version 2.1 of the License, or (at your option) any later version. +* +* FFmpeg is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* Lesser General Public License for more details. 
+* +* You should have received a copy of the GNU Lesser General Public +* License along with FFmpeg; if not, write to the Free Software +* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#include <float.h> +#include <math.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> + +#include "nifilter.h" +#include "filters.h" +#include "formats.h" +#include "libavutil/mem.h" +#include "fftools/ffmpeg_sched.h" +#if HAVE_IO_H +#include <io.h> +#endif +#include "libavutil/avassert.h" +#include "libavutil/avstring.h" +#include "libavutil/common.h" +#include "libavutil/eval.h" +#include "libavutil/colorspace.h" +#include "libavutil/imgutils.h" +#include "libavutil/internal.h" +#include "libavutil/mathematics.h" +#include "libavutil/opt.h" +#include "libavutil/parseutils.h" +#include "libavutil/pixdesc.h" +#include "libavutil/time.h" +#include "libswscale/swscale.h" +#include "drawutils.h" +#include "ni_device_api.h" +#include "ni_util.h" +#include "video.h" + +#define NI_NUM_FRAMES_IN_QUEUE 8 + +// hvsplus related definition +typedef struct _ni_hvsplus_network_layer { + int32_t width; + int32_t height; + int32_t channel; + int32_t classes; + int32_t component; + int32_t output_number; + float *output; +} ni_hvsplus_network_layer_t; + +typedef struct _ni_hvsplus_nbsize { + int32_t width; + int32_t height; +} ni_hvsplus_nbsize_t; + +typedef struct _ni_hvsplus_network { + int32_t netw; + int32_t neth; + int32_t net_out_w; + int32_t net_out_h; + ni_network_data_t raw; + ni_hvsplus_network_layer_t *layers; +} ni_hvsplus_network_t; + +typedef struct HwPadContext { + uint8_t rgba_color[4]; ///< color for the padding area + ni_session_context_t api_ctx; + ni_session_data_io_t api_dst_frame; +} HwPadContext; + +typedef struct HwCropContext { + ni_session_context_t api_ctx; + ni_session_data_io_t api_dst_frame; +} HwCropContext; + +typedef struct AiContext { + ni_session_context_t api_ctx; + ni_session_data_io_t api_src_frame; + ni_session_data_io_t api_dst_frame; +} AiContext; + +typedef struct NetIntHvsplusContext { + const AVClass *class; + int level; + int initialized; + int devid; + int in_width, in_height; + int out_width, out_height; + int nb_width, nb_height; + int need_padding; + + AiContext *ai_ctx; + AVBufferRef *out_frames_ref; + HwPadContext *hwp_ctx; + HwCropContext *hwc_ctx; + + ni_hvsplus_network_t network; + + int keep_alive_timeout; /* keep alive timeout setting */ + int ai_timeout; + int channel_mode; + int buffer_limit; +} NetIntHvsplusContext; + +static const ni_hvsplus_nbsize_t nbSizes[] = { + {512, 288}, + {704, 396}, + {720, 1280}, + {960, 540}, + {1280, 720}, + {1920, 1080}, + {3840, 2160} +}; + +// Find the smallest NB size that is equal to or larger than the input size +// -1: not supported, 0: matched, >0: index of nbSize + 1 +static int findNBSize(int frameWidth, int frameHeight) +{ + + int numSizes = sizeof(nbSizes) / sizeof(nbSizes[0]); + int retval = -1; + + // Iterate through the existing NB sizes to find the smallest one that fits + for (int i = 0; i < numSizes; i++) { + if (frameWidth == nbSizes[i].width && frameHeight == nbSizes[i].height) { + av_log(NULL, AV_LOG_INFO, "%s: matched w %d h %d\n", __func__, nbSizes[i].width, nbSizes[i].height); + retval = 0; + break; + } else if (frameWidth <= nbSizes[i].width && frameHeight <= nbSizes[i].height) { + av_log(NULL, AV_LOG_INFO, "%s: w %d h %d\n", __func__, nbSizes[i].width, nbSizes[i].height); + retval = i+1; + break; + } + } + return retval; +} + +static int 
query_formats(AVFilterContext *ctx) +{ + AVFilterFormats *formats; + + static const enum AVPixelFormat pix_fmts[] = { + AV_PIX_FMT_YUV420P, + AV_PIX_FMT_YUVJ420P, + AV_PIX_FMT_YUV420P10LE, + AV_PIX_FMT_NI_QUAD, + AV_PIX_FMT_NONE, + }; + + formats = ff_make_format_list(pix_fmts); + if (!formats) + return AVERROR(ENOMEM); + + return ff_set_common_formats(ctx, formats); +} + +static void cleanup_ai_context(AVFilterContext *ctx, NetIntHvsplusContext *s) +{ + ni_retcode_t retval; + AiContext *ai_ctx = s->ai_ctx; + + if (ai_ctx) { + ni_frame_buffer_free(&ai_ctx->api_src_frame.data.frame); + + retval = + ni_device_session_close(&ai_ctx->api_ctx, 1, NI_DEVICE_TYPE_AI); + if (retval != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, + "Error: failed to close ai session. retval %d\n", retval); + } + if (ai_ctx->api_ctx.hw_action != NI_CODEC_HW_ENABLE) { +#ifdef _WIN32 + if (ai_ctx->api_ctx.device_handle != NI_INVALID_DEVICE_HANDLE) { + ni_device_close(ai_ctx->api_ctx.device_handle); + } +#elif __linux__ + if (ai_ctx->api_ctx.device_handle != NI_INVALID_DEVICE_HANDLE) { + ni_device_close(ai_ctx->api_ctx.device_handle); + } + if (ai_ctx->api_ctx.blk_io_handle != NI_INVALID_DEVICE_HANDLE) { + ni_device_close(ai_ctx->api_ctx.blk_io_handle); + } +#endif + ni_packet_buffer_free(&ai_ctx->api_dst_frame.data.packet); + ai_ctx->api_ctx.device_handle = NI_INVALID_DEVICE_HANDLE; + ai_ctx->api_ctx.blk_io_handle = NI_INVALID_DEVICE_HANDLE; + } else { + ni_frame_buffer_free(&ai_ctx->api_dst_frame.data.frame); + } + ni_device_session_context_clear(&ai_ctx->api_ctx); + av_free(ai_ctx); + s->ai_ctx = NULL; + } +} + + +static int init_ai_context(AVFilterContext *ctx, NetIntHvsplusContext *s, + AVFrame *frame) +{ + ni_retcode_t retval; + AiContext *ai_ctx; + ni_hvsplus_network_t *network = &s->network; + int hwframe = frame->format == AV_PIX_FMT_NI_QUAD ? 1 : 0; + int ret; + AVHWFramesContext *pAVHFWCtx; + AVNIDeviceContext *pAVNIDevCtx; + AVHWFramesContext *out_frames_ctx; + AVNIFramesContext *f_hwctx; + int cardno; + int format; + int options; + + av_log(ctx, AV_LOG_INFO, "%s: %d x %d format %s\n", __func__, + s->out_width, s->out_height, av_get_pix_fmt_name(frame->format)); + + ai_ctx = av_mallocz(sizeof(AiContext)); + if (!ai_ctx) { + av_log(ctx, AV_LOG_ERROR, "Error: failed to allocate ai context\n"); + return AVERROR(ENOMEM); + } + s->ai_ctx = ai_ctx; + retval = ni_device_session_context_init(&ai_ctx->api_ctx); + if (retval != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Error: ai session context init failure\n"); + return AVERROR(EIO); + } + + if (hwframe) { + pAVHFWCtx = (AVHWFramesContext*) frame->hw_frames_ctx->data; + pAVNIDevCtx = (AVNIDeviceContext*) pAVHFWCtx->device_ctx->hwctx; + cardno = ni_get_cardno(frame); + + ai_ctx->api_ctx.device_handle = pAVNIDevCtx->cards[cardno]; + ai_ctx->api_ctx.blk_io_handle = pAVNIDevCtx->cards[cardno]; + ai_ctx->api_ctx.hw_action = NI_CODEC_HW_ENABLE; + ai_ctx->api_ctx.hw_id = cardno; + } else { + ai_ctx->api_ctx.hw_id = s->devid; + } + + ai_ctx->api_ctx.device_type = NI_DEVICE_TYPE_AI; + ai_ctx->api_ctx.keep_alive_timeout = s->keep_alive_timeout; + + retval = ni_device_session_open(&ai_ctx->api_ctx, NI_DEVICE_TYPE_AI); + if (retval != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Error: failed to open ai session. 
retval %d\n", + retval); + ret = AVERROR(EIO); + goto failed_out; + } + + // Configure NB file + av_log(ctx, AV_LOG_DEBUG, "%s: out w %d h %d NB w %d h %d sw_format %s pixel_format %d\n", __func__, + s->out_width, s->out_height, s->nb_width, s->nb_height, av_get_pix_fmt_name(hwframe?pAVHFWCtx->sw_format:frame->format), + ai_ctx->api_ctx.pixel_format); + + ai_ctx->api_ctx.active_video_width = s->nb_width; + ai_ctx->api_ctx.active_video_height = s->nb_height; + ai_ctx->api_ctx.hvsplus_level = s->level; + ai_ctx->api_ctx.pixel_format = ff_ni_ffmpeg_to_libxcoder_pix_fmt( + (hwframe ? pAVHFWCtx->sw_format : frame->format)); + + retval = ni_ai_config_hvsplus(&ai_ctx->api_ctx, &network->raw); + + if (retval != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Error: failed to configure ai session. retval %d\n", + retval); + ret = AVERROR(EIO); + goto failed_out; + } + + if (!hwframe) { + return 0; + } + out_frames_ctx = (AVHWFramesContext*) s->out_frames_ref->data; + f_hwctx = (AVNIFramesContext*) out_frames_ctx->hwctx; + f_hwctx->api_ctx.session_timestamp = ai_ctx->api_ctx.session_timestamp; + + // Create frame pool + format = ff_ni_ffmpeg_to_gc620_pix_fmt(pAVHFWCtx->sw_format); + options = NI_AI_FLAG_IO | NI_AI_FLAG_PC; + if (s->buffer_limit) + options |= NI_AI_FLAG_LM; + + /* Allocate a pool of frames by the AI */ + retval = ni_device_alloc_frame(&ai_ctx->api_ctx, FFALIGN(s->nb_width, 2), + FFALIGN(s->nb_height, 2), format, options, 0, // rec width + 0, // rec height + 0, // rec X pos + 0, // rec Y pos + 8, // rgba color/pool size + 0, // frame index + NI_DEVICE_TYPE_AI); + if (retval != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Error: failed to create buffer pool\n"); + ret = AVERROR(ENOMEM); + goto failed_out; + } + retval = ni_frame_buffer_alloc_hwenc(&ai_ctx->api_dst_frame.data.frame, + FFALIGN(s->nb_width, 2), FFALIGN(s->nb_height, 2), 0); + + if (retval != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Error: failed to allocate ni dst frame\n"); + ret = AVERROR(ENOMEM); + goto failed_out; + } + + return 0; + + failed_out: cleanup_ai_context(ctx, s); + return ret; +} + +static void ni_destroy_network(AVFilterContext *ctx, + ni_hvsplus_network_t *network) { + if (network) { + int i; + + if (network->layers) { + for (i = 0; i < network->raw.output_num; i++) { + av_freep(&network->layers[i].output); + } + + av_freep(&network->layers); + } + } +} + +static int ni_create_network(AVFilterContext *ctx, ni_hvsplus_network_t *network) +{ + int ret; + int i; + ni_network_data_t *ni_network = &network->raw; + + av_log(ctx, AV_LOG_INFO, "network input number %d, output number %d\n", + ni_network->input_num, ni_network->output_num); + + if (ni_network->input_num == 0 || ni_network->output_num == 0) { + av_log(ctx, AV_LOG_ERROR, "Error: invalid network layer\n"); + return AVERROR(EINVAL); + } + + network->layers = + av_malloc(sizeof(ni_hvsplus_network_layer_t) * ni_network->output_num); + if (!network->layers) { + av_log(ctx, AV_LOG_ERROR, "Error: cannot allocate network layer memory\n"); + return AVERROR(ENOMEM); + } + memset(network->layers, 0, + sizeof(ni_hvsplus_network_layer_t) * ni_network->output_num); + + for (i = 0; i < ni_network->output_num; i++) { + network->layers[i].channel = ni_network->linfo.out_param[i].sizes[0]; + network->layers[i].width = ni_network->linfo.out_param[i].sizes[1]; + network->layers[i].height = ni_network->linfo.out_param[i].sizes[2]; + network->layers[i].component = 3; + network->layers[i].classes = + (network->layers[i].channel / 
network->layers[i].component) - + (4 + 1); + network->layers[i].output_number = + ni_ai_network_layer_dims(&ni_network->linfo.out_param[i]); + av_assert0(network->layers[i].output_number == + network->layers[i].width * network->layers[i].height * + network->layers[i].channel); + + network->layers[i].output = + av_malloc(network->layers[i].output_number * sizeof(float)); + if (!network->layers[i].output) { + av_log(ctx, AV_LOG_ERROR, + "Error: failed to allocate network layer %d output buffer\n", i); + ret = AVERROR(ENOMEM); + goto out; + } + + av_log(ctx, AV_LOG_DEBUG, "%s: network layer %d: w %d, h %d, ch %d, co %d, cl %d\n", __func__, i, + network->layers[i].width, network->layers[i].height, + network->layers[i].channel, network->layers[i].component, + network->layers[i].classes); + } + + network->netw = ni_network->linfo.in_param[0].sizes[1]; + network->neth = ni_network->linfo.in_param[0].sizes[2]; + network->net_out_w = ni_network->linfo.out_param[0].sizes[1]; + network->net_out_h = ni_network->linfo.out_param[0].sizes[2]; + + return 0; +out: + ni_destroy_network(ctx, network); + return ret; +} + +static av_cold int init_hwframe_pad(AVFilterContext *ctx, NetIntHvsplusContext *s, + enum AVPixelFormat format, + AVFrame *frame) +{ + ni_retcode_t retval; + HwPadContext *hwp_ctx; + int ret; + AVHWFramesContext *pAVHFWCtx; + AVNIDeviceContext *pAVNIDevCtx; + int cardno; + + av_log(ctx, AV_LOG_INFO, "%s: format %s\n", __func__, av_get_pix_fmt_name(format)); + + hwp_ctx = av_mallocz(sizeof(HwPadContext)); + if (!hwp_ctx) { + av_log(ctx, AV_LOG_ERROR, "Error: could not allocate hwframe ctx\n"); + return AVERROR(ENOMEM); + } + s->hwp_ctx = hwp_ctx; + ni_device_session_context_init(&hwp_ctx->api_ctx); + + pAVHFWCtx = (AVHWFramesContext *)frame->hw_frames_ctx->data; + pAVNIDevCtx = (AVNIDeviceContext *)pAVHFWCtx->device_ctx->hwctx; + cardno = ni_get_cardno(frame); + + hwp_ctx->api_ctx.device_handle = pAVNIDevCtx->cards[cardno]; + hwp_ctx->api_ctx.blk_io_handle = pAVNIDevCtx->cards[cardno]; + hwp_ctx->api_ctx.device_type = NI_DEVICE_TYPE_SCALER; + hwp_ctx->api_ctx.scaler_operation = NI_SCALER_OPCODE_PAD; + hwp_ctx->api_ctx.hw_id = cardno; + hwp_ctx->api_ctx.keep_alive_timeout = s->keep_alive_timeout; + hwp_ctx->rgba_color[0] = 0; + hwp_ctx->rgba_color[1] = 0; + hwp_ctx->rgba_color[2] = 0; + hwp_ctx->rgba_color[3] = 255; + + + retval = ni_device_session_open(&hwp_ctx->api_ctx, NI_DEVICE_TYPE_SCALER); + if (retval != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Error: could not open scaler session\n"); + ret = AVERROR(EIO); + ni_device_session_close(&hwp_ctx->api_ctx, 1, NI_DEVICE_TYPE_SCALER); + ni_device_session_context_clear(&hwp_ctx->api_ctx); + goto out; + } + + s->buffer_limit = 1; + + /* Create scale frame pool on device */ + retval = ff_ni_build_frame_pool(&hwp_ctx->api_ctx, s->nb_width, + s->nb_height, format, + DEFAULT_NI_FILTER_POOL_SIZE, s->buffer_limit); + + if (retval < 0) { + av_log(ctx, AV_LOG_ERROR, "Error: could not build frame pool\n"); + ret = AVERROR(EIO); + ni_device_session_close(&hwp_ctx->api_ctx, 1, NI_DEVICE_TYPE_SCALER); + ni_device_session_context_clear(&hwp_ctx->api_ctx); + goto out; + } + + return 0; +out: + av_free(hwp_ctx); + return ret; +} + +static void cleanup_hwframe_pad(NetIntHvsplusContext *s) +{ + HwPadContext *hwp_ctx = s->hwp_ctx; + + if (hwp_ctx) { + ni_frame_buffer_free(&hwp_ctx->api_dst_frame.data.frame); + ni_device_session_close(&hwp_ctx->api_ctx, 1, NI_DEVICE_TYPE_SCALER); + ni_device_session_context_clear(&hwp_ctx->api_ctx); + av_free(hwp_ctx); + 
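+        // hwp_ctx is freed above; clear the stale pointer below so that a
+        // repeated cleanup call stays a harmless no-op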
s->hwp_ctx = NULL; + } +} + +static av_cold int init_hwframe_crop(AVFilterContext *ctx, NetIntHvsplusContext *s, + enum AVPixelFormat format, + AVFrame *frame) +{ + ni_retcode_t retval; + HwCropContext *hwc_ctx; + int ret; + AVHWFramesContext *pAVHFWCtx; + AVNIDeviceContext *pAVNIDevCtx; + int cardno; + int pool_size = DEFAULT_NI_FILTER_POOL_SIZE; + + av_log(ctx, AV_LOG_INFO, "%s: format %s frame pool for w %d h %d\n", + __func__, av_get_pix_fmt_name(format), s->in_width, s->in_height); + + hwc_ctx = av_mallocz(sizeof(HwCropContext)); + if (!hwc_ctx) { + av_log(ctx, AV_LOG_ERROR, "Error: could not allocate hwframe ctx\n"); + return AVERROR(ENOMEM); + } + s->hwc_ctx = hwc_ctx; + ni_device_session_context_init(&hwc_ctx->api_ctx); + + pAVHFWCtx = (AVHWFramesContext *)frame->hw_frames_ctx->data; + pAVNIDevCtx = (AVNIDeviceContext *)pAVHFWCtx->device_ctx->hwctx; + cardno = ni_get_cardno(frame); + + hwc_ctx->api_ctx.device_handle = pAVNIDevCtx->cards[cardno]; + hwc_ctx->api_ctx.blk_io_handle = pAVNIDevCtx->cards[cardno]; + hwc_ctx->api_ctx.device_type = NI_DEVICE_TYPE_SCALER; + hwc_ctx->api_ctx.scaler_operation = NI_SCALER_OPCODE_CROP; + hwc_ctx->api_ctx.hw_id = cardno; + hwc_ctx->api_ctx.keep_alive_timeout = s->keep_alive_timeout; + + retval = ni_device_session_open(&hwc_ctx->api_ctx, NI_DEVICE_TYPE_SCALER); + if (retval != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Error: could not open scaler session\n"); + ret = AVERROR(EIO); + ni_device_session_close(&hwc_ctx->api_ctx, 1, NI_DEVICE_TYPE_SCALER); + ni_device_session_context_clear(&hwc_ctx->api_ctx); + goto out; + } + pool_size += ctx->extra_hw_frames > 0 ? ctx->extra_hw_frames : 0; + s->buffer_limit = 1; + + /* Create scale frame pool on device */ + retval = ff_ni_build_frame_pool(&hwc_ctx->api_ctx, s->in_width, + s->in_height, format, + pool_size, s->buffer_limit); + + if (retval < 0) { + av_log(ctx, AV_LOG_ERROR, "Error: could not build frame pool\n"); + ret = AVERROR(EIO); + ni_device_session_close(&hwc_ctx->api_ctx, 1, NI_DEVICE_TYPE_SCALER); + ni_device_session_context_clear(&hwc_ctx->api_ctx); + goto out; + } + + return 0; +out: + av_free(hwc_ctx); + return ret; +} + +static void cleanup_hwframe_crop(NetIntHvsplusContext *s) +{ + HwCropContext *hwc_ctx = s->hwc_ctx; + + if (hwc_ctx) { + ni_frame_buffer_free(&hwc_ctx->api_dst_frame.data.frame); + ni_device_session_close(&hwc_ctx->api_ctx, 1, NI_DEVICE_TYPE_SCALER); + ni_device_session_context_clear(&hwc_ctx->api_ctx); + av_free(hwc_ctx); + s->hwc_ctx = NULL; + } +} + +static av_cold int init(AVFilterContext *ctx) +{ + NetIntHvsplusContext *s = ctx->priv; + + s->initialized = 0; + s->nb_width = -1; + s->nb_height = -1; + s->need_padding = 0; + + return 0; +} + +static av_cold void uninit(AVFilterContext *ctx) +{ + NetIntHvsplusContext *s = ctx->priv; + ni_hvsplus_network_t *network = &s->network; + + cleanup_ai_context(ctx, s); + + ni_destroy_network(ctx, network); + + av_buffer_unref(&s->out_frames_ref); + s->out_frames_ref = NULL; + + if (s->need_padding) { + cleanup_hwframe_pad(s); + cleanup_hwframe_crop(s); + } +} + +static int config_input(AVFilterContext *ctx, AVFrame *frame) +{ + NetIntHvsplusContext *s = ctx->priv; + int hwframe = frame->format == AV_PIX_FMT_NI_QUAD ? 
1 : 0; + int ret; + + if (s->initialized) + return 0; + + ret = init_ai_context(ctx, s, frame); + if (ret < 0) { + av_log(ctx, AV_LOG_ERROR, "Error: failed to initialize ai context\n"); + return ret; + } + + ret = ni_create_network(ctx, &s->network); + if (ret != 0) { + goto fail_out; + } + + if (hwframe && s->need_padding) { + AVHWFramesContext *pAVHFWCtx = (AVHWFramesContext*) frame->hw_frames_ctx->data; + av_log(ctx, AV_LOG_INFO, "%s: hw frame sw format %s\n", __func__, av_get_pix_fmt_name(pAVHFWCtx->sw_format)); + + ret = init_hwframe_pad(ctx, s, pAVHFWCtx->sw_format, frame); + if (ret < 0) { + av_log(ctx, AV_LOG_ERROR, + "Error: could not initialize hwframe pad context\n"); + goto fail_out; + } + + ret = init_hwframe_crop(ctx, s, pAVHFWCtx->sw_format, frame); + if (ret < 0) { + av_log(ctx, AV_LOG_ERROR, + "Error: could not initialize hwframe crop context\n"); + goto fail_out; + } + } + + s->initialized = 1; + return 0; + +fail_out: + cleanup_ai_context(ctx, s); + + ni_destroy_network(ctx, &s->network); + + return ret; +} + +static int output_config_props_internal(AVFilterLink *outlink) +{ + AVFilterContext *ctx = outlink->src; + AVFilterLink *inlink = outlink->src->inputs[0]; + AVHWFramesContext *in_frames_ctx; + AVHWFramesContext *out_frames_ctx; + NetIntHvsplusContext *s = ctx->priv; + int out_width, out_height; + + if (s->out_width == -1 || s->out_height == -1) { + out_width = inlink->w; + out_height = inlink->h; + s->out_width = out_width; + s->out_height = out_height; + } else { + out_width = s->out_width; + out_height = s->out_height; + } + + s->in_width = inlink->w; + s->in_height = inlink->h; + + av_log(ctx, AV_LOG_INFO, "%s: need_padding %d s->out_width %d s->out_height %d\n", __func__, s->need_padding, s->out_width, s->out_height); + + outlink->w = out_width; + outlink->h = out_height; + + FilterLink *li = ff_filter_link(inlink); + if (li->hw_frames_ctx == NULL) { + av_log(ctx, AV_LOG_DEBUG, "sw frame\n"); + return 0; + } + in_frames_ctx = (AVHWFramesContext *)li->hw_frames_ctx->data; + + if (in_frames_ctx->format != AV_PIX_FMT_NI_QUAD) { + av_log(ctx, AV_LOG_ERROR, "Error: pixel format not supported, format=%d\n", in_frames_ctx->format); + return AVERROR(EINVAL); + } + if (in_frames_ctx->sw_format == AV_PIX_FMT_NI_QUAD_8_TILE_4X4 || + in_frames_ctx->sw_format == AV_PIX_FMT_NI_QUAD_10_TILE_4X4) { + av_log(ctx, AV_LOG_ERROR, "tile4x4 not supported\n"); + return AVERROR(EINVAL); + } + + s->out_frames_ref = av_hwframe_ctx_alloc(in_frames_ctx->device_ref); + if (!s->out_frames_ref) + return AVERROR(ENOMEM); + + out_frames_ctx = (AVHWFramesContext *)s->out_frames_ref->data; + + out_frames_ctx->format = AV_PIX_FMT_NI_QUAD; + out_frames_ctx->width = outlink->w; + out_frames_ctx->height = outlink->h; + out_frames_ctx->sw_format = in_frames_ctx->sw_format; + out_frames_ctx->initial_pool_size = NI_HVSPLUS_ID; + + av_log(ctx, AV_LOG_INFO, "%s: w %d h %d\n", __func__, out_frames_ctx->width, out_frames_ctx->height); + + FilterLink *lo = ff_filter_link(ctx->outputs[0]); + av_buffer_unref(&lo->hw_frames_ctx); + lo->hw_frames_ctx = av_buffer_ref(s->out_frames_ref); + + if (!lo->hw_frames_ctx) + return AVERROR(ENOMEM); + + return 0; +} + +static int output_config_props(AVFilterLink *outlink) +{ + AVFilterContext *ctx = outlink->src; + AVFilterLink *inlink = outlink->src->inputs[0]; + NetIntHvsplusContext *s = ctx->priv; + int out_width, out_height, retval, ret; + + av_log(ctx, AV_LOG_DEBUG, "%s: inlink src %s dst %s filter %p w %d h %d\n", __func__, inlink->src->name, inlink->dst->name, s, 
inlink->w, inlink->h); + av_log(ctx, AV_LOG_DEBUG, "%s: outlink src %s dst %s filter %p w %d h %d\n", __func__, outlink->src->name, outlink->dst->name, s, outlink->w, outlink->h); + + FilterLink *li = ff_filter_link(inlink); + if ((li->hw_frames_ctx == NULL) && (inlink->format == AV_PIX_FMT_NI_QUAD)) { + av_log(ctx, AV_LOG_ERROR, "Error: No hw context provided on input\n"); + return AVERROR(EINVAL); + } + + if (s->out_width == -1 || s->out_height == -1) { + out_width = inlink->w; + out_height = inlink->h; + } else { + out_width = s->out_width; + out_height = s->out_height; + } + + // Find the width and height to be used by the AI hvsplus filter. + // If they match one of the sizes supported by the network binary, proceed. + // If they don't match, the frame is padded before and cropped after the hvsplus filter. + // Resolutions greater than 4K are not supported and cause an error return. + retval = findNBSize(inlink->w, inlink->h); + if (retval < 0) { + av_log(ctx, AV_LOG_ERROR, "Error: hvsplus doesn't support resolutions greater than 4K (width %d height %d).\n", out_width, out_height); + return AVERROR(EINVAL); + } + + if (retval == 0) { + s->nb_width = inlink->w; + s->nb_height = inlink->h; + } else { + s->nb_width = nbSizes[retval-1].width; + s->nb_height = nbSizes[retval-1].height; + s->need_padding = 1; + } + + av_log(ctx, AV_LOG_DEBUG, "%s: inlink w %d h %d NB w %d h %d need_padding %d\n", + __func__, inlink->w, inlink->h, s->nb_width, s->nb_height, s->need_padding); + + ret = output_config_props_internal(outlink); + + return ret; +} + +static int av_to_niframe_copy(NetIntHvsplusContext *s, ni_frame_t *dst, const AVFrame *src, int nb_planes) +{ + int dst_stride[4], src_height[4], hpad[4], vpad[4], linesize[4]; + int i, j, h; + uint8_t *src_line, *dst_line, YUVsample, *sample, *dest; + uint16_t lastidx; + bool tenBit; + + av_log(NULL, AV_LOG_DEBUG, "%s: src width %d height %d nb w %d nb h %d format %s linesize %d %d %d nb_planes %d\n", __func__, + src->width, src->height, s->nb_width, s->nb_height, av_get_pix_fmt_name(src->format), + src->linesize[0], src->linesize[1], src->linesize[2], nb_planes); + + linesize[0] = src->linesize[0]; + linesize[1] = src->linesize[1]; + linesize[2] = src->linesize[2]; + linesize[3] = 0; + + switch (src->format) { + case AV_PIX_FMT_YUV420P: + case AV_PIX_FMT_YUVJ420P: + dst_stride[0] = FFALIGN(s->nb_width, 128); + dst_stride[1] = FFALIGN((s->nb_width / 2), 128); + dst_stride[2] = dst_stride[1]; + dst_stride[3] = 0; + + linesize[0] = FFALIGN(src->width, 2); + linesize[1] = FFALIGN(src->width, 2) / 2; + linesize[2] = linesize[1]; + + hpad[0] = FFMAX(dst_stride[0] - linesize[0], 0); + hpad[1] = FFMAX(dst_stride[1] - linesize[1], 0); + hpad[2] = FFMAX(dst_stride[2] - linesize[2], 0); + hpad[3] = 0; + + src_height[0] = src->height; + src_height[1] = FFALIGN(src->height, 2) / 2; + src_height[2] = FFALIGN(src->height, 2) / 2; + src_height[3] = 0; + + vpad[0] = FFALIGN(s->nb_height, 2) - src->height; + vpad[1] = (FFALIGN(s->nb_height, 2) / 2) - (FFALIGN(src->height, 2) / 2); + vpad[2] = (FFALIGN(s->nb_height, 2) / 2) - (FFALIGN(src->height, 2) / 2); + vpad[3] = 0; + + tenBit = false; + break;
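+ /* + * Illustrative example of the padding math above (editor's addition, + * assuming nb_width/nb_height equal the source size): for a 1366-wide + * yuv420p luma plane, dst_stride[0] = FFALIGN(1366, 128) = 1408 and + * linesize[0] = 1366, so hpad[0] = 42 bytes of replicated edge pixels + * are appended to every luma row; a 1280-wide plane aligns exactly and + * gets hpad[0] = 0. + */ + case AV_PIX_FMT_YUV420P10LE: + dst_stride[0] = FFALIGN(s->nb_width * 2, 128); + dst_stride[1] = FFALIGN(s->nb_width, 128); + dst_stride[2] = dst_stride[1]; + dst_stride[3] = 0; + + linesize[0] = src->width * 2; + linesize[1] = src->width; + linesize[2] = linesize[1]; + + hpad[0] = FFMAX(dst_stride[0] - linesize[0], 0); + hpad[1] = FFMAX(dst_stride[1] - linesize[1], 0); + hpad[2] =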
FFMAX(dst_stride[2] - linesize[2], 0); + hpad[3] = 0; + + src_height[0] = src->height; + src_height[1] = FFALIGN(src->height, 2) / 2; + src_height[2] = FFALIGN(src->height, 2) / 2; + src_height[3] = 0; + + vpad[0] = FFALIGN(s->nb_height, 2) - src->height; + vpad[1] = (FFALIGN(s->nb_height, 2) / 2) - (FFALIGN(src->height, 2) / 2); + vpad[2] = (FFALIGN(s->nb_height, 2) / 2) - (FFALIGN(src->height, 2) / 2); + vpad[3] = 0; + + tenBit = true; + break; + case AV_PIX_FMT_NV12: + dst_stride[0] = FFALIGN(src->width, 128); + dst_stride[1] = dst_stride[0]; + dst_stride[2] = 0; + dst_stride[3] = 0; + hpad[0] = FFMAX(dst_stride[0] - linesize[0], 0); + hpad[1] = FFMAX(dst_stride[1] - linesize[1], 0); + hpad[2] = 0; + hpad[3] = 0; + + src_height[0] = src->height; + src_height[1] = FFALIGN(src->height, 2) / 2; + src_height[2] = 0; + src_height[3] = 0; + + vpad[0] = FFALIGN(src_height[0], 2) - src_height[0]; + vpad[1] = FFALIGN(src_height[1], 2) - src_height[1]; + vpad[2] = 0; + vpad[3] = 0; + + tenBit = false; + break; + case AV_PIX_FMT_NV16: + dst_stride[0] = FFALIGN(src->width, 64); + dst_stride[1] = dst_stride[0]; + dst_stride[2] = 0; + dst_stride[3] = 0; + hpad[0] = 0; + hpad[1] = 0; + hpad[2] = 0; + hpad[3] = 0; + + src_height[0] = src->height; + src_height[1] = src->height; + src_height[2] = 0; + src_height[3] = 0; + + vpad[0] = 0; + vpad[1] = 0; + vpad[2] = 0; + vpad[3] = 0; + + tenBit = false; + break; + case AV_PIX_FMT_P010LE: + dst_stride[0] = FFALIGN(src->width * 2, 128); + dst_stride[1] = dst_stride[0]; + dst_stride[2] = 0; + dst_stride[3] = 0; + hpad[0] = FFMAX(dst_stride[0] - linesize[0], 0); + hpad[1] = FFMAX(dst_stride[1] - linesize[1], 0); + hpad[2] = 0; + hpad[3] = 0; + + src_height[0] = src->height; + src_height[1] = FFALIGN(src->height, 2) / 2; + src_height[2] = 0; + src_height[3] = 0; + + vpad[0] = FFALIGN(src_height[0], 2) - src_height[0]; + vpad[1] = FFALIGN(src_height[1], 2) - src_height[1]; + vpad[2] = 0; + vpad[3] = 0; + + tenBit = true; + break; + case AV_PIX_FMT_RGBA: + case AV_PIX_FMT_BGRA: + case AV_PIX_FMT_ABGR: + case AV_PIX_FMT_ARGB: + case AV_PIX_FMT_BGR0: + dst_stride[0] = FFALIGN(src->width, 16) * 4; + dst_stride[1] = 0; + dst_stride[2] = 0; + dst_stride[3] = 0; + hpad[0] = FFMAX(dst_stride[0] - linesize[0], 0); + hpad[1] = 0; + hpad[2] = 0; + hpad[3] = 0; + + src_height[0] = src->height; + src_height[1] = 0; + src_height[2] = 0; + src_height[3] = 0; + + vpad[0] = 0; + vpad[1] = 0; + vpad[2] = 0; + vpad[3] = 0; + + tenBit = false; + break; + case AV_PIX_FMT_YUYV422: + case AV_PIX_FMT_UYVY422: + dst_stride[0] = FFALIGN(src->width, 16) * 2; + dst_stride[1] = 0; + dst_stride[2] = 0; + dst_stride[3] = 0; + hpad[0] = FFMAX(dst_stride[0] - linesize[0], 0); + hpad[1] = 0; + hpad[2] = 0; + hpad[3] = 0; + + src_height[0] = src->height; + src_height[1] = 0; + src_height[2] = 0; + src_height[3] = 0; + + vpad[0] = 0; + vpad[1] = 0; + vpad[2] = 0; + vpad[3] = 0; + + tenBit = false; + break; + default: + av_log(NULL, AV_LOG_ERROR, "Error: Pixel format %s not supported\n", + av_get_pix_fmt_name(src->format)); + return AVERROR(EINVAL); + } + av_log(NULL, AV_LOG_DEBUG, "%s: dst_stride %d %d %d linesize %d %d %d hpad %d %d %d\n", __func__, + dst_stride[0], dst_stride[1], dst_stride[2], + src->linesize[0], src->linesize[1], src->linesize[2], + hpad[0], hpad[1], hpad[2]); + av_log(NULL, AV_LOG_DEBUG, "%s: src_height %d %d %d vpad %d %d %d tenBit %d\n", __func__, + src_height[0], src_height[1], src_height[2], + vpad[0], vpad[1], vpad[2], tenBit); + + dst_line = dst->p_buffer; + for (i = 0; 
i < nb_planes; i++) { + src_line = src->data[i]; + for (h = 0; h < src_height[i]; h++) { + memcpy(dst_line, src_line, FFMIN(linesize[i], dst_stride[i])); + + if (h == 0) + av_log(NULL, AV_LOG_DEBUG, "%s: i %d h %d to %d memcpy size %d\n", __func__, i, h, src_height[i]-1, FFMIN(linesize[i], dst_stride[i])); + + if (hpad[i]) { + lastidx = linesize[i]; + + if (tenBit) { + sample = &src_line[lastidx - 2]; + dest = &dst_line[lastidx]; + + /* two bytes per sample */ + for (j = 0; j < hpad[i] / 2; j++) { + memcpy(dest, sample, 2); + dest += 2; + } + if (h == 0) + av_log(NULL, AV_LOG_DEBUG, "%s: i %d hpad %d to %d memset size %d value %d %d tenBit\n", __func__, i, h, src_height[i]-1, hpad[i], sample[0], sample[1]); + + } else { + YUVsample = dst_line[lastidx - 1]; + memset(&dst_line[lastidx], YUVsample, hpad[i]); + + if (h == 0) + av_log(NULL, AV_LOG_DEBUG, "%s: i %d hpad %d to %d memset size %d value %d\n", __func__, i, h, src_height[i]-1, hpad[i], YUVsample); + } + } + + src_line += src->linesize[i]; + dst_line += dst_stride[i]; + } + + /* Extend the height by cloning the last line */ + src_line = dst_line - dst_stride[i]; + for (h = 0; h < vpad[i]; h++) { + memcpy(dst_line, src_line, dst_stride[i]); + + av_log(NULL, AV_LOG_DEBUG, "%s: h %d memcpy vpad size %d\n", __func__, h, dst_stride[i]); + + dst_line += dst_stride[i]; + } + } + + return 0; +} + +static int ni_to_avframe_copy(NetIntHvsplusContext *s, AVFrame *dst, const ni_packet_t *src, int nb_planes) +{ + int src_linesize[4], src_height[4], dst_height[4]; + int i, h; + uint8_t *src_line, *dst_line; + + av_log(NULL, AV_LOG_DEBUG, "%s: dst width %d height %d nb w %d nb h %d format %s nb_planes %d\n", __func__, + dst->width, dst->height, s->nb_width, s->nb_height, av_get_pix_fmt_name(dst->format), nb_planes); + + dst_height[0] = dst->height; + dst_height[1] = dst->height / 2; + dst_height[2] = dst_height[1]; + dst_height[3] = 0; + + switch (dst->format) { + case AV_PIX_FMT_YUV420P: + case AV_PIX_FMT_YUVJ420P: + src_linesize[0] = FFALIGN(s->nb_width, 128); + src_linesize[1] = FFALIGN(s->nb_width / 2, 128); + src_linesize[2] = src_linesize[1]; + src_linesize[3] = 0; + + src_height[0] = s->nb_height; + src_height[1] = FFALIGN(s->nb_height, 2) / 2; + src_height[2] = src_height[1]; + src_height[3] = 0; + break; + case AV_PIX_FMT_YUV420P10LE: + src_linesize[0] = FFALIGN(s->nb_width * 2, 128); + src_linesize[1] = FFALIGN(s->nb_width, 128); + src_linesize[2] = src_linesize[1]; + src_linesize[3] = 0; + + src_height[0] = s->nb_height; + src_height[1] = FFALIGN(s->nb_height, 2) / 2; + src_height[2] = src_height[1]; + src_height[3] = 0; + break; + case AV_PIX_FMT_NV12: + src_linesize[0] = FFALIGN(dst->width, 128); + src_linesize[1] = FFALIGN(dst->width, 128); + src_linesize[2] = 0; + src_linesize[3] = 0; + + src_height[0] = dst->height; + src_height[1] = FFALIGN(dst->height, 2) / 2; + src_height[2] = 0; + src_height[3] = 0; + break; + case AV_PIX_FMT_NV16: + src_linesize[0] = FFALIGN(dst->width, 64); + src_linesize[1] = FFALIGN(dst->width, 64); + src_linesize[2] = 0; + src_linesize[3] = 0; + + src_height[0] = dst->height; + src_height[1] = dst->height; + src_height[2] = 0; + src_height[3] = 0; + break; + case AV_PIX_FMT_YUYV422: + case AV_PIX_FMT_UYVY422: + src_linesize[0] = FFALIGN(dst->width, 16) * 2; + src_linesize[1] = 0; + src_linesize[2] = 0; + src_linesize[3] = 0; + + src_height[0] = dst->height; + src_height[1] = 0; + src_height[2] = 0; + src_height[3] = 0; + break; + case AV_PIX_FMT_P010LE: + src_linesize[0] = FFALIGN(dst->width * 2, 128); + 
src_linesize[1] = FFALIGN(dst->width * 2, 128); + src_linesize[2] = 0; + src_linesize[3] = 0; + + src_height[0] = dst->height; + src_height[1] = FFALIGN(dst->height, 2) / 2; + src_height[2] = 0; + src_height[3] = 0; + break; + case AV_PIX_FMT_RGBA: + case AV_PIX_FMT_BGRA: + case AV_PIX_FMT_ABGR: + case AV_PIX_FMT_ARGB: + case AV_PIX_FMT_BGR0: + src_linesize[0] = FFALIGN(dst->width, 16) * 4; + src_linesize[1] = 0; + src_linesize[2] = 0; + src_linesize[3] = 0; + + src_height[0] = dst->height; + src_height[1] = 0; + src_height[2] = 0; + src_height[3] = 0; + break; + default: + av_log(NULL, AV_LOG_ERROR, "Error: Unsupported pixel format %s\n", + av_get_pix_fmt_name(dst->format)); + return AVERROR(EINVAL); + } + av_log(NULL, AV_LOG_DEBUG, "%s: src_linesize %d %d %d src_height %d %d %d dst linesize %d %d %d dst_height %d %d %d\n", __func__, + src_linesize[0], src_linesize[1], src_linesize[2], + src_height[0], src_height[1], src_height[2], + dst->linesize[0], dst->linesize[1], dst->linesize[2], + dst_height[0], dst_height[1], dst_height[2]); + + src_line = src->p_data; + for (i = 0; i < nb_planes; i++) { + dst_line = dst->data[i]; + + for (h = 0; h < src_height[i]; h++) { + if (h < dst_height[i]) { + memcpy(dst_line, src_line, + FFMIN(src_linesize[i], dst->linesize[i])); + if (h == 0) + av_log(NULL, AV_LOG_DEBUG, "%s: i %d h %d to %d memcpy size %d\n", __func__, i, h, src_height[i]-1, FFMIN(src_linesize[i], dst->linesize[i])); + dst_line += FFMIN(src_linesize[i], dst->linesize[i]); + } + src_line += src_linesize[i]; + } + } + + return 0; +} + +static int ni_hwframe_pad(AVFilterContext *ctx, NetIntHvsplusContext *s, AVFrame *in, + int w, int h, + niFrameSurface1_t **filt_frame_surface) +{ + HwPadContext *pad_ctx = s->hwp_ctx; + uint32_t ui32RgbaColor, scaler_format; + ni_retcode_t retcode; + niFrameSurface1_t *frame_surface, *new_frame_surface; + AVHWFramesContext *pAVHFWCtx; + + frame_surface = (niFrameSurface1_t *)in->data[3]; + + pAVHFWCtx = (AVHWFramesContext *)in->hw_frames_ctx->data; + + av_log(ctx, AV_LOG_DEBUG, "%s: in frame surface frameIdx %d sw_format %s w %d h %d\n", __func__, + frame_surface->ui16FrameIdx, av_get_pix_fmt_name(pAVHFWCtx->sw_format), w, h); + + scaler_format = ff_ni_ffmpeg_to_gc620_pix_fmt(pAVHFWCtx->sw_format); + + retcode = ni_frame_buffer_alloc_hwenc(&pad_ctx->api_dst_frame.data.frame, + w, h, 0); + if (retcode != NI_RETCODE_SUCCESS) + return AVERROR(ENOMEM); + + av_log(ctx, AV_LOG_DEBUG, + "%s: inlink->w = %d;inlink->h = %d;outlink->w = %d;outlink->h = %d\n", __func__, + in->width, in->height, s->nb_width, s->nb_height); + av_log(ctx, AV_LOG_DEBUG, + "%s: s->w=%d;s->h=%d;s->x=%d;s->y=%d;c=%02x:%02x:%02x:%02x\n", __func__, w, + h, 0, 0, pad_ctx->rgba_color[0], pad_ctx->rgba_color[1], + pad_ctx->rgba_color[2], pad_ctx->rgba_color[3]); + + /* + * Allocate device input frame. 
This call won't actually allocate a frame, + * but sends the incoming hardware frame index to the scaler manager + */ + retcode = ni_device_alloc_frame(&pad_ctx->api_ctx, + FFALIGN(in->width, 2), + FFALIGN(in->height, 2), + scaler_format, + 0, // input frame + in->width, // src rectangle width + in->height, // src rectangle height + 0, // src rectangle x = 0 + 0, // src rectangle y = 0 + frame_surface->ui32nodeAddress, + frame_surface->ui16FrameIdx, + NI_DEVICE_TYPE_SCALER); + + if (retcode != NI_RETCODE_SUCCESS) { + av_log(NULL, AV_LOG_ERROR, "Error: Can't allocate device input frame %d\n", retcode); + return AVERROR(ENOMEM); + } + + /* Scaler uses BGRA color, or ARGB in little-endian */ + ui32RgbaColor = (pad_ctx->rgba_color[3] << 24) | (pad_ctx->rgba_color[0] << 16) | + (pad_ctx->rgba_color[1] << 8) | pad_ctx->rgba_color[2]; + + /* Allocate device destination frame. This will acquire a frame from the pool */ + retcode = ni_device_alloc_frame(&pad_ctx->api_ctx, + FFALIGN(s->nb_width,2), + FFALIGN(s->nb_height,2), + scaler_format, + NI_SCALER_FLAG_IO, // output frame + in->width, // dst rectangle width + in->height, // dst rectangle height + 0, //s->x, // dst rectangle x + 0, //s->y, // dst rectangle y + ui32RgbaColor, // rgba color + -1, + NI_DEVICE_TYPE_SCALER); + + if (retcode != NI_RETCODE_SUCCESS) { + av_log(NULL, AV_LOG_ERROR, "Error: Can't allocate device output frame %d\n", retcode); + return AVERROR(ENOMEM); + } + + /* Set the new frame index */ + ni_device_session_read_hwdesc( + &pad_ctx->api_ctx, &pad_ctx->api_dst_frame, NI_DEVICE_TYPE_SCALER); + new_frame_surface = + (niFrameSurface1_t *)pad_ctx->api_dst_frame.data.frame.p_data[3]; + + new_frame_surface->ui16width = s->nb_width; + new_frame_surface->ui16height = s->nb_height; + + *filt_frame_surface = new_frame_surface; + + return 0; +} + +static int ni_hwframe_crop(AVFilterContext *ctx, NetIntHvsplusContext *s, AVFrame *in, + int w, int h, + niFrameSurface1_t **filt_frame_surface) +{ + AiContext *ai_ctx = s->ai_ctx; + HwCropContext *crop_ctx = s->hwc_ctx; + uint32_t scaler_format; + ni_retcode_t retcode; + niFrameSurface1_t *frame_surface, *new_frame_surface; + AVHWFramesContext *pAVHFWCtx; + + frame_surface = (niFrameSurface1_t *) ai_ctx->api_dst_frame.data.frame.p_data[3]; //(niFrameSurface1_t *)in->data[3]; + if (frame_surface == NULL) { + av_log(NULL, AV_LOG_ERROR, "Error: frame_surface is NULL\n"); + return AVERROR(EINVAL); + } + + pAVHFWCtx = (AVHWFramesContext *)in->hw_frames_ctx->data; + + av_log(ctx, AV_LOG_DEBUG, "%s: in frame surface frameIdx %d sw_format %s w %d h %d\n", __func__, + frame_surface->ui16FrameIdx, av_get_pix_fmt_name(pAVHFWCtx->sw_format), w, h); + + scaler_format = ff_ni_ffmpeg_to_gc620_pix_fmt(pAVHFWCtx->sw_format); + + retcode = ni_frame_buffer_alloc_hwenc(&crop_ctx->api_dst_frame.data.frame, s->nb_width, s->nb_height, // w, h, + 0); + if (retcode != NI_RETCODE_SUCCESS) { + av_log(NULL, AV_LOG_ERROR, "Error: Cannot allocate memory\n"); + return AVERROR(ENOMEM); + } + + av_log(ctx, AV_LOG_DEBUG, + "%s: inlink->w = %d;inlink->h = %d;outlink->w = %d;outlink->h = %d\n", __func__, + s->nb_width, s->nb_height, w, h); + + av_log(ctx, AV_LOG_DEBUG, "%s: x:%d y:%d x+w:%d y+h:%d\n", __func__, + 0, 0, w, h); + + /* + * Allocate device input frame. 
This call won't actually allocate a frame, + * but sends the incoming hardware frame index to the scaler manager + */ + retcode = ni_device_alloc_frame(&crop_ctx->api_ctx, + FFALIGN(s->nb_width, 2), + FFALIGN(s->nb_height, 2), + scaler_format, + 0, // input frame + w, // src rectangle width + h, // src rectangle height + 0, // src rectangle x + 0, // src rectangle y + frame_surface->ui32nodeAddress, + frame_surface->ui16FrameIdx, + NI_DEVICE_TYPE_SCALER); + + if (retcode != NI_RETCODE_SUCCESS) { + av_log(NULL, AV_LOG_ERROR, "Error: Can't assign input frame %d\n", retcode); + return AVERROR(ENOMEM); + } + + /* Allocate device destination frame. This will acquire a frame from the pool */ + retcode = ni_device_alloc_frame(&crop_ctx->api_ctx, + FFALIGN(w,2), + FFALIGN(h,2), + scaler_format, + NI_SCALER_FLAG_IO, + 0, + 0, + 0, + 0, + 0, + -1, + NI_DEVICE_TYPE_SCALER); + + if (retcode != NI_RETCODE_SUCCESS) { + av_log(NULL, AV_LOG_ERROR, "Error: Can't allocate device output frame %d\n", retcode); + return AVERROR(ENOMEM); + } + + /* Set the new frame index */ + retcode = ni_device_session_read_hwdesc( + &crop_ctx->api_ctx, &crop_ctx->api_dst_frame, NI_DEVICE_TYPE_SCALER); + + if (retcode != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "%s: Error: Can't acquire output frame %d\n", __func__, retcode); + return AVERROR(ENOMEM); + } + + new_frame_surface = + (niFrameSurface1_t *)crop_ctx->api_dst_frame.data.frame.p_data[3]; + + new_frame_surface->ui16width = w; + new_frame_surface->ui16height = h; + + *filt_frame_surface = new_frame_surface; + + return 0; +} +
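+/* + * Editor's summary (derived from the code, not part of the original patch): + * filter_frame_internal() takes one of two paths. For hardware frames + * (AV_PIX_FMT_NI_QUAD) all pixel data stays on the device: the frame is + * optionally padded by the 2D engine, run through the AI engine, then + * cropped back to its original size. For software frames the data is + * copied into an AI buffer, written to the session, read back as a packet + * and copied into a newly allocated AVFrame. + */ +static int filter_frame_internal(AVFilterLink *link, AVFrame *in) +{ + AVFilterContext *ctx = link->dst; + AVHWFramesContext *in_frames_context = NULL; + NetIntHvsplusContext *s = ctx->priv; + AVFrame *out = NULL; + ni_retcode_t retval; + int ret; + AiContext *ai_ctx; + ni_hvsplus_network_t *network = &s->network; + int nb_planes; + int64_t start_t; + int hwframe = in->format == AV_PIX_FMT_NI_QUAD ? 1 : 0; + + av_log(ctx, AV_LOG_DEBUG, "%s: filter %p hwframe %d format %s\n", __func__, s, hwframe, av_get_pix_fmt_name(in->format)); + + if (!s->initialized) { + AVHWFramesContext *pAVHFWCtx; + if (hwframe) { + pAVHFWCtx = (AVHWFramesContext *) in->hw_frames_ctx->data; + } + + AVFilterLink *outlink = link->dst->outputs[0]; + if (!((av_strstart(outlink->dst->filter->name, "ni_quadra", NULL)) || (av_strstart(outlink->dst->filter->name, "hwdownload", NULL)))) { + ctx->extra_hw_frames = (DEFAULT_FRAME_THREAD_QUEUE_SIZE > 1) ?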
DEFAULT_FRAME_THREAD_QUEUE_SIZE : 0; + } + ret = config_input(ctx, in); + if (ret) { + av_log(ctx, AV_LOG_ERROR, "Error: failed to configure input\n"); + goto failed_out; + } + if (hwframe) { + av_hwframe_ctx_init(s->out_frames_ref); + AVHWFramesContext *out_frames_ctx = (AVHWFramesContext *)s->out_frames_ref->data; + AVNIFramesContext *out_ni_ctx = (AVNIFramesContext *)out_frames_ctx->hwctx; + ni_cpy_hwframe_ctx(pAVHFWCtx, out_frames_ctx); + ni_device_session_copy(&s->ai_ctx->api_ctx, &out_ni_ctx->api_ctx); + } + } + + ai_ctx = s->ai_ctx; + out = av_frame_alloc(); + if (!out) { + ret = AVERROR(ENOMEM); + goto failed_out; + } + + av_frame_copy_props(out, in); + + av_log(ctx, AV_LOG_DEBUG, "%s: out_width %d out_height %d in width %d height %d\n", + __func__, s->out_width, s->out_height, in->width, in->height); + + if (hwframe) { + niFrameSurface1_t *frame_surface; + niFrameSurface1_t *hvsplus_surface; + niFrameSurface1_t *out_surface; + niFrameSurface1_t *frame_surface2; + int ai_out_format; + niFrameSurface1_t dst_surface = {0}; + + in_frames_context = (AVHWFramesContext *) in->hw_frames_ctx->data; + + out->width = (s->need_padding) ? in->width : s->nb_width; + out->height = (s->need_padding) ? in->height : s->nb_height; + + out->format = AV_PIX_FMT_NI_QUAD; + + /* Quadra 2D engine always outputs limited color range */ + out->color_range = AVCOL_RANGE_MPEG; + + if (s->need_padding) { + ret = ni_hwframe_pad(ctx, s, in, s->nb_width, s->nb_height, + &frame_surface); + if (ret < 0) { + av_log(ctx, AV_LOG_ERROR, "Error running hwframe pad\n"); + goto failed_out; + } + + av_log(ctx, AV_LOG_DEBUG, "filt frame surface frameIdx %d\n", + frame_surface->ui16FrameIdx); + + out->hw_frames_ctx = av_buffer_ref(s->out_frames_ref); + } else { + // To hvsplus + frame_surface = (niFrameSurface1_t *)in->data[3]; + } + + out->data[3] = av_malloc(sizeof(niFrameSurface1_t)); + if (!out->data[3]) { + av_log(ctx, AV_LOG_ERROR, "Error: ni hvsplus filter av_malloc returned NULL\n"); + ret = AVERROR(ENOMEM); + goto failed_out; + } + + memcpy(out->data[3], frame_surface, sizeof(niFrameSurface1_t)); + av_log(ctx, AV_LOG_DEBUG, "%s: input frame surface frameIdx %d ui16width %d ui16height %d\n", + __func__, frame_surface->ui16FrameIdx, frame_surface->ui16width, frame_surface->ui16height); + + start_t = av_gettime(); + + /* set output buffer */ + ai_out_format = ff_ni_ffmpeg_to_gc620_pix_fmt(in_frames_context->sw_format); + + av_log(ctx, AV_LOG_DEBUG, "%s: in sw_format %s ai_out_format %d\n", __func__, + av_get_pix_fmt_name(in_frames_context->sw_format), ai_out_format); + +#ifdef NI_MEASURE_LATENCY + ff_ni_update_benchmark(NULL); +#endif +
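+ /* + * Editor's note: the device frame pool may be transiently exhausted, so + * the allocation below is retried until the device returns + * NI_RETCODE_SUCCESS (assumed semantics of the libxcoder call), guarded + * by the ai_timeout option (seconds, compared in microseconds via + * av_gettime()). + */ + do { + if (s->channel_mode) { + retval = ni_device_alloc_dst_frame(&(ai_ctx->api_ctx), &dst_surface, NI_DEVICE_TYPE_AI); + } else { + if (s->need_padding) { + av_log(ctx, AV_LOG_DEBUG, "%s: 1. Set output hw frame in Ai w %d h %d\n", + __func__, s->nb_width, s->nb_height); + retval = ni_device_alloc_frame( + &ai_ctx->api_ctx, FFALIGN(s->nb_width, 2), FFALIGN(s->nb_height,2), + ai_out_format, NI_AI_FLAG_IO, 0, 0, + 0, 0, 0, -1, NI_DEVICE_TYPE_AI); + } else { + av_log(ctx, AV_LOG_DEBUG, "%s: 1.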
Set output hw frame in Ai w %d h %d\n", + __func__, s->out_width, s->out_height); + retval = ni_device_alloc_frame( + &ai_ctx->api_ctx, FFALIGN(s->out_width, 2), FFALIGN(s->out_height,2), + ai_out_format, NI_AI_FLAG_IO, 0, 0, + 0, 0, 0, -1, NI_DEVICE_TYPE_AI); + } + } + + if (retval < NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Error: failed to alloc hw output frame\n"); + ret = AVERROR(ENOMEM); + goto failed_out; + } + + if (av_gettime() - start_t > s->ai_timeout * 1000000) { + av_log(ctx, AV_LOG_ERROR, "Error: alloc hw output timeout\n"); + ret = AVERROR(ENOMEM); + goto failed_out; + } + } while (retval != NI_RETCODE_SUCCESS); + + if (s->channel_mode) { + // copy input hw frame to dst hw frame + ni_frameclone_desc_t frame_clone_desc = {0}; + frame_clone_desc.ui16DstIdx = dst_surface.ui16FrameIdx; + frame_clone_desc.ui16SrcIdx = frame_surface->ui16FrameIdx; + if (in_frames_context->sw_format == AV_PIX_FMT_YUV420P) { + // only support yuv420p + if (s->need_padding) { + // offset Y size + frame_clone_desc.ui32Offset = NI_VPU_ALIGN128(s->nb_width) * NI_VPU_CEIL(s->nb_height, 2); + // copy U+V size + frame_clone_desc.ui32Size = NI_VPU_ALIGN128(s->nb_width / 2) * NI_VPU_CEIL(s->nb_height, 2); + } else { + // offset Y size + frame_clone_desc.ui32Offset = NI_VPU_ALIGN128(s->out_width) * NI_VPU_CEIL(s->out_height, 2); + // copy U+V size + frame_clone_desc.ui32Size = NI_VPU_ALIGN128(s->out_width / 2) * NI_VPU_CEIL(s->out_height, 2); + } + retval = ni_device_clone_hwframe(&ai_ctx->api_ctx, &frame_clone_desc); + if (retval != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Error: failed to clone hw input frame\n"); + ret = AVERROR(ENOMEM); + goto failed_out; + } + } else { + av_log(ctx, AV_LOG_ERROR, "Error: only yuv420p is supported, current fmt %d\n", + in_frames_context->sw_format); + ret = AVERROR(EINVAL); + goto failed_out; + } + } + + av_log(ctx, AV_LOG_DEBUG, "%s: 2. Set input hw frame in Ai w %d h %d\n", + __func__, frame_surface->ui16width, frame_surface->ui16height); +
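+ /* + * Editor's worked example for the Y_only clone above (assuming + * NI_VPU_ALIGN128() rounds up to a multiple of 128 and NI_VPU_CEIL(x, 2) + * rounds x up to an even value): for a 1920x1080 yuv420p frame the luma + * plane occupies NI_VPU_ALIGN128(1920) * NI_VPU_CEIL(1080, 2) = + * 1920 * 1080 bytes, so ui32Offset skips it, and the U and V planes, + * stored back to back at a 128-byte-aligned stride, span + * NI_VPU_ALIGN128(960) * NI_VPU_CEIL(1080, 2) = 1024 * 1080 bytes + * (ui32Size). The call below passes zero dimensions because only the + * frame index of the existing input surface is handed to the AI engine. + */ + /* set input buffer */ + retval = ni_device_alloc_frame(&ai_ctx->api_ctx, 0, 0, 0, 0, 0, 0, 0, 0, + frame_surface->ui32nodeAddress, + frame_surface->ui16FrameIdx, + NI_DEVICE_TYPE_AI); + if (retval != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Error: failed to alloc hw input frame\n"); + ret = AVERROR(ENOMEM); + goto failed_out; + } + + /* Set the new frame index */ + start_t = av_gettime(); + do { + av_log(ctx, AV_LOG_DEBUG, "%s: 3.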
Read hw frame from Ai w %d h %d\n", + __func__, out->width, out->height); + retval = ni_device_session_read_hwdesc( + &ai_ctx->api_ctx, &s->ai_ctx->api_dst_frame, NI_DEVICE_TYPE_AI); + + if (retval < NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Error: failed to read hwdesc, retval=%d\n", retval); + ret = AVERROR(EINVAL); + goto failed_out; + } + if (av_gettime() - start_t > s->ai_timeout * 1000000) { + av_log(ctx, AV_LOG_ERROR, "Error: read hw frame timeout\n"); + ret = AVERROR(ENOMEM); + goto failed_out; + } + } while (retval != NI_RETCODE_SUCCESS); + +#ifdef NI_MEASURE_LATENCY + ff_ni_update_benchmark("ni_quadra_hvsplus"); +#endif + + if (s->need_padding) { + + hvsplus_surface = (niFrameSurface1_t *) ai_ctx->api_dst_frame.data.frame.p_data[3]; + + ni_hwframe_buffer_recycle(frame_surface, frame_surface->device_handle); + + out->hw_frames_ctx = av_buffer_ref(s->out_frames_ref); + + memcpy(out->data[3], ai_ctx->api_dst_frame.data.frame.p_data[3], sizeof(niFrameSurface1_t)); + + ret = ni_hwframe_crop(ctx, s, in, in->width, in->height, &frame_surface2); + if (ret < 0) { + av_log(ctx, AV_LOG_ERROR, "Error running hwframe crop\n"); + goto failed_out; + } + + ni_hwframe_buffer_recycle(hvsplus_surface, hvsplus_surface->device_handle); + + av_log(ctx, AV_LOG_DEBUG, "filt frame surface frameIdx %d\n", + frame_surface2->ui16FrameIdx); + } else { + frame_surface2 = (niFrameSurface1_t *) ai_ctx->api_dst_frame.data.frame.p_data[3]; + } +
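+ /* + * Editor's note: the output AVFrame carries a niFrameSurface1_t + * descriptor in data[3]. The fields copied below (frame index, session + * id, device handle, ...) let downstream NI filters and the encoder + * locate the frame on the device; buf[0], created further down with + * ff_ni_frame_free as its free callback, recycles the hardware frame + * when the AVFrame is released. + */ + out_surface = (niFrameSurface1_t *) out->data[3]; + + av_log(ctx, AV_LOG_DEBUG,"ai pre process, idx=%d\n", frame_surface2->ui16FrameIdx); + + out_surface->ui16FrameIdx = frame_surface2->ui16FrameIdx; + out_surface->ui16session_ID = frame_surface2->ui16session_ID; + out_surface->device_handle = frame_surface2->device_handle; + out_surface->output_idx = frame_surface2->output_idx; + out_surface->src_cpu = frame_surface2->src_cpu; + out_surface->ui32nodeAddress = 0; + out_surface->dma_buf_fd = 0; + out_surface->ui16width = out->width; + out_surface->ui16height = out->height; + ff_ni_set_bit_depth_and_encoding_type(&out_surface->bit_depth, + &out_surface->encoding_type, + in_frames_context->sw_format); + + av_log(ctx, AV_LOG_DEBUG, "%s: need_padding %d 4.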
Read hw frame from Ai w %d %d h %d %d\n", + __func__, s->need_padding, out->width, s->out_width, out->height, s->out_height); + + out->buf[0] = av_buffer_create(out->data[3], sizeof(niFrameSurface1_t), ff_ni_frame_free, NULL, 0); + + if (!out->buf[0]) { + av_log(ctx, AV_LOG_ERROR, "Error: ni hvsplus filter av_buffer_create returned NULL\n"); + ret = AVERROR(ENOMEM); + av_log(NULL, AV_LOG_DEBUG, "Recycle trace ui16FrameIdx = [%d] DevHandle %d\n", + out_surface->ui16FrameIdx, out_surface->device_handle); + retval = ni_hwframe_buffer_recycle(out_surface, out_surface->device_handle); + if (retval != NI_RETCODE_SUCCESS) { + av_log(NULL, AV_LOG_ERROR, "ERROR: Failed to recycle trace ui16FrameIdx = [%d] DevHandle %d\n", + out_surface->ui16FrameIdx, out_surface->device_handle); + } + goto failed_out; + } + + /* Reference the new hw frames context */ + out->hw_frames_ctx = av_buffer_ref(s->out_frames_ref); + } else { + out->width = s->out_width; + out->height = s->out_height; + + out->format = in->format; + av_log(ctx, AV_LOG_DEBUG, "%s: format %s allocate frame %d x %d\n", __func__, av_get_pix_fmt_name(in->format), out->width, out->height); + if (av_frame_get_buffer(out, 32) < 0) { + av_log(ctx, AV_LOG_ERROR, "Error: Could not allocate the AVFrame buffers\n"); + ret = AVERROR(ENOMEM); + goto failed_out; + } + + start_t = av_gettime(); + // sw frame: step 1: allocate + retval = ni_ai_frame_buffer_alloc(&ai_ctx->api_src_frame.data.frame, + &network->raw); + if (retval != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Error: cannot allocate ai frame\n"); + ret = AVERROR(ENOMEM); + goto failed_out; + } + nb_planes = av_pix_fmt_count_planes(in->format); + if (s->channel_mode) { + if (in->format != AV_PIX_FMT_YUV420P && in->format != AV_PIX_FMT_YUVJ420P) { + av_log(ctx, AV_LOG_ERROR, "Error: only yuv420p and yuvj420p are supported, current fmt %d\n", + in->format); + ret = AVERROR(EINVAL); + goto failed_out; + } + nb_planes = 1; // only copy Y data + } + // sw frame: step 2: pad and setup frame + retval = av_to_niframe_copy(s, &ai_ctx->api_src_frame.data.frame, in, nb_planes); + if (retval < 0) { + av_log(ctx, AV_LOG_ERROR, "Error: hvsplus cannot copy frame\n"); + ret = AVERROR(EIO); + goto failed_out; + } + +#ifdef NI_MEASURE_LATENCY + ff_ni_update_benchmark(NULL); +#endif + + /* write frame */ + // sw frame: step 3: write a frame to AI + do { + retval = ni_device_session_write( + &ai_ctx->api_ctx, &ai_ctx->api_src_frame, NI_DEVICE_TYPE_AI); + if (retval < 0) { + av_log(ctx, AV_LOG_ERROR, + "Error: failed to write ai session: retval %d\n", retval); + ret = AVERROR(EIO); + goto failed_out; + } + + if (av_gettime() - start_t > s->ai_timeout * 1000000) { + av_log(ctx, AV_LOG_ERROR, "Error: write sw frame to AI timeout\n"); + ret = AVERROR(ENOMEM); + goto failed_out; + } + } while (retval == 0); + // sw frame: step 4: alloc frame for read + retval = ni_ai_packet_buffer_alloc(&ai_ctx->api_dst_frame.data.packet, + &network->raw); + if (retval != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Error: failed to allocate ni packet\n"); + ret = AVERROR(ENOMEM); + goto failed_out; + } +
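+ /* + * Editor's note: like the write loop above, ni_device_session_read() is + * assumed to return a negative value on error and 0 while no output is + * ready yet, so the loop polls until data arrives, guarded by the same + * ai_timeout. + */ + start_t = av_gettime(); + // sw frame: step 5: read a frame from AI + do { + retval = ni_device_session_read(&ai_ctx->api_ctx, &ai_ctx->api_dst_frame, NI_DEVICE_TYPE_AI); + if (retval < 0) { + av_log(NULL, AV_LOG_ERROR, "Error: read AI data retval %d\n", retval); + ret = AVERROR(EIO); + goto failed_out; + } else if (retval > 0) { + if (av_gettime() - start_t > s->ai_timeout * 1000000) { + av_log(ctx, AV_LOG_ERROR, "Error: read sw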
frame from AI timeout\n"); + ret = AVERROR(ENOMEM); + goto failed_out; + } + } + } while (retval == 0); +#ifdef NI_MEASURE_LATENCY + ff_ni_update_benchmark("ni_quadra_hvsplus"); +#endif + nb_planes = av_pix_fmt_count_planes(out->format); + if (s->channel_mode) { + if (out->format != AV_PIX_FMT_YUV420P && out->format != AV_PIX_FMT_YUVJ420P) { + av_log(ctx, AV_LOG_ERROR, "Error: only yuv420p and yuvj420p are supported, current fmt %d\n", + out->format); + ret = AVERROR(EINVAL); + goto failed_out; + } + nb_planes = 1; // only copy Y data + // copy U/V data from the input sw frame + memcpy(out->data[1], in->data[1], in->height * in->linesize[1] / 2); + memcpy(out->data[2], in->data[2], in->height * in->linesize[2] / 2); + } + // sw frame: step 6: copy the AI output into the AVFrame, dropping any padding + retval = ni_to_avframe_copy(s, out, &ai_ctx->api_dst_frame.data.packet, nb_planes); + if (retval < 0) { + av_log(ctx, AV_LOG_ERROR, "Error: hvsplus cannot copy ai frame to avframe\n"); + ret = AVERROR(EIO); + goto failed_out; + } + } + + av_frame_free(&in); + return ff_filter_frame(link->dst->outputs[0], out); + +failed_out: + if (out) + av_frame_free(&out); + + av_frame_free(&in); + return ret; +} + +static int filter_frame(AVFilterLink *link, AVFrame *in) +{ + AVFilterContext *ctx = link->dst; + int ret; + + if (in == NULL) { + av_log(ctx, AV_LOG_ERROR, "Error: in frame is null\n"); + return AVERROR(EINVAL); + } + + ret = filter_frame_internal(link, in); + + return ret; +} + +#define OFFSET(x) offsetof(NetIntHvsplusContext, x) +#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_FILTERING_PARAM) + +static const AVOption ni_hvsplus_options[] = { + { "devid", "device to operate in swframe mode", OFFSET(devid), AV_OPT_TYPE_INT, {.i64 = 0}, -1, INT_MAX, FLAGS }, + { "level", "level of modification", OFFSET(level), AV_OPT_TYPE_INT, {.i64 = 2}, 1, 2, FLAGS }, + { "width", "Specify the output frame width.", OFFSET(out_width), AV_OPT_TYPE_INT, {.i64 = -1}, -1, NI_MAX_RESOLUTION_WIDTH, FLAGS }, + { "height", "Specify the output frame height.", OFFSET(out_height), AV_OPT_TYPE_INT, {.i64 = -1}, -1, NI_MAX_RESOLUTION_HEIGHT, FLAGS }, + { "mode", "filter mode", OFFSET(channel_mode), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, FLAGS, "mode" }, + { "YUV", "process channels Y, U, and V", 0, AV_OPT_TYPE_CONST, {.i64 = 0}, 0, 0, FLAGS, "mode" }, + { "Y_only", "process only channel Y", 0, AV_OPT_TYPE_CONST, {.i64 = 1}, 0, 0, FLAGS, "mode" }, + { "timeout", "Timeout for AI operations", OFFSET(ai_timeout), AV_OPT_TYPE_INT, {.i64 = NI_DEFAULT_KEEP_ALIVE_TIMEOUT}, NI_MIN_KEEP_ALIVE_TIMEOUT, NI_MAX_KEEP_ALIVE_TIMEOUT, FLAGS, "AI_timeout" }, + NI_FILT_OPTION_KEEPALIVE10, + NI_FILT_OPTION_BUFFER_LIMIT, + { NULL } +}; + +AVFILTER_DEFINE_CLASS(ni_hvsplus); + +static const AVFilterPad inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .filter_frame = filter_frame, + }, +}; + +static const AVFilterPad outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = output_config_props, + }, +}; + +FFFilter ff_vf_hvsplus_ni_quadra = { + .p.name = "ni_quadra_hvsplus", + .p.description = NULL_IF_CONFIG_SMALL( + "NETINT Quadra hvsplus v" NI_XCODER_REVISION), + .p.priv_class = &ni_hvsplus_class, + .priv_size = sizeof(NetIntHvsplusContext), + .init = init, + .uninit = uninit, + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, + FILTER_INPUTS(inputs), + FILTER_OUTPUTS(outputs), + FILTER_QUERY_FUNC(query_formats), +};
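+ +/* + * Illustrative usage (editor's example, not taken from the patch): + *   ffmpeg -i in.mp4 -vf "format=yuv420p,ni_quadra_hvsplus=level=2" out.mp4 + */ diff --git a/libavfilter/vf_hwupload_ni_quadra.c b/libavfilter/vf_hwupload_ni_quadra.c new file mode 100644 index 0000000000..e479482b72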
--- /dev/null +++ b/libavfilter/vf_hwupload_ni_quadra.c @@ -0,0 +1,297 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/buffer.h" +#include "libavutil/log.h" +#include "libavutil/opt.h" + +#include "nifilter.h" +#include "filters.h" +#include "formats.h" +#include "libavutil/mem.h" +#include "video.h" + +typedef struct NetIntUploadContext { + const AVClass *class; + int device_idx; + const char *device_name; + AVBufferRef *hwdevice; + AVBufferRef *hwframe; + int keep_alive_timeout; /* keep alive timeout setting */ +} NetIntUploadContext; + +static int query_formats(AVFilterContext *ctx) +{ + NetIntUploadContext *nictx = ctx->priv; + AVHWFramesConstraints *constraints = NULL; + const enum AVPixelFormat *input_pix_fmts, *output_pix_fmts; + AVFilterFormats *input_formats = NULL; + int err, i; + + if (!nictx->hwdevice) + return AVERROR(ENOMEM); + + constraints = av_hwdevice_get_hwframe_constraints(nictx->hwdevice, NULL); + if (!constraints) { + err = AVERROR(EINVAL); + goto fail; + } + + input_pix_fmts = constraints->valid_sw_formats; + output_pix_fmts = constraints->valid_hw_formats; + + input_formats = ff_make_format_list(output_pix_fmts); + if (!input_formats) { + err = AVERROR(ENOMEM); + goto fail; + } + if (input_pix_fmts) { + for (i = 0; input_pix_fmts[i] != AV_PIX_FMT_NONE; i++) { + err = ff_add_format(&input_formats, input_pix_fmts[i]); + if (err < 0) + goto fail; + } + } + + if ((err = ff_formats_ref(input_formats, &ctx->inputs[0]->outcfg.formats)) < 0 || + (err = ff_formats_ref(ff_make_format_list(output_pix_fmts), + &ctx->outputs[0]->incfg.formats)) < 0) + goto fail; + + av_hwframe_constraints_free(&constraints); + return 0; + +fail: + av_buffer_unref(&nictx->hwdevice); + av_hwframe_constraints_free(&constraints); + return err; +} + +static av_cold int init(AVFilterContext *ctx) +{ + NetIntUploadContext *s = ctx->priv; + char buf[64] = { 0 }; + + snprintf(buf, sizeof(buf), "%d", s->device_idx); + + if (s->device_name) { + int tmp_guid_id; + tmp_guid_id = ni_rsrc_get_device_by_block_name(s->device_name, NI_DEVICE_TYPE_UPLOAD); + if (tmp_guid_id != NI_RETCODE_FAILURE) { + av_log(ctx, AV_LOG_VERBOSE,"User set uploader device_name=%s. This will replace uploader_device_id\n",s->device_name); + memset(buf, 0, sizeof(buf)); + snprintf(buf, sizeof(buf), "%d", tmp_guid_id); + } + else { + av_log(ctx, AV_LOG_VERBOSE, "Uploader device_name=%s not found. 
Using the default device index %d instead.\n", s->device_name, s->device_idx); + } + } + + return av_hwdevice_ctx_create(&s->hwdevice, AV_HWDEVICE_TYPE_NI_QUADRA, buf, NULL, 0); +} + +static av_cold void uninit(AVFilterContext *ctx) +{ + NetIntUploadContext *s = ctx->priv; + + av_buffer_unref(&s->hwframe); + av_buffer_unref(&s->hwdevice); +} + +static int config_output(AVFilterLink *outlink) +{ + AVFilterContext *ctx = outlink->src; + AVFilterLink *inlink = ctx->inputs[0]; + NetIntUploadContext *s = ctx->priv; + AVNIFramesContext *pub_ctx; + AVHWFramesContext *hwframe_ctx; + int ret; + + av_buffer_unref(&s->hwframe); + + if (inlink->format == outlink->format) { + // The input is already a hardware format, so we just want to + // pass through the input frames in their own hardware context. + FilterLink *li = ff_filter_link(inlink); + if (!li->hw_frames_ctx) { + av_log(ctx, AV_LOG_ERROR, "No input hwframe context.\n"); + return AVERROR(EINVAL); + } + FilterLink *lo = ff_filter_link(outlink); + lo->hw_frames_ctx = av_buffer_ref(li->hw_frames_ctx); + if (!lo->hw_frames_ctx) + return AVERROR(ENOMEM); + return 0; + } + + s->hwframe = av_hwframe_ctx_alloc(s->hwdevice); + if (!s->hwframe) + return AVERROR(ENOMEM); + + hwframe_ctx = (AVHWFramesContext*)s->hwframe->data; + hwframe_ctx->format = AV_PIX_FMT_NI_QUAD; + hwframe_ctx->sw_format = inlink->format; + hwframe_ctx->width = inlink->w; + hwframe_ctx->height = inlink->h; + pub_ctx = (AVNIFramesContext*)hwframe_ctx->hwctx; + pub_ctx->keep_alive_timeout = s->keep_alive_timeout; + FilterLink *li = ff_filter_link(inlink); + pub_ctx->framerate = li->frame_rate; + + ret = av_hwframe_ctx_init(s->hwframe); + if (ret < 0) + return ret; + + FilterLink *lo = ff_filter_link(outlink); + lo->hw_frames_ctx = av_buffer_ref(s->hwframe); + if (!lo->hw_frames_ctx) + return AVERROR(ENOMEM); + + return 0; +} + +static int filter_frame(AVFilterLink *link, AVFrame *in) +{ + AVFilterContext *ctx = link->dst; + AVFilterLink *outlink = ctx->outputs[0]; + AVFrame *out = NULL; + int ret; + + if (in->format == outlink->format) + return ff_filter_frame(outlink, in); + + out = ff_get_video_buffer(outlink, outlink->w, outlink->h); + if (!out) { + ret = AVERROR(ENOMEM); + goto fail; + } + + out->width = in->width; + out->height = in->height; + + ret = av_hwframe_transfer_data(out, in, 0); + if (ret < 0) { + av_log(ctx, AV_LOG_ERROR, "Error transferring data to the Quadra\n"); + goto fail; + } + + ret = av_frame_copy_props(out, in); + if (ret < 0) + goto fail; + + av_frame_free(&in); + + return ff_filter_frame(ctx->outputs[0], out); + +fail: + av_frame_free(&in); + av_frame_free(&out); + return ret; +} + +static int activate(AVFilterContext *ctx) +{ + AVFilterLink *inlink = ctx->inputs[0]; + AVFilterLink *outlink = ctx->outputs[0]; + AVFrame *frame = NULL; + int ret = 0; + FilterLink *lo = ff_filter_link(outlink); + AVHWFramesContext *hwfc = (AVHWFramesContext *) lo->hw_frames_ctx->data; + AVNIFramesContext *f_hwctx = (AVNIFramesContext*) hwfc->hwctx; + + // Forward the status on the output link to the input link; if the status is set, discard all queued frames + FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink); + + av_log(ctx, AV_LOG_TRACE, "%s: ready %u inlink framequeue %u available_frame %d outlink framequeue %u frame_wanted %d\n", + __func__, ctx->ready, ff_inlink_queued_frames(inlink), ff_inlink_check_available_frame(inlink), ff_inlink_queued_frames(outlink), ff_outlink_frame_wanted(outlink)); +
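+ /* + * Editor's note: before consuming a software frame the filter asks the + * device whether an upload buffer is available + * (ni_device_session_query_buffer_avail, assumed semantics: negative + * means no buffer is free or an error). On older firmware without this + * query it proceeds anyway; otherwise it returns FFERROR_NOT_READY to + * apply backpressure upstream until a buffer frees up. + */ + if (ff_inlink_check_available_frame(inlink)) { + if (inlink->format !=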
outlink->format) { + ret = ni_device_session_query_buffer_avail(&f_hwctx->api_ctx, NI_DEVICE_TYPE_UPLOAD); + + if (ret == NI_RETCODE_ERROR_UNSUPPORTED_FW_VERSION) { + av_log(ctx, AV_LOG_WARNING, "No backpressure support in FW\n"); + } else if (ret < 0) { + av_log(ctx, AV_LOG_WARNING, "%s: query ret %d, ready %u inlink framequeue %u available_frame %d outlink framequeue %u frame_wanted %d - return NOT READY\n", + __func__, ret, ctx->ready, ff_inlink_queued_frames(inlink), ff_inlink_check_available_frame(inlink), ff_inlink_queued_frames(outlink), ff_outlink_frame_wanted(outlink)); + return FFERROR_NOT_READY; + } + } + + ret = ff_inlink_consume_frame(inlink, &frame); + if (ret < 0) + return ret; + + ret = filter_frame(inlink, frame); + if (ret >= 0) { + ff_filter_set_ready(ctx, 300); + } + return ret; + } + + // We did not get a frame from the input link; check its status + FF_FILTER_FORWARD_STATUS(inlink, outlink); + + // We have no frames yet from the input link and no EOF, so request some. + FF_FILTER_FORWARD_WANTED(outlink, inlink); + + return FFERROR_NOT_READY; +} + +#define OFFSET(x) offsetof(NetIntUploadContext, x) +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) +// a default device_idx of -1 lets the uploader auto-balance across devices +static const AVOption ni_upload_options[] = { + { "device", "Number of the device to use", OFFSET(device_idx), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, FLAGS}, + { "devname", "Name of the device to use", OFFSET(device_name), AV_OPT_TYPE_STRING, {.str = NULL}, CHAR_MIN, CHAR_MAX, FLAGS}, + NI_FILT_OPTION_KEEPALIVE, + { NULL } +}; + +AVFILTER_DEFINE_CLASS(ni_upload); + +static const AVFilterPad inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .filter_frame = filter_frame, + }, +}; + +static const AVFilterPad outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = config_output, + }, +}; + +FFFilter ff_vf_hwupload_ni_quadra = { + .p.name = "ni_quadra_hwupload", + .p.description = NULL_IF_CONFIG_SMALL( + "NETINT Quadra upload a system memory frame to a device v" NI_XCODER_REVISION), + .p.priv_class = &ni_upload_class, + .priv_size = sizeof(NetIntUploadContext), + .init = init, + .uninit = uninit, + .activate = activate, + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, + FILTER_INPUTS(inputs), + FILTER_OUTPUTS(outputs), + FILTER_QUERY_FUNC(query_formats), +};
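+ +/* + * Illustrative usage (editor's example, not taken from the patch): upload + * decoded software frames to a Quadra device and download them again, e.g. + *   ffmpeg -i in.mp4 -vf "ni_quadra_hwupload=device=0,hwdownload,format=yuv420p" out.mp4 + */ diff --git a/libavfilter/vf_overlay_ni.c b/libavfilter/vf_overlay_ni.c new file mode 100644 index 0000000000..7be8f45cec --- /dev/null +++ b/libavfilter/vf_overlay_ni.c @@ -0,0 +1,1397 @@ +/* + * Copyright (c) 2010 Stefano Sabatini + * Copyright (c) 2010 Baptiste Coudurier + * Copyright (c) 2007 Bobby Bingham + * Copyright (c) 2021 NetInt + * + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details.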
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * overlay one video on top of another + */ + +#include "nifilter.h" +#include "filters.h" +#include "formats.h" +#include "libavutil/mem.h" +#include "fftools/ffmpeg_sched.h" +#include "libavutil/common.h" +#include "libavutil/eval.h" +#include "libavutil/avstring.h" +#include "libavutil/mathematics.h" +#include "libavutil/opt.h" +#include "libavutil/timestamp.h" +#include "libavutil/hwcontext.h" +#include "drawutils.h" +#include "framesync.h" +#include "video.h" +#include <ni_device_api.h> + +static const char *const var_names[] = { + "main_w", "W", ///< width of the main video + "main_h", "H", ///< height of the main video + "overlay_w", "w", ///< width of the overlay video + "overlay_h", "h", ///< height of the overlay video + "hsub", + "vsub", + "x", + "y", + "t", + NULL +}; + +enum var_name { + VAR_MAIN_W, VAR_MW, + VAR_MAIN_H, VAR_MH, + VAR_OVERLAY_W, VAR_OW, + VAR_OVERLAY_H, VAR_OH, + VAR_HSUB, + VAR_VSUB, + VAR_X, + VAR_Y, + VAR_T, + VAR_VARS_NB +}; + +#define MAIN 0 +#define OVERLAY 1 + +typedef struct NetIntOverlayContext { + const AVClass *class; + int x, y; ///< position of overlaid picture + + uint8_t main_has_alpha; + uint8_t overlay_has_alpha; + int alpha_format; + + FFFrameSync fs; + + int hsub, vsub; ///< chroma subsampling values + + double var_values[VAR_VARS_NB]; + char *x_expr, *y_expr; + + AVExpr *x_pexpr, *y_pexpr; + + ni_session_context_t api_ctx; + ni_session_data_io_t api_dst_frame; + + AVBufferRef* out_frames_ref; + + int initialized; + int session_opened; + int crop_session_opened; + int keep_alive_timeout; /* keep alive timeout setting */ + int inplace; + int buffer_limit; + uint16_t ui16CropFrameIdx; + ni_session_context_t crop_api_ctx; + ni_session_data_io_t crop_api_dst_frame; +} NetIntOverlayContext; + +static int process_frame(FFFrameSync *fs); +static int process_frame_inplace(FFFrameSync *fs); + +static int set_expr(AVExpr **pexpr, const char *expr, const char *option, void *log_ctx) +{ + int ret; + AVExpr *old = NULL; + + if (*pexpr) + old = *pexpr; + ret = av_expr_parse(pexpr, expr, var_names, + NULL, NULL, NULL, NULL, 0, log_ctx); + if (ret < 0) { + av_log(log_ctx, AV_LOG_ERROR, + "Error when evaluating the expression '%s' for %s\n", + expr, option); + *pexpr = old; + return ret; + } + + av_expr_free(old); + return 0; +} + +static int query_formats(AVFilterContext *ctx) +{ + /* We only accept hardware frames */ + static const enum AVPixelFormat pix_fmts[] = + {AV_PIX_FMT_NI_QUAD, AV_PIX_FMT_NONE}; + AVFilterFormats *formats; + + formats = ff_make_format_list(pix_fmts); + + if (!formats) + return AVERROR(ENOMEM); + + return ff_set_common_formats(ctx, formats); +} + +static int init_framesync(AVFilterContext *ctx) +{ + NetIntOverlayContext *s = ctx->priv; + int ret, i; + + s->fs.on_event = s->inplace ? process_frame_inplace : process_frame; + s->fs.opaque = s; + ret = ff_framesync_init(&s->fs, ctx, ctx->nb_inputs); + if (ret < 0) + return ret; + + for (i = 0; i < ctx->nb_inputs; i++) { + FFFrameSyncIn *in = &s->fs.in[i]; + in->before = EXT_STOP; + in->after = EXT_INFINITY; + in->sync = i ? 
1 : 2; + in->time_base = ctx->inputs[i]->time_base; + } + + return ff_framesync_configure(&s->fs); +} + +static av_cold void uninit(AVFilterContext *ctx) +{ + NetIntOverlayContext *s = ctx->priv; + + ff_framesync_uninit(&s->fs); + av_expr_free(s->x_pexpr); s->x_pexpr = NULL; + av_expr_free(s->y_pexpr); s->y_pexpr = NULL; + + if (s->api_dst_frame.data.frame.p_buffer) { + ni_frame_buffer_free(&s->api_dst_frame.data.frame); + } + + if (s->crop_api_dst_frame.data.frame.p_buffer) { + ni_frame_buffer_free(&s->crop_api_dst_frame.data.frame); + } + + if (s->session_opened) { + ni_device_session_close(&s->api_ctx, 1, NI_DEVICE_TYPE_SCALER); + ni_device_session_context_clear(&s->api_ctx); + } + + if (s->crop_session_opened) { + ni_device_session_close(&s->crop_api_ctx, 1, NI_DEVICE_TYPE_SCALER); + ni_device_session_context_clear(&s->crop_api_ctx); + } + + av_buffer_unref(&s->out_frames_ref); +} + +static inline int normalize_xy(double d, int chroma_sub) +{ + if (isnan(d)) + return INT_MAX; + return (int)d & ~((1 << chroma_sub) - 1); +} + +static void eval_expr(AVFilterContext *ctx) +{ + NetIntOverlayContext *s = ctx->priv; + + s->var_values[VAR_X] = av_expr_eval(s->x_pexpr, s->var_values, NULL); + s->var_values[VAR_Y] = av_expr_eval(s->y_pexpr, s->var_values, NULL); + s->var_values[VAR_X] = av_expr_eval(s->x_pexpr, s->var_values, NULL); + s->x = normalize_xy(s->var_values[VAR_X], s->hsub); + s->y = normalize_xy(s->var_values[VAR_Y], s->vsub); +} + +static int overlay_intersects_background( + const AVFilterContext *ctx, + const AVFrame *overlay, + const AVFrame *main) +{ + const NetIntOverlayContext *s = (NetIntOverlayContext *) ctx->priv; + + if (s->x >= main->width) + return 0; + + if (s->y >= main->height) + return 0; + + if (s->x + overlay->width <= 0) + return 0; + + if (s->y + overlay->height <= 0) + return 0; + + return 1; +} + +static void calculate_src_rectangle( + int *px, + int *py, + int *pw, + int *ph, + int bgnd_x, + int bgnd_y, + int bgnd_w, + int bgnd_h, + int ovly_x, + int ovly_y, + int ovly_w, + int ovly_h) + +{ + *px = (ovly_x > 0) ? 0 : -ovly_x; + *py = (ovly_y > 0) ? 
0 : -ovly_y; + + if (ovly_x > 0) { + *pw = FFMIN(bgnd_w - ovly_x, ovly_w); + } else { + *pw = FFMIN(ovly_w + ovly_x, bgnd_w); + } + + if (ovly_y > 0) { + *ph = FFMIN(bgnd_h - ovly_y, ovly_h); + } else { + *ph = FFMIN(ovly_h + ovly_y, bgnd_h); + } +} + +static void calculate_dst_rectangle( + int *px, + int *py, + int *pw, + int *ph, + int bgnd_x, + int bgnd_y, + int bgnd_w, + int bgnd_h, + int ovly_x, + int ovly_y, + int ovly_w, + int ovly_h) +{ + *px = FFMAX(0, ovly_x); + *py = FFMAX(0, ovly_y); + + if (ovly_x > 0) { + *pw = FFMIN(bgnd_w - ovly_x, ovly_w); + } else { + *pw = FFMIN(ovly_w + ovly_x, bgnd_w); + } + + if (ovly_y > 0) { + *ph = FFMIN(bgnd_h - ovly_y, ovly_h); + } else { + *ph = FFMIN(ovly_h + ovly_y, bgnd_h); + } +} + +static const enum AVPixelFormat alpha_pix_fmts[] = { + AV_PIX_FMT_ARGB, AV_PIX_FMT_ABGR, AV_PIX_FMT_RGBA, + AV_PIX_FMT_BGRA, AV_PIX_FMT_NONE +}; + +static int config_input_overlay(AVFilterLink *inlink) +{ + AVFilterContext *ctx = inlink->dst; + NetIntOverlayContext *s = inlink->dst->priv; + AVHWFramesContext *in_frames_ctx; + const AVPixFmtDescriptor *pix_desc; + int ret; + + FilterLink *li = ff_filter_link(inlink); + if (li == NULL) { + av_log(inlink->dst, AV_LOG_ERROR, "No hw context provided on input\n"); + return AVERROR(EINVAL); + } + in_frames_ctx = (AVHWFramesContext *)li->hw_frames_ctx->data; + if (!in_frames_ctx) { + return AVERROR(EINVAL); + } + + pix_desc = av_pix_fmt_desc_get(in_frames_ctx->sw_format); + + if (in_frames_ctx->sw_format == AV_PIX_FMT_NI_QUAD_10_TILE_4X4) { + av_log(ctx, AV_LOG_ERROR, "tile4x4 10b not supported for overlay!\n"); + return AVERROR(EINVAL); + } + + /* Finish the configuration by evaluating the expressions + now when both inputs are configured. */ + s->var_values[VAR_MAIN_W ] = s->var_values[VAR_MW] = ctx->inputs[MAIN ]->w; + s->var_values[VAR_MAIN_H ] = s->var_values[VAR_MH] = ctx->inputs[MAIN ]->h; + s->var_values[VAR_OVERLAY_W] = s->var_values[VAR_OW] = ctx->inputs[OVERLAY]->w; + s->var_values[VAR_OVERLAY_H] = s->var_values[VAR_OH] = ctx->inputs[OVERLAY]->h; + s->var_values[VAR_HSUB] = 1<<pix_desc->log2_chroma_w; + s->var_values[VAR_VSUB] = 1<<pix_desc->log2_chroma_h; + s->var_values[VAR_X] = NAN; + s->var_values[VAR_Y] = NAN; + s->var_values[VAR_T] = NAN; + + if ((ret = set_expr(&s->x_pexpr, s->x_expr, "x", ctx)) < 0 || + (ret = set_expr(&s->y_pexpr, s->y_expr, "y", ctx)) < 0) + return ret; + + s->overlay_has_alpha = ff_fmt_is_in(in_frames_ctx->sw_format, + alpha_pix_fmts); + + av_log(ctx, AV_LOG_VERBOSE, + "main w:%d h:%d fmt:%s overlay w:%d h:%d fmt:%s\n", + ctx->inputs[MAIN]->w, ctx->inputs[MAIN]->h, + av_get_pix_fmt_name(ctx->inputs[MAIN]->format), + ctx->inputs[OVERLAY]->w, ctx->inputs[OVERLAY]->h, + av_get_pix_fmt_name(ctx->inputs[OVERLAY]->format)); + return 0; +} + +static int init_out_pool(AVFilterContext *ctx) +{ + NetIntOverlayContext *s = ctx->priv; + AVHWFramesContext *out_frames_ctx; + int pool_size = DEFAULT_NI_FILTER_POOL_SIZE; + + out_frames_ctx = (AVHWFramesContext *)s->out_frames_ref->data; + pool_size += ctx->extra_hw_frames > 0 ? 
ctx->extra_hw_frames : 0; + s->buffer_limit = 1; + + /* Create frame pool on device */ + return ff_ni_build_frame_pool(&s->api_ctx, out_frames_ctx->width, + out_frames_ctx->height, out_frames_ctx->sw_format, + pool_size, + s->buffer_limit); +} + +static int config_output(AVFilterLink *outlink) +{ + AVFilterContext *ctx = outlink->src; + NetIntOverlayContext *s = ctx->priv; + AVHWFramesContext *in_frames_ctx; + AVHWFramesContext *out_frames_ctx; + int ret = 0; + + outlink->w = ctx->inputs[MAIN]->w; + outlink->h = ctx->inputs[MAIN]->h; + FilterLink *li = ff_filter_link(ctx->inputs[MAIN]); + FilterLink *lo = ff_filter_link(outlink); + lo->frame_rate = li->frame_rate; + outlink->time_base = ctx->inputs[MAIN]->time_base; + + ret = init_framesync(ctx); + if (ret < 0) + return ret; + in_frames_ctx = (AVHWFramesContext *)li->hw_frames_ctx->data; + + if (!s->inplace) { + FilterLink *lt; + AVHWFramesContext *tmp_frames_ctx; + + s->out_frames_ref = av_hwframe_ctx_alloc(in_frames_ctx->device_ref); + if (!s->out_frames_ref) + return AVERROR(ENOMEM); + + out_frames_ctx = (AVHWFramesContext *)s->out_frames_ref->data; + out_frames_ctx->format = AV_PIX_FMT_NI_QUAD; + out_frames_ctx->width = outlink->w; + out_frames_ctx->height = outlink->h; + + av_hwframe_ctx_init(s->out_frames_ref); + + lt = ff_filter_link(ctx->inputs[OVERLAY]); + tmp_frames_ctx = (AVHWFramesContext *)lt->hw_frames_ctx->data; + + // HW does not support NV12 Compress + RGB -> NV12 Compress + if (((in_frames_ctx->sw_format == AV_PIX_FMT_NI_QUAD_8_TILE_4X4) || + (in_frames_ctx->sw_format == AV_PIX_FMT_NI_QUAD_10_TILE_4X4)) && + ((tmp_frames_ctx->sw_format >= AV_PIX_FMT_ARGB) && + (tmp_frames_ctx->sw_format <= AV_PIX_FMT_BGRA))) { + out_frames_ctx->sw_format = AV_PIX_FMT_NV12; + av_log(ctx, AV_LOG_WARNING, "Overlay output is changed to nv12\n"); + } else { + out_frames_ctx->sw_format = in_frames_ctx->sw_format; + } + + out_frames_ctx->initial_pool_size = + NI_OVERLAY_ID; // Repurposed as identity code + } else { + s->out_frames_ref = av_buffer_ref(li->hw_frames_ctx); + } + + av_buffer_unref(&lo->hw_frames_ctx); + + lo->hw_frames_ctx = av_buffer_ref(s->out_frames_ref); + if (!lo->hw_frames_ctx) + return AVERROR(ENOMEM); + + return ret; +} + +static int do_intermediate_crop_and_overlay(AVFilterContext *ctx, + AVFrame *overlay, AVFrame *frame) +{ + NetIntOverlayContext *s = (NetIntOverlayContext *) ctx->priv; + AVHWFramesContext *main_frame_ctx,*ovly_frame_ctx; + niFrameSurface1_t *frame_surface; + ni_retcode_t retcode; + uint16_t ui16FrameIdx; + int main_scaler_format,ovly_scaler_format; + int flags; + int crop_x,crop_y,crop_w,crop_h; + int src_x,src_y,src_w,src_h; + + main_frame_ctx = (AVHWFramesContext *) frame->hw_frames_ctx->data; + main_scaler_format = + ff_ni_ffmpeg_to_gc620_pix_fmt(main_frame_ctx->sw_format); + + ovly_frame_ctx = (AVHWFramesContext *) overlay->hw_frames_ctx->data; + ovly_scaler_format = + ff_ni_ffmpeg_to_gc620_pix_fmt(ovly_frame_ctx->sw_format); + + /* Allocate a ni_frame_t for the intermediate crop operation */ + retcode = ni_frame_buffer_alloc_hwenc(&s->crop_api_dst_frame.data.frame, + ctx->inputs[OVERLAY]->w, + ctx->inputs[OVERLAY]->h, + 0); + + if (retcode != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Can't allocate interim crop frame\n"); + return AVERROR(ENOMEM); + } + + calculate_dst_rectangle(&crop_x, &crop_y, &crop_w, &crop_h, + 0, 0, frame->width, frame->height, + FFALIGN(s->x,2), FFALIGN(s->y,2), + overlay->width, overlay->height); + + frame_surface = (niFrameSurface1_t *) frame->data[3]; + + /* Assign 
a device input frame. Send incoming frame index to crop session */ + retcode = ni_device_alloc_frame( + &s->crop_api_ctx, + FFALIGN(ctx->inputs[MAIN]->w, 2), + FFALIGN(ctx->inputs[MAIN]->h, 2), + main_scaler_format, + 0, + crop_w, + crop_h, + crop_x, + crop_y, + 0, + frame_surface->ui16FrameIdx, + NI_DEVICE_TYPE_SCALER); + + if (retcode != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Can't assign input crop frame %d\n", + retcode); + return AVERROR(ENOMEM); + } + + /* Allocate destination frame. This acquires a frame from the pool */ + retcode = ni_device_alloc_frame( + &s->crop_api_ctx, + FFALIGN(ctx->inputs[OVERLAY]->w, 2), + FFALIGN(ctx->inputs[OVERLAY]->h, 2), + ff_ni_ffmpeg_to_gc620_pix_fmt(AV_PIX_FMT_RGBA), + NI_SCALER_FLAG_IO, + crop_w, + crop_h, + 0, + 0, + 0, + -1, + NI_DEVICE_TYPE_SCALER); + + if (retcode != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_DEBUG, "Can't allocate output crop frame %d\n", + retcode); + return AVERROR(ENOMEM); + } + + retcode = ni_device_session_read_hwdesc(&s->crop_api_ctx, + &s->crop_api_dst_frame, + NI_DEVICE_TYPE_SCALER); + + if (retcode != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "No cropped output frame %d\n", retcode); + return AVERROR(ENOMEM); + } + + /* Get the acquired frame */ + frame_surface = (niFrameSurface1_t *) + s->crop_api_dst_frame.data.frame.p_data[3]; + s->ui16CropFrameIdx = frame_surface->ui16FrameIdx; + + /* Overlay the icon over the intermediate cropped frame */ + + /* Allocate a ni_frame_t for the intermediate overlay */ + retcode = ni_frame_buffer_alloc_hwenc(&s->api_dst_frame.data.frame, + ctx->inputs[OVERLAY]->w, + ctx->inputs[OVERLAY]->h, + 0); + + if (retcode < 0) { + av_log(ctx, AV_LOG_ERROR, "Can't allocate interim ovly frame\n"); + return AVERROR(ENOMEM); + } + + frame_surface = (niFrameSurface1_t *) overlay->data[3]; + ui16FrameIdx = frame_surface->ui16FrameIdx; + + calculate_src_rectangle(&src_x, &src_y, &src_w, &src_h, + 0, 0, frame->width, frame->height, + FFALIGN(s->x,2), FFALIGN(s->y,2), + overlay->width, overlay->height); + + /* Assign input frame to intermediate overlay session */ + retcode = ni_device_alloc_frame( + &s->api_ctx, + FFALIGN(ctx->inputs[OVERLAY]->w, 2), + FFALIGN(ctx->inputs[OVERLAY]->h, 2), + ovly_scaler_format, + 0, + src_w, + src_h, + src_x, + src_y, + 0, + ui16FrameIdx, + NI_DEVICE_TYPE_SCALER); + + if (retcode != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Can't assign input overlay frame %d\n", + retcode); + return AVERROR(ENOMEM); + } + + /* In-place overlay frame. Send down frame index of background frame */ + flags = NI_SCALER_FLAG_IO; /* Configure output */ + flags |= s->alpha_format ? 
NI_SCALER_FLAG_PA : 0; /* Premultiply/straight */ + + retcode = ni_device_alloc_frame( + &s->api_ctx, + FFALIGN(ctx->inputs[OVERLAY]->w, 2), + FFALIGN(ctx->inputs[OVERLAY]->h, 2), + ff_ni_ffmpeg_to_gc620_pix_fmt(AV_PIX_FMT_RGBA), + flags, + crop_w, + crop_h, + 0, + 0, + 0, + s->ui16CropFrameIdx, + NI_DEVICE_TYPE_SCALER); + + if (retcode != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_DEBUG, "Can't overlay frame for output %d\n", + retcode); + return AVERROR(ENOMEM); + } + + retcode = ni_device_session_read_hwdesc(&s->api_ctx, + &s->api_dst_frame, + NI_DEVICE_TYPE_SCALER); + + if (retcode != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Can't acquire intermediate frame %d\n", + retcode); + return AVERROR(ENOMEM); + } + + return NI_RETCODE_SUCCESS; +} + +static int process_frame(FFFrameSync *fs) +{ + AVFilterContext *ctx = fs->parent; + NetIntOverlayContext *s = (NetIntOverlayContext *) ctx->priv; + AVHWFramesContext *main_frame_ctx,*ovly_frame_ctx; + AVNIDeviceContext *pAVNIDevCtx; + AVFilterLink *inlink_main = ctx->inputs[MAIN]; + AVFilterLink *outlink = ctx->outputs[0]; + AVFilterLink *inlink_overlay = ctx->inputs[OVERLAY]; + AVFrame *frame = NULL; + AVFrame *overlay = NULL; + AVFrame *out = NULL; + niFrameSurface1_t *frame_surface,*new_frame_surface; + int flags, main_cardno, ovly_cardno; + int main_scaler_format, ovly_scaler_format; + ni_retcode_t retcode; + uint16_t tempFIDOverlay = 0; + uint16_t tempFIDFrame = 0; + + av_log(ctx, AV_LOG_TRACE, "%s: ready %u inlink framequeue %u available_frame %d inlink_overlay framequeue %u available_frame %d outlink framequeue %u frame_wanted %d\n", + __func__, ctx->ready, + ff_inlink_queued_frames(inlink_main), ff_inlink_check_available_frame(inlink_main), + ff_inlink_queued_frames(inlink_overlay), ff_inlink_check_available_frame(inlink_overlay), + ff_inlink_queued_frames(outlink), ff_outlink_frame_wanted(outlink)); + + // Consume from inlink framequeue only when outlink framequeue is empty, to prevent filter from exhausting all pre-allocated device buffers + if (ff_inlink_check_available_frame(outlink)) + return FFERROR_NOT_READY; + + /* ff_framesync_get_frame() always returns 0 for hw frames */ + ff_framesync_get_frame(fs, OVERLAY, &overlay, 0); + + if (!overlay) { + ff_framesync_get_frame(fs, MAIN, &frame, 1); + return ff_filter_frame(ctx->outputs[0], frame); + } + + ff_framesync_get_frame(fs, MAIN, &frame, 0); + + frame->pts = + av_rescale_q(fs->pts, fs->time_base, ctx->outputs[0]->time_base); + + if (overlay) { + s->var_values[VAR_OVERLAY_W] = s->var_values[VAR_OW] = overlay->width; + s->var_values[VAR_OVERLAY_H] = s->var_values[VAR_OH] = overlay->height; + } + + s->var_values[VAR_MAIN_W ] = s->var_values[VAR_MW] = frame->width; + s->var_values[VAR_MAIN_H ] = s->var_values[VAR_MH] = frame->height; + s->var_values[VAR_T] = frame->pts == AV_NOPTS_VALUE ? 
+                           NAN : frame->pts * av_q2d(inlink_main->time_base);
+
+    // Re-evaluate the x/y expressions on each frame so the overlay
+    // position can be changed at run time when using ni_overlay.
+    set_expr(&s->x_pexpr, s->x_expr, "x", ctx);
+    set_expr(&s->y_pexpr, s->y_expr, "y", ctx);
+
+    eval_expr(ctx);
+    av_log(ctx, AV_LOG_DEBUG, "x:%f xi:%d y:%f yi:%d t:%f\n",
+           s->var_values[VAR_X], s->x,
+           s->var_values[VAR_Y], s->y,
+           s->var_values[VAR_T]);
+
+    main_frame_ctx = (AVHWFramesContext *) frame->hw_frames_ctx->data;
+    main_scaler_format = ff_ni_ffmpeg_to_gc620_pix_fmt(main_frame_ctx->sw_format);
+
+    main_cardno = ni_get_cardno(frame);
+
+    if (overlay) {
+        ovly_frame_ctx = (AVHWFramesContext *) overlay->hw_frames_ctx->data;
+        ovly_scaler_format = ff_ni_ffmpeg_to_gc620_pix_fmt(ovly_frame_ctx->sw_format);
+        ovly_cardno = ni_get_cardno(overlay);
+
+        if (main_cardno != ovly_cardno) {
+            av_log(ctx, AV_LOG_ERROR,
+                   "Main/Overlay frames on different cards\n");
+            return AVERROR(EINVAL);
+        }
+    } else {
+        ovly_scaler_format = 0;
+    }
+
+    if (!s->initialized) {
+        retcode = ni_device_session_context_init(&s->api_ctx);
+        if (retcode < 0) {
+            av_log(ctx, AV_LOG_ERROR,
+                   "ni overlay filter session context init failure\n");
+            return retcode;
+        }
+
+        pAVNIDevCtx = (AVNIDeviceContext *)main_frame_ctx->device_ctx->hwctx;
+        s->api_ctx.device_handle = pAVNIDevCtx->cards[main_cardno];
+        s->api_ctx.blk_io_handle = pAVNIDevCtx->cards[main_cardno];
+
+        s->api_ctx.hw_id = main_cardno;
+        s->api_ctx.device_type = NI_DEVICE_TYPE_SCALER;
+        s->api_ctx.scaler_operation = NI_SCALER_OPCODE_OVERLAY;
+        s->api_ctx.keep_alive_timeout = s->keep_alive_timeout;
+
+        retcode = ni_device_session_open(&s->api_ctx, NI_DEVICE_TYPE_SCALER);
+        if (retcode != NI_RETCODE_SUCCESS) {
+            av_log(ctx, AV_LOG_ERROR, "Can't open device session on card %d\n",
+                   main_cardno);
+            ni_device_session_close(&s->api_ctx, 1, NI_DEVICE_TYPE_SCALER);
+            ni_device_session_context_clear(&s->api_ctx);
+            return retcode;
+        }
+
+        s->session_opened = 1;
+
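+        /*
+         * Downstream is neither an NI Quadra filter nor hwdownload;
+         * reserve extra hardware frames (enough, presumably, to cover
+         * FFmpeg's frame-thread queue) so a buffering consumer cannot
+         * starve the device frame pool.
+         */
+        if (!((av_strstart(outlink->dst->filter->name, "ni_quadra", NULL)) || (av_strstart(outlink->dst->filter->name, "hwdownload", NULL)))) {
+            inlink_main->dst->extra_hw_frames = (DEFAULT_FRAME_THREAD_QUEUE_SIZE > 1) ?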
DEFAULT_FRAME_THREAD_QUEUE_SIZE : 0; + } + + retcode = init_out_pool(inlink_main->dst); + if (retcode < 0) { + av_log(ctx, AV_LOG_ERROR, + "Internal output allocation failed rc = %d\n", retcode); + return retcode; + } + + AVHWFramesContext *out_frames_ctx = (AVHWFramesContext *)s->out_frames_ref->data; + AVNIFramesContext *out_ni_ctx = (AVNIFramesContext *)out_frames_ctx->hwctx; + ni_cpy_hwframe_ctx(main_frame_ctx, out_frames_ctx); + ni_device_session_copy(&s->api_ctx, &out_ni_ctx->api_ctx); + + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(main_frame_ctx->sw_format); + + if (((frame && frame->color_range == AVCOL_RANGE_JPEG) || + (overlay && overlay->color_range == AVCOL_RANGE_JPEG)) && !(desc->flags & AV_PIX_FMT_FLAG_RGB)) { + av_log(ctx, AV_LOG_WARNING, + "WARNING: Full color range input, limited color range output\n"); + } + + if (av_buffer_get_ref_count(frame->buf[0]) > 1) { + av_log(ctx, AV_LOG_WARNING, + "WARNING: In-place overlay being used after split " + "filter may cause corruption\n"); + } + + s->initialized = 1; + } + + /* Allocate a ni_frame for the overlay output */ + retcode = ni_frame_buffer_alloc_hwenc(&s->api_dst_frame.data.frame, + outlink->w, + outlink->h, + 0); + + if (retcode != NI_RETCODE_SUCCESS) { + return AVERROR(ENOMEM); + } + + if (overlay) { + frame_surface = (niFrameSurface1_t *)overlay->data[3]; + tempFIDOverlay = frame_surface->ui16FrameIdx; + } else { + frame_surface = NULL; + } + +#ifdef NI_MEASURE_LATENCY + ff_ni_update_benchmark(NULL); +#endif + + /* + * Assign an input frame for overlay picture. Send the + * incoming hardware frame index to the scaler manager. + */ + retcode = ni_device_alloc_frame( + &s->api_ctx, + overlay ? FFALIGN(overlay->width, 2) : 0, + overlay ? FFALIGN(overlay->height, 2) : 0, + ovly_scaler_format, + (frame_surface && frame_surface->encoding_type == 2) ? NI_SCALER_FLAG_CMP : 0, + overlay ? FFALIGN(overlay->width, 2) : 0, + overlay ? FFALIGN(overlay->height, 2) : 0, + s->x, + s->y, + frame_surface ? (int)frame_surface->ui32nodeAddress : 0, + frame_surface ? frame_surface->ui16FrameIdx : 0, + NI_DEVICE_TYPE_SCALER); + + if (retcode != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_DEBUG, "Can't assign frame for overlay input %d\n", + retcode); + return AVERROR(ENOMEM); + } + + frame_surface = (niFrameSurface1_t *) frame->data[3]; + if (frame_surface == NULL) { + return AVERROR(EINVAL); + } + + tempFIDFrame = frame_surface->ui16FrameIdx; + /* + * Allocate device output frame from the pool. We also send down the frame index + * of the background frame to the scaler manager. + */ + flags = (s->alpha_format ? NI_SCALER_FLAG_PA : 0) | NI_SCALER_FLAG_IO; + flags |= (frame_surface && frame_surface->encoding_type == 2) ? 
NI_SCALER_FLAG_CMP : 0; + retcode = ni_device_alloc_frame(&s->api_ctx, + FFALIGN(frame->width, 2), + FFALIGN(frame->height, 2), + main_scaler_format, + flags, + FFALIGN(frame->width, 2), + FFALIGN(frame->height, 2), + 0, // x + 0, // y + frame_surface->ui32nodeAddress, + frame_surface->ui16FrameIdx, + NI_DEVICE_TYPE_SCALER); + + if (retcode != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_DEBUG, "Can't allocate frame for output %d\n", + retcode); + return AVERROR(ENOMEM); + } + + out = av_frame_alloc(); + if (!out) { + return AVERROR(ENOMEM); + } + + av_frame_copy_props(out,frame); + + out->width = outlink->w; + out->height = outlink->h; + out->format = AV_PIX_FMT_NI_QUAD; + + /* Quadra 2D engine always outputs limited color range */ + out->color_range = AVCOL_RANGE_MPEG; + + /* Reference the new hw frames context */ + out->hw_frames_ctx = av_buffer_ref(s->out_frames_ref); + out->data[3] = av_malloc(sizeof(niFrameSurface1_t)); + + if (!out->data[3]) { + av_frame_free(&out); + return AVERROR(ENOMEM); + } + + /* Copy the frame surface from the incoming frame */ + memcpy(out->data[3], frame->data[3], sizeof(niFrameSurface1_t)); + + /* Set the new frame index */ + retcode = ni_device_session_read_hwdesc(&s->api_ctx, &s->api_dst_frame, + NI_DEVICE_TYPE_SCALER); + if (retcode != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, + "Can't acquire output frame %d\n", retcode); + av_frame_free(&out); + return AVERROR(ENOMEM); + } + +#ifdef NI_MEASURE_LATENCY + ff_ni_update_benchmark("ni_quadra_overlay"); +#endif + + frame_surface = (niFrameSurface1_t *) out->data[3]; + new_frame_surface = (niFrameSurface1_t *) s->api_dst_frame.data.frame.p_data[3]; + frame_surface->ui16FrameIdx = new_frame_surface->ui16FrameIdx; + frame_surface->ui16session_ID = new_frame_surface->ui16session_ID; + frame_surface->device_handle = new_frame_surface->device_handle; + frame_surface->output_idx = new_frame_surface->output_idx; + frame_surface->src_cpu = new_frame_surface->src_cpu; + frame_surface->dma_buf_fd = 0; + + ff_ni_set_bit_depth_and_encoding_type(&frame_surface->bit_depth, + &frame_surface->encoding_type, + main_frame_ctx->sw_format); + + /* Remove ni-split specific assets */ + frame_surface->ui32nodeAddress = 0; + + frame_surface->ui16width = out->width; + frame_surface->ui16height = out->height; + + av_log(ctx, AV_LOG_DEBUG, + "%s:IN trace ui16FrameIdx = [%d] and [%d] --> out [%d] \n", __FILE__, + tempFIDFrame, tempFIDOverlay, frame_surface->ui16FrameIdx); + + out->buf[0] = av_buffer_create(out->data[3], sizeof(niFrameSurface1_t), ff_ni_frame_free, NULL, 0); + + return ff_filter_frame(ctx->outputs[0], out); +} + +static int process_frame_inplace(FFFrameSync *fs) +{ + AVFilterContext *ctx = fs->parent; + NetIntOverlayContext *s = (NetIntOverlayContext *) ctx->priv; + AVHWFramesContext *main_frame_ctx,*ovly_frame_ctx; + AVNIDeviceContext *pAVNIDevCtx; + AVFilterLink *inlink_main = ctx->inputs[MAIN]; + AVFilterLink *inlink_overlay = ctx->inputs[OVERLAY]; + AVFilterLink *outlink = ctx->outputs[0]; + AVFrame *frame = NULL; + AVFrame *overlay = NULL; + AVFrame *out = NULL; + niFrameSurface1_t *frame_surface; + ni_retcode_t retcode; + uint16_t ovly_frame_idx = 0; + uint16_t main_frame_idx = 0; + int flags, main_cardno, ovly_cardno; + int main_scaler_format, ovly_scaler_format; + int src_x, src_y, src_w, src_h; + int dst_x, dst_y, dst_w, dst_h; + + av_log(ctx, AV_LOG_TRACE, "%s: ready %u inlink framequeue %u available_frame %d " + "inlink_overlay framequeue %u available_frame %d outlink framequeue %u " + "frame_wanted 
%d\n", __func__, ctx->ready, ff_inlink_queued_frames(inlink_main), + ff_inlink_check_available_frame(inlink_main), + ff_inlink_queued_frames(inlink_overlay), + ff_inlink_check_available_frame(inlink_overlay), ff_inlink_queued_frames(outlink), + ff_outlink_frame_wanted(outlink)); + + // Consume from inlink framequeue only when outlink framequeue is empty, to prevent filter from exhausting all pre-allocated device buffers + if (ff_inlink_check_available_frame(outlink)) + return FFERROR_NOT_READY; + + ff_framesync_get_frame(fs, OVERLAY, &overlay, 0); + + if (!overlay) { + ff_framesync_get_frame(fs, MAIN, &frame, 1); + return ff_filter_frame(ctx->outputs[0], frame); + } + + ff_framesync_get_frame(fs, MAIN, &frame, 0); + + frame->pts = + av_rescale_q(fs->pts, fs->time_base, ctx->outputs[0]->time_base); + + s->var_values[VAR_OVERLAY_W] = s->var_values[VAR_OW] = overlay->width; + s->var_values[VAR_OVERLAY_H] = s->var_values[VAR_OH] = overlay->height; + s->var_values[VAR_MAIN_W ] = s->var_values[VAR_MW] = frame->width; + s->var_values[VAR_MAIN_H ] = s->var_values[VAR_MH] = frame->height; + s->var_values[VAR_T] = frame->pts == AV_NOPTS_VALUE ? + NAN : frame->pts * av_q2d(ctx->inputs[0]->time_base); + + // Allow location modification + set_expr(&s->x_pexpr, s->x_expr, "x", ctx); + set_expr(&s->y_pexpr, s->y_expr, "y", ctx); + + eval_expr(ctx); + av_log(ctx, AV_LOG_DEBUG, "x:%f xi:%d y:%f yi:%d t:%f\n", + s->var_values[VAR_X], s->x, + s->var_values[VAR_Y], s->y, + s->var_values[VAR_T]); + + main_frame_ctx = (AVHWFramesContext *) frame->hw_frames_ctx->data; + main_scaler_format = + ff_ni_ffmpeg_to_gc620_pix_fmt(main_frame_ctx->sw_format); + + frame_surface = (niFrameSurface1_t *) frame->data[3]; + + if (frame_surface == NULL) + return AVERROR(EINVAL); + + main_frame_idx = frame_surface->ui16FrameIdx; + + frame_surface = (niFrameSurface1_t *) overlay->data[3]; + + if (frame_surface == NULL) + return AVERROR(EINVAL); + + ovly_frame_idx = frame_surface->ui16FrameIdx; + + main_cardno = ni_get_cardno(frame); + + ovly_frame_ctx = (AVHWFramesContext *) overlay->hw_frames_ctx->data; + ovly_scaler_format = + ff_ni_ffmpeg_to_gc620_pix_fmt(ovly_frame_ctx->sw_format); + ovly_cardno = ni_get_cardno(overlay); + + if (main_cardno != ovly_cardno) { + av_log(ctx, AV_LOG_ERROR, "Main/Overlay frames on different cards\n"); + return AVERROR(EINVAL); + } + + // If overlay does not intersect the background, pass + // the frame through the overlay filter. 
+ if (!overlay_intersects_background(ctx, overlay, frame)) { + out = av_frame_clone(frame); + + if (!out) { + av_log(ctx, AV_LOG_ERROR, "Can't clone frame\n"); + return AVERROR(ENOMEM); + } + return ff_filter_frame(ctx->outputs[0], out); + } + + if (!s->initialized) { + /* Set up a scaler session for the in-place overlay */ + retcode = ni_device_session_context_init(&s->api_ctx); + if (retcode < 0) { + av_log(ctx, AV_LOG_ERROR, + "ni overlay filter session context init failure\n"); + return retcode; + } + + pAVNIDevCtx = (AVNIDeviceContext *)main_frame_ctx->device_ctx->hwctx; + s->api_ctx.device_handle = pAVNIDevCtx->cards[main_cardno]; + s->api_ctx.blk_io_handle = pAVNIDevCtx->cards[main_cardno]; + + s->api_ctx.hw_id = main_cardno; + s->api_ctx.device_type = NI_DEVICE_TYPE_SCALER; + s->api_ctx.scaler_operation = NI_SCALER_OPCODE_IPOVLY; + s->api_ctx.keep_alive_timeout = s->keep_alive_timeout; + + retcode = ni_device_session_open(&s->api_ctx, NI_DEVICE_TYPE_SCALER); + if (retcode != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Can't open device session on card %d\n", + main_cardno); + ni_device_session_close(&s->api_ctx, 1, NI_DEVICE_TYPE_SCALER); + ni_device_session_context_clear(&s->api_ctx); + return retcode; + } + + s->session_opened = 1; + + // If the in-place overlay is rgba over yuv, we need to set up + // an extra intermediate crop session. + if (s->overlay_has_alpha && !s->main_has_alpha) { + /* Set up a scaler session for the crop operation */ + retcode = ni_device_session_context_init(&s->crop_api_ctx); + if (retcode < 0) { + av_log(ctx, AV_LOG_ERROR, + "ni overlay filter (crop) session context init failure\n"); + return retcode; + } + + s->crop_api_ctx.device_handle = pAVNIDevCtx->cards[main_cardno]; + s->crop_api_ctx.blk_io_handle = pAVNIDevCtx->cards[main_cardno]; + + s->crop_api_ctx.hw_id = main_cardno; + s->crop_api_ctx.device_type = NI_DEVICE_TYPE_SCALER; + s->crop_api_ctx.scaler_operation = NI_SCALER_OPCODE_CROP; + s->crop_api_ctx.keep_alive_timeout = s->keep_alive_timeout; + + retcode = ni_device_session_open(&s->crop_api_ctx, + NI_DEVICE_TYPE_SCALER); + if (retcode != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, + "Can't open device session on card %d\n", main_cardno); + ni_device_session_close(&s->crop_api_ctx, 1, NI_DEVICE_TYPE_SCALER); + ni_device_session_context_clear(&s->crop_api_ctx); + return retcode; + } + + s->crop_session_opened = 1; + + /* init the out pool for the crop session, make it rgba */ + retcode = ff_ni_build_frame_pool(&s->crop_api_ctx, overlay->width, + overlay->height, + AV_PIX_FMT_RGBA, 1, 0); + + if (retcode < 0) { + av_log(ctx, AV_LOG_ERROR, + "Internal output allocation failed rc = %d\n", retcode); + return retcode; + } + } + + if (!((av_strstart(outlink->dst->filter->name, "ni_quadra", NULL)) || (av_strstart(outlink->dst->filter->name, "hwdownload", NULL)))) { + inlink_main->dst->extra_hw_frames = ((DEFAULT_FRAME_THREAD_QUEUE_SIZE > 1) ? 
DEFAULT_FRAME_THREAD_QUEUE_SIZE : 0) - DEFAULT_NI_FILTER_POOL_SIZE; + } + + retcode = init_out_pool(inlink_main->dst); + if (retcode < 0) { + av_log(inlink_main->dst, AV_LOG_ERROR, + "Internal output allocation failed rc = %d\n", retcode); + return retcode; + } + + s->initialized = 1; + } + +#ifdef NI_MEASURE_LATENCY + ff_ni_update_benchmark(NULL); +#endif + + /* For rgba over yuv, we do an intermediate crop and overlay */ + if (s->overlay_has_alpha && !s->main_has_alpha) { + retcode = do_intermediate_crop_and_overlay(ctx, overlay, frame); + + if (retcode < 0) + return retcode; + + /* Allocate a ni_frame for the overlay output */ + retcode = ni_frame_buffer_alloc_hwenc(&s->api_dst_frame.data.frame, + outlink->w, + outlink->h, + 0); + + if (retcode != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Can't allocate inplace overlay frame\n"); + return AVERROR(ENOMEM); + } + + calculate_src_rectangle(&src_x, &src_y, &src_w, &src_h, + 0, 0, frame->width, frame->height, + FFALIGN(s->x,2),FFALIGN(s->y,2), overlay->width, overlay->height); + + /* + * Assign an input frame for overlay picture. Send the + * incoming hardware frame index to the scaler manager. + */ + retcode = ni_device_alloc_frame( + &s->api_ctx, + FFALIGN(overlay->width, 2), // ovly width + FFALIGN(overlay->height, 2), // ovly height + ff_ni_ffmpeg_to_gc620_pix_fmt(AV_PIX_FMT_RGBA), // ovly pix fmt + 0, // flags + src_w, // src rect width + src_h, // src rect height + 0, // src rect x + 0, // src rect y + 0, // n/a + s->ui16CropFrameIdx, // ovly frame idx + NI_DEVICE_TYPE_SCALER); + + if (retcode != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Can't assign input overlay frame %d\n", + retcode); + return AVERROR(ENOMEM); + } + + calculate_dst_rectangle(&dst_x, &dst_y, &dst_w, &dst_h, + 0, 0, frame->width, frame->height, + FFALIGN(s->x,2), FFALIGN(s->y, 2), + overlay->width, overlay->height); + + /* + * Allocate device output frame from the pool. We also send down the + * frame index of the background frame to the scaler manager. + */ + + /* configure the output */ + flags = NI_SCALER_FLAG_IO; + /* premultiply vs straight alpha */ + flags |= (s->alpha_format) ? NI_SCALER_FLAG_PA : 0; + + retcode = ni_device_alloc_frame( + &s->api_ctx, + FFALIGN(frame->width, 2), // main width + FFALIGN(frame->height, 2), // main height + main_scaler_format, // main pix fmt + flags, // flags + dst_w, // dst rect width + dst_h, // dst rect height + dst_x, // dst rect x + dst_y, // dst rect y + 0, // n/a + main_frame_idx, // main frame idx + NI_DEVICE_TYPE_SCALER); + + if (retcode != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Can't allocate overlay output %d\n", + retcode); + return AVERROR(ENOMEM); + } + + /* Set the new frame index */ + retcode = ni_device_session_read_hwdesc(&s->api_ctx, + &s->api_dst_frame, + NI_DEVICE_TYPE_SCALER); + + if (retcode != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, + "Can't acquire output overlay frame %d\n", retcode); + return AVERROR(ENOMEM); + } + } else { + /* Not rgba over yuv. For yuv over yuv, yuv over rgba, */ + /* rgba over rgba, we can perform an in-place overlay immediately. 
*/
+
+        /* Allocate ni_frame for the overlay output */
+        retcode = ni_frame_buffer_alloc_hwenc(&s->api_dst_frame.data.frame,
+                                              outlink->w,
+                                              outlink->h,
+                                              0);
+
+        if (retcode != NI_RETCODE_SUCCESS) {
+            av_log(ctx, AV_LOG_ERROR, "Cannot allocate in-place frame\n");
+            return AVERROR(ENOMEM);
+        }
+
+        calculate_src_rectangle(&src_x, &src_y, &src_w, &src_h,
+                                0, 0, frame->width, frame->height,
+                                FFALIGN(s->x,2), FFALIGN(s->y,2),
+                                overlay->width, overlay->height);
+
+        /*
+         * Assign input frame for overlay picture. Sends the
+         * incoming hardware frame index to the scaler manager.
+         */
+        retcode = ni_device_alloc_frame(
+            &s->api_ctx,
+            FFALIGN(overlay->width, 2),   // overlay width
+            FFALIGN(overlay->height, 2),  // overlay height
+            ovly_scaler_format,           // overlay pix fmt
+            0,                            // flags
+            src_w,                        // src rect width
+            src_h,                        // src rect height
+            src_x,                        // src rect x
+            src_y,                        // src rect y
+            0,                            // n/a
+            ovly_frame_idx,               // overlay frame idx
+            NI_DEVICE_TYPE_SCALER);
+
+        if (retcode != NI_RETCODE_SUCCESS) {
+            av_log(ctx, AV_LOG_ERROR,
+                   "Can't assign frame for overlay input %d\n", retcode);
+            return AVERROR(ENOMEM);
+        }
+
+        /* In-place overlay frame. Send down frame index of background frame */
+
+        /* Configure the output */
+        flags = NI_SCALER_FLAG_IO;
+        /* Premultiply vs straight alpha */
+        flags |= s->alpha_format ? NI_SCALER_FLAG_PA : 0;
+
+        calculate_dst_rectangle(&dst_x, &dst_y, &dst_w, &dst_h,
+                                0, 0, frame->width, frame->height,
+                                FFALIGN(s->x,2), FFALIGN(s->y,2),
+                                overlay->width, overlay->height);
+
+        retcode = ni_device_alloc_frame(
+            &s->api_ctx,
+            FFALIGN(frame->width, 2),     // main width
+            FFALIGN(frame->height, 2),    // main height
+            main_scaler_format,           // main pix fmt
+            flags,                        // flags
+            dst_w,                        // dst rect width
+            dst_h,                        // dst rect height
+            dst_x,                        // dst rect x
+            dst_y,                        // dst rect y
+            0,                            // n/a
+            main_frame_idx,               // main frame idx
+            NI_DEVICE_TYPE_SCALER);
+
+        if (retcode != NI_RETCODE_SUCCESS) {
+            av_log(ctx, AV_LOG_ERROR,
+                   "Can't allocate frame for output ovly %d\n", retcode);
+            return AVERROR(ENOMEM);
+        }
+
+        retcode = ni_device_session_read_hwdesc(&s->api_ctx, &s->api_dst_frame,
+                                                NI_DEVICE_TYPE_SCALER);
+
+        if (retcode != NI_RETCODE_SUCCESS) {
+            av_log(ctx, AV_LOG_ERROR,
+                   "Can't acquire output frame of overlay %d\n", retcode);
+            return AVERROR(ENOMEM);
+        }
+    }
+
+#ifdef NI_MEASURE_LATENCY
+    ff_ni_update_benchmark("ni_quadra_overlay");
+#endif
+
+    /* Do an in-place overlay onto the background frame */
+    out = av_frame_clone(frame);
+
+    if (!out) {
+        av_log(ctx, AV_LOG_ERROR, "Cannot clone frame\n");
+        return AVERROR(ENOMEM);
+    }
+
+    /* Quadra 2D engine always outputs limited color range */
+    out->color_range = AVCOL_RANGE_MPEG;
+
+    if (s->overlay_has_alpha && !s->main_has_alpha) {
+        av_log(ctx, AV_LOG_DEBUG,
+               "%s:IN trace ui16FrameIdx = [%d] and [%d] and [%d] --> out [%d]\n",
+               __func__, main_frame_idx, ovly_frame_idx, s->ui16CropFrameIdx,
+               main_frame_idx);
+    } else {
+        av_log(ctx, AV_LOG_DEBUG,
+               "%s:IN trace ui16FrameIdx = [%d] and [%d] --> out [%d]\n",
+               __func__, main_frame_idx, ovly_frame_idx, main_frame_idx);
+    }
+
+    if (s->overlay_has_alpha && !s->main_has_alpha) {
+        ni_hwframe_buffer_recycle((niFrameSurface1_t *)
+                                  s->crop_api_dst_frame.data.frame.p_data[3],
+                                  (int32_t)s->crop_api_ctx.device_handle);
+    }
+
+    return ff_filter_frame(ctx->outputs[0], out);
+}
+
+/**
+ * Configure the main input link of the overlay filter.
+ */ +static int config_input_main(AVFilterLink *inlink) +{ + NetIntOverlayContext *s = inlink->dst->priv; + AVHWFramesContext *in_frames_ctx; + const AVPixFmtDescriptor *pix_desc; + + FilterLink *li = ff_filter_link(inlink); + if (li->hw_frames_ctx == NULL) { + av_log(inlink->dst, AV_LOG_ERROR, "No hw context provided on input\n"); + return AVERROR(EINVAL); + } + in_frames_ctx = (AVHWFramesContext *)li->hw_frames_ctx->data; + if (!in_frames_ctx) { + return AVERROR(EINVAL); + } + + if (in_frames_ctx->sw_format == AV_PIX_FMT_NI_QUAD_10_TILE_4X4) { + av_log(inlink->dst, AV_LOG_ERROR, "tile4x4 10b not supported for overlay!\n"); + return AVERROR(EINVAL); + } + + s->main_has_alpha = ff_fmt_is_in(in_frames_ctx->sw_format, alpha_pix_fmts); + + pix_desc = av_pix_fmt_desc_get(in_frames_ctx->sw_format); + + s->hsub = pix_desc->log2_chroma_w; + s->vsub = pix_desc->log2_chroma_h; + + return 0; +} + +static int activate(AVFilterContext *ctx) +{ + NetIntOverlayContext *s = ctx->priv; + return ff_framesync_activate(&s->fs); +} + +#define OFFSET(x) offsetof(NetIntOverlayContext, x) +#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_FILTERING_PARAM) + +static const AVOption ni_overlay_options[] = { + { "x", "set the x expression", OFFSET(x_expr), AV_OPT_TYPE_STRING, {.str = "0"}, CHAR_MIN, CHAR_MAX, FLAGS }, + { "y", "set the y expression", OFFSET(y_expr), AV_OPT_TYPE_STRING, {.str = "0"}, CHAR_MIN, CHAR_MAX, FLAGS }, + { "alpha", "alpha format", OFFSET(alpha_format), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, FLAGS, "alpha_format" }, + { "straight", "", 0, AV_OPT_TYPE_CONST, {.i64 = 0}, .flags = FLAGS, .unit = "alpha_format" }, + { "premultiplied", "", 0, AV_OPT_TYPE_CONST, {.i64 = 1}, .flags = FLAGS, .unit = "alpha_format" }, + { "inplace", "overlay in-place", OFFSET(inplace), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, FLAGS }, + NI_FILT_OPTION_KEEPALIVE, + NI_FILT_OPTION_BUFFER_LIMIT, + { NULL } +}; + +// NOLINTNEXTLINE(clang-diagnostic-deprecated-declarations) +FRAMESYNC_DEFINE_CLASS(ni_overlay, NetIntOverlayContext, fs); + +static const AVFilterPad inputs[] = { + { + .name = "main", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = config_input_main, + }, + { + .name = "overlay", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = config_input_overlay, + }, +}; + +static const AVFilterPad outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = config_output, + }, +}; + +FFFilter ff_vf_overlay_ni_quadra = { + .p.name = "ni_quadra_overlay", + .p.description = NULL_IF_CONFIG_SMALL( + "NETINT Quadra overlay a video source on top of the input v" NI_XCODER_REVISION), + .p.priv_class = &ni_overlay_class, + .priv_size = sizeof(NetIntOverlayContext), + .uninit = uninit, + FILTER_INPUTS(inputs), + FILTER_OUTPUTS(outputs), + FILTER_QUERY_FUNC(query_formats), + .preinit = ni_overlay_framesync_preinit, + .activate = activate, + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +}; diff --git a/libavfilter/vf_pad_ni.c b/libavfilter/vf_pad_ni.c new file mode 100644 index 0000000000..f935377f84 --- /dev/null +++ b/libavfilter/vf_pad_ni.c @@ -0,0 +1,703 @@ +/* + * Copyright (c) 2007 Bobby Bingham + * Copyright (c) 2020 NetInt + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * video padding filter
+ */
+
+#include <float.h>  /* DBL_MAX */
+
+#include "nifilter.h"
+#include "filters.h"
+#include "formats.h"
+#include "libavutil/mem.h"
+#include "fftools/ffmpeg_sched.h"
+#include "video.h"
+#include "libavutil/avstring.h"
+#include "libavutil/common.h"
+#include "libavutil/eval.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/colorspace.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/parseutils.h"
+#include "libavutil/mathematics.h"
+#include "libavutil/opt.h"
+#include "drawutils.h"
+
+static const char *const var_names[] = {
+    "in_w",  "iw",
+    "in_h",  "ih",
+    "out_w", "ow",
+    "out_h", "oh",
+    "x",
+    "y",
+    "a",
+    "sar",
+    "dar",
+    "hsub",
+    "vsub",
+    NULL
+};
+
+enum var_name {
+    VAR_IN_W,  VAR_IW,
+    VAR_IN_H,  VAR_IH,
+    VAR_OUT_W, VAR_OW,
+    VAR_OUT_H, VAR_OH,
+    VAR_X,
+    VAR_Y,
+    VAR_A,
+    VAR_SAR,
+    VAR_DAR,
+    VAR_HSUB,
+    VAR_VSUB,
+    VARS_NB
+};
+
+typedef struct NetIntPadContext {
+    const AVClass *class;
+    int w, h;               ///< output dimensions, a value of 0 will result in the input size
+    int x, y;               ///< offsets of the input area with respect to the padded area
+    int in_w, in_h;         ///< width and height for the padded input video, which has to be aligned to the chroma values in order to avoid chroma issues
+    int inlink_w, inlink_h;
+    AVRational aspect;
+
+    char *w_expr;           ///< width expression string
+    char *h_expr;           ///< height expression string
+    char *x_expr;           ///< x offset expression string
+    char *y_expr;           ///< y offset expression string
+    uint8_t rgba_color[4];  ///< color for the padding area
+    FFDrawContext draw;
+    FFDrawColor color;
+
+    AVBufferRef *out_frames_ref;
+
+    ni_session_context_t api_ctx;
+    ni_session_data_io_t api_dst_frame;
+
+    int initialized;
+    int session_opened;
+    int keep_alive_timeout; /* keep alive timeout setting */
+
+    int auto_skip;
+    int skip_filter;
+    int buffer_limit;
+} NetIntPadContext;
+
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVPixelFormat pix_fmts[] =
+        {AV_PIX_FMT_NI_QUAD, AV_PIX_FMT_NONE};
+    AVFilterFormats *formats;
+
+    formats = ff_make_format_list(pix_fmts);
+
+    if (!formats)
+        return AVERROR(ENOMEM);
+
+    return ff_set_common_formats(ctx, formats);
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    NetIntPadContext *s = ctx->priv;
+
+    if (s->api_dst_frame.data.frame.p_buffer)
+        ni_frame_buffer_free(&s->api_dst_frame.data.frame);
+
+    if (s->session_opened) {
+        /* Close operation will free the device frames */
+        ni_device_session_close(&s->api_ctx, 1, NI_DEVICE_TYPE_SCALER);
+        ni_device_session_context_clear(&s->api_ctx);
+    }
+
+    av_buffer_unref(&s->out_frames_ref);
+}
+
+static int init_out_pool(AVFilterContext *ctx)
+{
+    NetIntPadContext *s = ctx->priv;
+    AVHWFramesContext *out_frames_ctx;
+    int pool_size = DEFAULT_NI_FILTER_POOL_SIZE;
+
+    out_frames_ctx = (AVHWFramesContext*)s->out_frames_ref->data;
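+    /*
+     * Grow the device frame pool by any extra frames the filter graph
+     * has requested via extra_hw_frames, so downstream buffering cannot
+     * exhaust the pool.
+     */
+    pool_size += ctx->extra_hw_frames > 0 ?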
ctx->extra_hw_frames : 0; + s->buffer_limit = 1; + + /* Create frame pool on device */ + return ff_ni_build_frame_pool(&s->api_ctx, + out_frames_ctx->width, out_frames_ctx->height, + out_frames_ctx->sw_format, + pool_size, + s->buffer_limit); +} + +static int config_input(AVFilterLink *inlink) +{ + AVFilterContext *ctx = inlink->dst; + NetIntPadContext *s = ctx->priv; + AVRational adjusted_aspect = s->aspect; + int ret; + double var_values[VARS_NB], res; + char *expr; + AVHWFramesContext *avhwctx; + + if (inlink->format == AV_PIX_FMT_NI_QUAD) { + FilterLink *li = ff_filter_link(inlink); + if (li->hw_frames_ctx == NULL) { + av_log(ctx, AV_LOG_ERROR, "No hw context provided on input\n"); + return AVERROR(EINVAL); + } + avhwctx = (AVHWFramesContext *)li->hw_frames_ctx->data; + + if (ff_draw_init(&s->draw, avhwctx->sw_format, 0) < 0) + return AVERROR(EINVAL); + } else { + if (ff_draw_init(&s->draw, inlink->format, 0) < 0) + return AVERROR(EINVAL); + } + + ff_draw_color(&s->draw, &s->color, s->rgba_color); + + var_values[VAR_IN_W] = var_values[VAR_IW] = inlink->w; + var_values[VAR_IN_H] = var_values[VAR_IH] = inlink->h; + var_values[VAR_OUT_W] = var_values[VAR_OW] = NAN; + var_values[VAR_OUT_H] = var_values[VAR_OH] = NAN; + var_values[VAR_A] = (double) inlink->w / inlink->h; + var_values[VAR_SAR] = inlink->sample_aspect_ratio.num ? + (double) inlink->sample_aspect_ratio.num / inlink->sample_aspect_ratio.den : 1; + var_values[VAR_DAR] = var_values[VAR_A] * var_values[VAR_SAR]; + var_values[VAR_HSUB] = 1 << s->draw.hsub_max; + var_values[VAR_VSUB] = 1 << s->draw.vsub_max; + + /* evaluate width and height */ + av_expr_parse_and_eval(&res, s->w_expr, var_names, var_values, NULL, NULL, + NULL, NULL, NULL, 0, ctx); + s->w = (int)res; + var_values[VAR_OUT_W] = var_values[VAR_OW] = res; + if ((ret = av_expr_parse_and_eval(&res, (expr = s->h_expr), + var_names, var_values, + NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0) + goto eval_fail; + s->h = (int)res; + var_values[VAR_OUT_H] = var_values[VAR_OH] = res; + if (!s->h) + var_values[VAR_OUT_H] = var_values[VAR_OH] = s->h = inlink->h; + + /* evaluate the width again, as it may depend on the evaluated output height */ + if ((ret = av_expr_parse_and_eval(&res, (expr = s->w_expr), + var_names, var_values, + NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0) + goto eval_fail; + s->w = (int)res; + var_values[VAR_OUT_W] = var_values[VAR_OW] = res; + if (!s->w) + var_values[VAR_OUT_W] = var_values[VAR_OW] = s->w = inlink->w; + + if (adjusted_aspect.num && adjusted_aspect.den) { + adjusted_aspect = av_div_q(adjusted_aspect, inlink->sample_aspect_ratio); + if (s->h < av_rescale(s->w, adjusted_aspect.den, adjusted_aspect.num)) { + s->h = av_rescale(s->w, adjusted_aspect.den, adjusted_aspect.num); + var_values[VAR_OUT_H] = var_values[VAR_OH] = (double)s->h; + } else { + s->w = av_rescale(s->h, adjusted_aspect.num, adjusted_aspect.den); + var_values[VAR_OUT_W] = var_values[VAR_OW] = (double)s->w; + } + } + + /* evaluate x and y */ + av_expr_parse_and_eval(&res, s->x_expr, var_names, var_values, NULL, NULL, + NULL, NULL, NULL, 0, ctx); + s->x = (int)res; + var_values[VAR_X] = res; + if ((ret = av_expr_parse_and_eval(&res, (expr = s->y_expr), + var_names, var_values, + NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0) + goto eval_fail; + s->y = (int)res; + var_values[VAR_Y] = res; + /* evaluate x again, as it may depend on the evaluated y value */ + if ((ret = av_expr_parse_and_eval(&res, (expr = s->x_expr), + var_names, var_values, + NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0) + 
goto eval_fail; + s->x = (int)res; + var_values[VAR_X] = res; + + if (s->x < 0 || s->x + inlink->w > s->w) { + var_values[VAR_X] = (double)(s->w - inlink->w) / 2.0; + s->x = (int)var_values[VAR_X]; + } + if (s->y < 0 || s->y + inlink->h > s->h) { + var_values[VAR_Y] = (double)(s->h - inlink->h) / 2.0; + s->y = (int)var_values[VAR_Y]; + } + + /* sanity check params */ + if (s->w < 0 || s->h < 0) { + av_log(ctx, AV_LOG_ERROR, "Negative values are not acceptable.\n"); + return AVERROR(EINVAL); + } + + s->w = ff_draw_round_to_sub(&s->draw, 0, -1, s->w); + s->h = ff_draw_round_to_sub(&s->draw, 1, -1, s->h); + s->x = ff_draw_round_to_sub(&s->draw, 0, -1, s->x); + s->y = ff_draw_round_to_sub(&s->draw, 1, -1, s->y); + s->in_w = ff_draw_round_to_sub(&s->draw, 0, -1, inlink->w); + s->in_h = ff_draw_round_to_sub(&s->draw, 1, -1, inlink->h); + s->inlink_w = inlink->w; + s->inlink_h = inlink->h; + + av_log(ctx, AV_LOG_VERBOSE, "w:%d h:%d -> w:%d h:%d x:%d y:%d color:0x%02X%02X%02X%02X\n", + inlink->w, inlink->h, s->w, s->h, s->x, s->y, + s->rgba_color[0], s->rgba_color[1], s->rgba_color[2], s->rgba_color[3]); + + if (s->x < 0 || s->y < 0 || + s->w <= 0 || s->h <= 0 || + (unsigned)s->x + (unsigned)inlink->w > s->w || + (unsigned)s->y + (unsigned)inlink->h > s->h) { + av_log(ctx, AV_LOG_ERROR, + "Input area %d:%d:%d:%d not within the padded area 0:0:%d:%d or zero-sized\n", + s->x, s->y, s->x + inlink->w, s->y + inlink->h, s->w, s->h); + return AVERROR(EINVAL); + } + + if (s->w > NI_MAX_RESOLUTION_WIDTH || s->h > NI_MAX_RESOLUTION_HEIGHT) { + av_log(ctx, AV_LOG_ERROR, "Padded value (%dx%d) > 8192, not allowed\n", s->w, s->h); + return AVERROR(EINVAL); + } + + return 0; + +eval_fail: + av_log(ctx, AV_LOG_ERROR, + "Error when evaluating the expression '%s'\n", expr); + return ret; +} + +static int config_output(AVFilterLink *outlink) +{ + NetIntPadContext *s = outlink->src->priv; + AVHWFramesContext *in_frames_ctx; + AVHWFramesContext *out_frames_ctx; + AVFilterContext *ctx; + + outlink->w = s->w; + outlink->h = s->h; + + ctx = (AVFilterContext *)outlink->src; + FilterLink *li = ff_filter_link(ctx->inputs[0]); + if (li->hw_frames_ctx == NULL) { + av_log(ctx, AV_LOG_ERROR, "No hw context provided on input\n"); + return AVERROR(EINVAL); + } + in_frames_ctx = (AVHWFramesContext *)li->hw_frames_ctx->data; + + if (in_frames_ctx->sw_format == AV_PIX_FMT_YUYV422 || + in_frames_ctx->sw_format == AV_PIX_FMT_UYVY422) { + av_log(ctx, AV_LOG_ERROR, "yuyv/uyvy not supported\n"); + return AVERROR(EINVAL); + } + if (in_frames_ctx->sw_format == AV_PIX_FMT_NI_QUAD_8_TILE_4X4 || + in_frames_ctx->sw_format == AV_PIX_FMT_NI_QUAD_10_TILE_4X4) { + av_log(ctx, AV_LOG_ERROR, "tile4x4 not supported\n"); + return AVERROR(EINVAL); + } + + //skip the color range check + if (s->auto_skip && + s->w == in_frames_ctx->width && + s->h == in_frames_ctx->height && + s->x == 0 && + s->y == 0 + ) { + //skip hardware pad + s->skip_filter = 1; + + FilterLink *lt = ff_filter_link(outlink->src->inputs[0]); + s->out_frames_ref = av_buffer_ref(lt->hw_frames_ctx); + if (!s->out_frames_ref) { + return AVERROR(ENOMEM); + } + FilterLink *lo = ff_filter_link(outlink); + av_buffer_unref(&lo->hw_frames_ctx); + lo->hw_frames_ctx = av_buffer_ref(s->out_frames_ref); + if (!lo->hw_frames_ctx) { + return AVERROR(ENOMEM); + } + + return 0; + } + + s->out_frames_ref = av_hwframe_ctx_alloc(in_frames_ctx->device_ref); + if (!s->out_frames_ref) + return AVERROR(ENOMEM); + + out_frames_ctx = (AVHWFramesContext *)s->out_frames_ref->data; + + out_frames_ctx->format = 
AV_PIX_FMT_NI_QUAD; + out_frames_ctx->width = s->w; + out_frames_ctx->height = s->h; + out_frames_ctx->sw_format = in_frames_ctx->sw_format; + out_frames_ctx->initial_pool_size = + NI_PAD_ID; // Repurposed as identity code + + av_hwframe_ctx_init(s->out_frames_ref); + + FilterLink *lo = ff_filter_link(outlink); + av_buffer_unref(&lo->hw_frames_ctx); + + lo->hw_frames_ctx = av_buffer_ref(s->out_frames_ref); + if (!lo->hw_frames_ctx) + return AVERROR(ENOMEM); + + return 0; +} + +static int filter_frame(AVFilterLink *inlink, AVFrame *in) +{ + NetIntPadContext *s = inlink->dst->priv; + AVFilterLink *outlink = inlink->dst->outputs[0]; + AVFrame *out = NULL; + niFrameSurface1_t* frame_surface,*new_frame_surface; + AVHWFramesContext *pAVHFWCtx; + AVNIDeviceContext *pAVNIDevCtx; + ni_retcode_t retcode; + uint32_t ui32RgbaColor, scaler_format; + uint16_t tempFID; + int cardno; + + frame_surface = (niFrameSurface1_t *) in->data[3]; + if (frame_surface == NULL) { + return AVERROR(EINVAL); + } + + pAVHFWCtx = (AVHWFramesContext *)in->hw_frames_ctx->data; + pAVNIDevCtx = (AVNIDeviceContext *)pAVHFWCtx->device_ctx->hwctx; + cardno = ni_get_cardno(in); + + if (s->skip_filter) { + //skip hardware pad + return ff_filter_frame(inlink->dst->outputs[0], in); + } + + if (!s->initialized) { + retcode = ni_device_session_context_init(&s->api_ctx); + if (retcode < 0) { + av_log(inlink->dst, AV_LOG_ERROR, + "ni pad filter session context init failure\n"); + goto fail; + } + + s->api_ctx.device_handle = pAVNIDevCtx->cards[cardno]; + s->api_ctx.blk_io_handle = pAVNIDevCtx->cards[cardno]; + + s->api_ctx.hw_id = cardno; + s->api_ctx.device_type = NI_DEVICE_TYPE_SCALER; + s->api_ctx.scaler_operation = NI_SCALER_OPCODE_PAD; + s->api_ctx.keep_alive_timeout = s->keep_alive_timeout; + + retcode = ni_device_session_open(&s->api_ctx, NI_DEVICE_TYPE_SCALER); + if (retcode != NI_RETCODE_SUCCESS) { + av_log(inlink->dst, AV_LOG_ERROR, + "Can't open device session on card %d\n", cardno); + + /* Close operation will free the device frames */ + ni_device_session_close(&s->api_ctx, 1, NI_DEVICE_TYPE_SCALER); + ni_device_session_context_clear(&s->api_ctx); + goto fail; + } + + s->session_opened = 1; + + if (!((av_strstart(outlink->dst->filter->name, "ni_quadra", NULL)) || (av_strstart(outlink->dst->filter->name, "hwdownload", NULL)))) { + inlink->dst->extra_hw_frames = (DEFAULT_FRAME_THREAD_QUEUE_SIZE > 1) ? 
DEFAULT_FRAME_THREAD_QUEUE_SIZE : 0; + } + + retcode = init_out_pool(inlink->dst); + if (retcode < 0) { + av_log(inlink->dst, AV_LOG_ERROR, + "Internal output allocation failed rc = %d\n", retcode); + goto fail; + } + + AVHWFramesContext *out_frames_ctx = (AVHWFramesContext *)s->out_frames_ref->data; + AVNIFramesContext *out_ni_ctx = (AVNIFramesContext *)out_frames_ctx->hwctx; + ni_cpy_hwframe_ctx(pAVHFWCtx, out_frames_ctx); + ni_device_session_copy(&s->api_ctx, &out_ni_ctx->api_ctx); + + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pAVHFWCtx->sw_format); + + if ((in->color_range == AVCOL_RANGE_JPEG) && !(desc->flags & AV_PIX_FMT_FLAG_RGB)) { + av_log(inlink->dst, AV_LOG_WARNING, + "WARNING: Full color range input, limited color range output\n"); + } + + s->initialized = 1; + } + + scaler_format = ff_ni_ffmpeg_to_gc620_pix_fmt(pAVHFWCtx->sw_format); + + retcode = ni_frame_buffer_alloc_hwenc(&s->api_dst_frame.data.frame, + outlink->w, + outlink->h, + 0); + + if (retcode != NI_RETCODE_SUCCESS) { + retcode = AVERROR(ENOMEM); + goto fail; + } + + av_log(inlink->dst, AV_LOG_DEBUG, + "inlink->w = %d;inlink->h = %d;outlink->w = %d;outlink->h = %d\n", + inlink->w, inlink->h, outlink->w, outlink->h); + av_log(inlink->dst, AV_LOG_DEBUG, + "s->w=%d;s->h=%d;s->x=%d;s->y=%d;c=%02x:%02x:%02x:%02x\n", s->w, + s->h, s->x, s->y, s->rgba_color[0], s->rgba_color[1], + s->rgba_color[2], s->rgba_color[3]); + +#ifdef NI_MEASURE_LATENCY + ff_ni_update_benchmark(NULL); +#endif + + /* + * Allocate device input frame. This call won't actually allocate a frame, + * but sends the incoming hardware frame index to the scaler manager + */ + retcode = ni_device_alloc_frame(&s->api_ctx, + FFALIGN(in->width, 2), + FFALIGN(in->height, 2), + scaler_format, + 0, // input frame + in->width, // src rectangle width + in->height, // src rectangle height + 0, // src rectangle x = 0 + 0, // src rectangle y = 0 + frame_surface->ui32nodeAddress, + frame_surface->ui16FrameIdx, + NI_DEVICE_TYPE_SCALER); + + if (retcode != NI_RETCODE_SUCCESS) { + av_log(inlink->dst, AV_LOG_DEBUG, "Can't allocate device input frame %d\n", retcode); + retcode = AVERROR(ENOMEM); + goto fail; + } + + /* Scaler uses BGRA color, or ARGB in little-endian */ + ui32RgbaColor = (s->rgba_color[3] << 24) | (s->rgba_color[0] << 16) | + (s->rgba_color[1] << 8) | s->rgba_color[2]; + + /* Allocate device destination frame. 
This will acquire a frame from the pool */ + retcode = ni_device_alloc_frame(&s->api_ctx, + FFALIGN(outlink->w,2), + FFALIGN(outlink->h,2), + scaler_format, + NI_SCALER_FLAG_IO, // output frame + in->width, // dst rectangle width + in->height, // dst rectangle height + s->x, // dst rectangle x + s->y, // dst rectangle y + ui32RgbaColor, // rgba color + -1, + NI_DEVICE_TYPE_SCALER); + + if (retcode != NI_RETCODE_SUCCESS) { + av_log(inlink->dst, AV_LOG_DEBUG, + "Can't allocate device output frame %d\n", retcode); + retcode = AVERROR(ENOMEM); + goto fail; + } + + out = av_frame_alloc(); + if (!out) { + retcode = AVERROR(ENOMEM); + goto fail; + } + + av_frame_copy_props(out,in); + + out->width = s->w; + out->height = s->h; + + out->format = AV_PIX_FMT_NI_QUAD; + + /* Quadra 2D engine always outputs limited color range */ + out->color_range = AVCOL_RANGE_MPEG; + + /* Reference the new hw frames context */ + out->hw_frames_ctx = av_buffer_ref(s->out_frames_ref); + + out->data[3] = av_malloc(sizeof(niFrameSurface1_t)); + + if (!out->data[3]) { + retcode = AVERROR(ENOMEM); + goto fail; + } + + /* Copy the frame surface from the incoming frame */ + memcpy(out->data[3], in->data[3], sizeof(niFrameSurface1_t)); + + /* Set the new frame index */ + retcode = ni_device_session_read_hwdesc(&s->api_ctx, &s->api_dst_frame, + NI_DEVICE_TYPE_SCALER); + if (retcode != NI_RETCODE_SUCCESS) { + av_log(inlink->dst, AV_LOG_ERROR, + "Can't acquire output frame %d\n",retcode); + retcode = AVERROR(ENOMEM); + goto fail; + } + +#ifdef NI_MEASURE_LATENCY + ff_ni_update_benchmark("ni_quadra_pad"); +#endif + + tempFID = frame_surface->ui16FrameIdx; + frame_surface = (niFrameSurface1_t *) out->data[3]; + new_frame_surface = (niFrameSurface1_t *) s->api_dst_frame.data.frame.p_data[3]; + frame_surface->ui16FrameIdx = new_frame_surface->ui16FrameIdx; + frame_surface->ui16session_ID = new_frame_surface->ui16session_ID; + frame_surface->device_handle = new_frame_surface->device_handle; + frame_surface->output_idx = new_frame_surface->output_idx; + frame_surface->src_cpu = new_frame_surface->src_cpu; + frame_surface->dma_buf_fd = 0; + + ff_ni_set_bit_depth_and_encoding_type(&frame_surface->bit_depth, + &frame_surface->encoding_type, + pAVHFWCtx->sw_format); + + /* Remove ni-split specific assets */ + frame_surface->ui32nodeAddress = 0; + + frame_surface->ui16width = out->width; + frame_surface->ui16height = out->height; + + av_log(inlink->dst, AV_LOG_DEBUG, + "vf_pad_ni.c:IN trace ui16FrameIdx = [%d] --> out [%d] \n", tempFID, + frame_surface->ui16FrameIdx); + + out->buf[0] = av_buffer_create(out->data[3], sizeof(niFrameSurface1_t), ff_ni_frame_free, NULL, 0); + + av_frame_free(&in); + + return ff_filter_frame(inlink->dst->outputs[0], out); + +fail: + av_frame_free(&in); + if (out) + av_frame_free(&out); + return retcode; +} + +static int activate(AVFilterContext *ctx) +{ + AVFilterLink *inlink = ctx->inputs[0]; + AVFilterLink *outlink = ctx->outputs[0]; + AVFrame *frame = NULL; + int ret = 0; + NetIntPadContext *s = inlink->dst->priv; + + // Forward the status on output link to input link, if the status is set, discard all queued frames + FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink); + + if (ff_inlink_check_available_frame(inlink)) { + if (s->initialized) { + ret = ni_device_session_query_buffer_avail(&s->api_ctx, NI_DEVICE_TYPE_SCALER); + } + + if (ret == NI_RETCODE_ERROR_UNSUPPORTED_FW_VERSION) { + av_log(ctx, AV_LOG_WARNING, "No backpressure support in FW\n"); + } else if (ret < 0) { + av_log(ctx, AV_LOG_WARNING, "%s: 
query ret %d, ready %u inlink framequeue %u available_frame %d outlink framequeue %u frame_wanted %d - return NOT READY\n", + __func__, ret, ctx->ready, ff_inlink_queued_frames(inlink), ff_inlink_check_available_frame(inlink), ff_inlink_queued_frames(outlink), ff_outlink_frame_wanted(outlink)); + return FFERROR_NOT_READY; + } + + ret = ff_inlink_consume_frame(inlink, &frame); + if (ret < 0) + return ret; + + ret = filter_frame(inlink, frame); + if (ret >= 0) { + ff_filter_set_ready(ctx, 300); + } + return ret; + } + + // We did not get a frame from input link, check its status + FF_FILTER_FORWARD_STATUS(inlink, outlink); + + // We have no frames yet from input link and no EOF, so request some. + FF_FILTER_FORWARD_WANTED(outlink, inlink); + + return FFERROR_NOT_READY; +} + +#define OFFSET(x) offsetof(NetIntPadContext, x) +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) + +static const AVOption ni_pad_options[] = { + { "width", "set the pad area width expression", OFFSET(w_expr), AV_OPT_TYPE_STRING, {.str = "iw"}, CHAR_MIN, CHAR_MAX, FLAGS }, + { "w", "set the pad area width expression", OFFSET(w_expr), AV_OPT_TYPE_STRING, {.str = "iw"}, CHAR_MIN, CHAR_MAX, FLAGS }, + { "height", "set the pad area height expression", OFFSET(h_expr), AV_OPT_TYPE_STRING, {.str = "ih"}, CHAR_MIN, CHAR_MAX, FLAGS }, + { "h", "set the pad area height expression", OFFSET(h_expr), AV_OPT_TYPE_STRING, {.str = "ih"}, CHAR_MIN, CHAR_MAX, FLAGS }, + { "x", "set the x offset expression for the input image position", OFFSET(x_expr), AV_OPT_TYPE_STRING, {.str = "0"}, CHAR_MIN, CHAR_MAX, FLAGS }, + { "y", "set the y offset expression for the input image position", OFFSET(y_expr), AV_OPT_TYPE_STRING, {.str = "0"}, CHAR_MIN, CHAR_MAX, FLAGS }, + { "color", "set the color of the padded area border", OFFSET(rgba_color), AV_OPT_TYPE_COLOR, {.str = "black"}, .flags = FLAGS }, + { "aspect", "pad to fit an aspect instead of a resolution", OFFSET(aspect), AV_OPT_TYPE_RATIONAL, {.dbl = 0}, 0, DBL_MAX, FLAGS }, + NI_FILT_OPTION_AUTO_SKIP, + NI_FILT_OPTION_KEEPALIVE, + NI_FILT_OPTION_BUFFER_LIMIT, + { NULL } +}; + +AVFILTER_DEFINE_CLASS(ni_pad); + +static const AVFilterPad inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .filter_frame = filter_frame, + .config_props = config_input, + }, +}; + +static const AVFilterPad outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = config_output, + }, +}; + +FFFilter ff_vf_pad_ni_quadra = { + .p.name = "ni_quadra_pad", + .p.description = NULL_IF_CONFIG_SMALL( + "NETINT Quadra pad the input video v" NI_XCODER_REVISION), + .p.priv_class = &ni_pad_class, + .priv_size = sizeof(NetIntPadContext), + .uninit = uninit, + .activate = activate, + .flags_internal= FF_FILTER_FLAG_HWFRAME_AWARE, + FILTER_INPUTS(inputs), + FILTER_OUTPUTS(outputs), + FILTER_QUERY_FUNC(query_formats), +}; diff --git a/libavfilter/vf_rotate_ni.c b/libavfilter/vf_rotate_ni.c new file mode 100644 index 0000000000..3ce394a131 --- /dev/null +++ b/libavfilter/vf_rotate_ni.c @@ -0,0 +1,764 @@ +/* + * Copyright (c) 2013 Stefano Sabatini + * Copyright (c) 2008 Vitor Sessak + * Copyright (c) 2022 NETINT Technologies Inc. + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * rotation filter, based on the FFmpeg rotate filter
+ */
+
+#include <string.h>
+
+#include "libavutil/eval.h"
+#include "libavutil/parseutils.h"
+#include "libavutil/opt.h"
+
+#include "nifilter.h"
+#include "formats.h"
+#include "libavutil/mem.h"
+#include "fftools/ffmpeg_sched.h"
+#include "filters.h"
+#include "libavutil/avstring.h"
+
+#define BUFFER_WIDTH_PIXEL_ALIGNMENT 16
+
+static const char * const var_names[] = {
+    "in_w" , "iw",  ///< width of the input video
+    "in_h" , "ih",  ///< height of the input video
+    "out_w", "ow",  ///< width of the output video
+    "out_h", "oh",  ///< height of the output video
+    "hsub", "vsub",
+    NULL
+};
+
+enum var_name {
+    VAR_IN_W , VAR_IW,
+    VAR_IN_H , VAR_IH,
+    VAR_OUT_W, VAR_OW,
+    VAR_OUT_H, VAR_OH,
+    VAR_HSUB, VAR_VSUB,
+    VAR_VARS_NB
+};
+
+typedef struct NetIntRotContext {
+    const AVClass *class;
+
+    char *angle_expr_str;
+    AVExpr *angle_expr;
+
+    char *outw_expr_str, *outh_expr_str;
+    int outw, outh;
+
+    char *fillcolor_str;
+    uint8_t fillcolor[4];
+    bool fillcolor_enable;
+
+    int hsub, vsub;
+
+    double var_values[VAR_VARS_NB];
+
+    AVBufferRef *out_frames_ref;
+
+    ni_session_context_t api_ctx;
+    ni_session_data_io_t api_dst_frame;
+
+    ni_frame_config_t output_frame_config;
+
+    bool initialized;
+    bool session_opened;
+    int64_t keep_alive_timeout;
+
+    int auto_skip;
+    int skip_filter;
+    int buffer_limit;
+} NetIntRotContext;
+
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_NI_QUAD, AV_PIX_FMT_NONE };
+    AVFilterFormats *fmts_list = NULL;
+
+    fmts_list = ff_make_format_list(pix_fmts);
+    if (!fmts_list) {
+        return AVERROR(ENOMEM);
+    }
+
+    return ff_set_common_formats(ctx, fmts_list);
+}
+
+static av_cold int init(AVFilterContext *ctx)
+{
+    NetIntRotContext *rot = ctx->priv;
+
+    if (!strcmp(rot->fillcolor_str, "none")) {
+        rot->fillcolor_enable = false;
+    } else if (av_parse_color(rot->fillcolor, rot->fillcolor_str, -1, ctx) >= 0) {
+        rot->fillcolor_enable = true;
+    } else {
+        return AVERROR(EINVAL);
+    }
+
+    return 0;
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    NetIntRotContext *rot = ctx->priv;
+
+    av_expr_free(rot->angle_expr);
+    rot->angle_expr = NULL;
+
+    if (rot->api_dst_frame.data.frame.p_buffer) {
+        ni_frame_buffer_free(&rot->api_dst_frame.data.frame);
+    }
+
+    if (rot->session_opened) {
+        /* Close operation will free the device frames */
+        ni_device_session_close(&rot->api_ctx, 1, NI_DEVICE_TYPE_SCALER);
+        ni_device_session_context_clear(&rot->api_ctx);
+    }
+
+    av_buffer_unref(&rot->out_frames_ref);
+}
+
+static double get_rotated_w(void *opaque, double angle)
+{
+    NetIntRotContext *rot = opaque;
+    double inw = rot->var_values[VAR_IN_W];
+    double inh = rot->var_values[VAR_IN_H];
+    float sinx = (float)sin(angle);
+    float cosx = (float)cos(angle);
+
+    return FFMAX(0, inh * sinx) + FFMAX(0, -inw * cosx) +
+           FFMAX(0, inw * cosx) + FFMAX(0, -inh * sinx);
+}
+
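+/*
+ * Like get_rotated_w() above, these helpers compute the axis-aligned
+ * bounding box of a w x h input rotated by 'angle'; the height is
+ * |w*sin(angle)| + |h*cos(angle)|, with the FFMAX() terms standing in
+ * for the absolute values.
+ */
+static double get_rotated_h(void *opaque, double angle)
+{
+    NetIntRotContext *rot = opaque;
+    double inw =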
rot->var_values[VAR_IN_W]; + double inh = rot->var_values[VAR_IN_H]; + float sinx = (float)sin(angle); + float cosx = (float)cos(angle); + + return FFMAX(0, -inh * cosx) + FFMAX(0, -inw * sinx) + + FFMAX(0, inh * cosx) + FFMAX(0, inw * sinx); +} + +static double (* const func1[])(void *, double) = { + get_rotated_w, + get_rotated_h, + NULL +}; + +static const char * const func1_names[] = { + "rotw", + "roth", + NULL +}; + +static int config_props(AVFilterLink *outlink) +{ + AVFilterContext *ctx = outlink->src; + NetIntRotContext *rot = ctx->priv; + AVFilterLink *inlink = ctx->inputs[0]; + AVHWFramesContext *in_frames_ctx, *out_frames_ctx; + const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(inlink->format); + int ret; + double res; + char *expr; + + rot->hsub = pixdesc->log2_chroma_w; + rot->vsub = pixdesc->log2_chroma_h; + + rot->var_values[VAR_IN_W] = rot->var_values[VAR_IW] = inlink->w; + rot->var_values[VAR_IN_H] = rot->var_values[VAR_IH] = inlink->h; + rot->var_values[VAR_HSUB] = 1<<rot->hsub; + rot->var_values[VAR_VSUB] = 1<<rot->vsub; + rot->var_values[VAR_OUT_W] = rot->var_values[VAR_OW] = NAN; + rot->var_values[VAR_OUT_H] = rot->var_values[VAR_OH] = NAN; + + av_expr_free(rot->angle_expr); + rot->angle_expr = NULL; + ret = av_expr_parse(&rot->angle_expr, + // NOLINTNEXTLINE(clang-analyzer-deadcode.DeadStores) + expr = rot->angle_expr_str, + var_names, + func1_names, + func1, + NULL, + NULL, + 0, + ctx); + if (ret < 0) { + av_log(ctx, + AV_LOG_ERROR, + "Error occurred parsing angle expression '%s'\n", + rot->angle_expr_str); + return ret; + } + +#define SET_SIZE_EXPR(name, opt_name) do { \ + ret = av_expr_parse_and_eval(&res, expr = rot->name##_expr_str, \ + var_names, rot->var_values, \ + func1_names, func1, NULL, NULL, rot, 0, ctx); \ + if (ret < 0 || isnan(res) || isinf(res) || res <= 0) { \ + av_log(ctx, AV_LOG_ERROR, \ + "Error parsing or evaluating expression for option %s: " \ + "invalid expression '%s' or non-positive or indefinite value %f\n", \ + opt_name, expr, res); \ + return ret; \ + } \ +} while (0) + + /* evaluate width and height */ + av_expr_parse_and_eval(&res, + // NOLINTNEXTLINE(clang-analyzer-deadcode.DeadStores) + expr = rot->outw_expr_str, + var_names, + rot->var_values, + func1_names, + func1, + NULL, + NULL, + rot, + 0, + ctx); + rot->var_values[VAR_OUT_W] = rot->var_values[VAR_OW] = res; + // NOLINTNEXTLINE(bugprone-incorrect-roundings, bugprone-narrowing-conversions) + rot->outw = ceil(res); + + SET_SIZE_EXPR(outh, "out_h"); + rot->var_values[VAR_OUT_H] = rot->var_values[VAR_OH] = ceil(res); + + // NOLINTNEXTLINE(bugprone-incorrect-roundings, bugprone-narrowing-conversions) + rot->outh = ceil(res); + + /* evaluate the width again, as it may depend on the evaluated output height */ + SET_SIZE_EXPR(outw, "out_w"); + rot->var_values[VAR_OUT_W] = rot->var_values[VAR_OW] = ceil(res); + // NOLINTNEXTLINE(bugprone-incorrect-roundings, bugprone-narrowing-conversions) + rot->outw = ceil(res); + + // Quadra 2D engine only supports even pixel widths and heights + rot->outw = FFALIGN(rot->outw, 2); + rot->outh = FFALIGN(rot->outh, 2); + + outlink->w = rot->outw; + outlink->h = rot->outh; + + if (outlink->w > NI_MAX_RESOLUTION_WIDTH || + outlink->h > NI_MAX_RESOLUTION_HEIGHT) { + av_log(ctx, AV_LOG_ERROR, "Resolution %dx%d > %dx%d is not allowed\n", + outlink->w, outlink->h, + NI_MAX_RESOLUTION_WIDTH, NI_MAX_RESOLUTION_HEIGHT); + return AVERROR(EINVAL); + } + + FilterLink *li = ff_filter_link(ctx->inputs[0]); + if (li->hw_frames_ctx == NULL) { + av_log(ctx, 
AV_LOG_ERROR, "No hw context provided on input\n"); + return AVERROR(EINVAL); + } + in_frames_ctx = (AVHWFramesContext *) li->hw_frames_ctx->data; + + if (in_frames_ctx->sw_format == AV_PIX_FMT_NI_QUAD_8_TILE_4X4 || + in_frames_ctx->sw_format == AV_PIX_FMT_NI_QUAD_10_TILE_4X4) { + av_log(ctx, AV_LOG_ERROR, "tile4x4 not supported\n"); + return AVERROR(EINVAL); + } + + av_log(ctx, AV_LOG_VERBOSE, + "w:%d h:%d fmt:%s sar:%d/%d -> w:%d h:%d fmt:%s sar:%d/%d\n", + inlink->w, inlink->h, av_get_pix_fmt_name(inlink->format), + inlink->sample_aspect_ratio.num, inlink->sample_aspect_ratio.den, + outlink->w, outlink->h, av_get_pix_fmt_name(outlink->format), + outlink->sample_aspect_ratio.num, outlink->sample_aspect_ratio.den); + + //skip the color range check + if (rot->auto_skip && + av_expr_eval(rot->angle_expr, rot->var_values, rot) == 0 && + in_frames_ctx->width == outlink->w && + in_frames_ctx->height == outlink->h + ) { + //skip hardware rotate + rot->skip_filter = 1; + + FilterLink *lt = ff_filter_link(outlink->src->inputs[0]); + rot->out_frames_ref = av_buffer_ref(lt->hw_frames_ctx); + if (!rot->out_frames_ref) { + return AVERROR(ENOMEM); + } + FilterLink *lo = ff_filter_link(outlink); + av_buffer_unref(&lo->hw_frames_ctx); + lo->hw_frames_ctx = av_buffer_ref(rot->out_frames_ref); + if (!lo->hw_frames_ctx) { + return AVERROR(ENOMEM); + } + return 0; + } + + rot->out_frames_ref = av_hwframe_ctx_alloc(in_frames_ctx->device_ref); + if (!rot->out_frames_ref) { + return AVERROR(ENOMEM); + } + + out_frames_ctx = (AVHWFramesContext *) rot->out_frames_ref->data; + + out_frames_ctx->format = AV_PIX_FMT_NI_QUAD; + out_frames_ctx->width = rot->outw; + out_frames_ctx->height = rot->outh; + out_frames_ctx->sw_format = in_frames_ctx->sw_format; + out_frames_ctx->initial_pool_size = NI_ROTATE_ID; // Repurposed as identity code + + av_hwframe_ctx_init(rot->out_frames_ref); + + FilterLink *lo = ff_filter_link(ctx->outputs[0]); + av_buffer_unref(&lo->hw_frames_ctx); + lo->hw_frames_ctx = av_buffer_ref(rot->out_frames_ref); + + if (!lo->hw_frames_ctx) { + return AVERROR(ENOMEM); + } + + return 0; +} + +static int init_out_pool(AVFilterContext *ctx) +{ + NetIntRotContext *rot = ctx->priv; + AVHWFramesContext *out_frames_context; + int pool_size = DEFAULT_NI_FILTER_POOL_SIZE; + + out_frames_context = (AVHWFramesContext*)rot->out_frames_ref->data; + pool_size += ctx->extra_hw_frames > 0 ? 
ctx->extra_hw_frames : 0; + rot->buffer_limit = 1; + + /* Create frame pool on device */ + return ff_ni_build_frame_pool(&rot->api_ctx, + out_frames_context->width, + out_frames_context->height, + out_frames_context->sw_format, + pool_size, + rot->buffer_limit); +} + +static int filter_frame(AVFilterLink *inlink, AVFrame *in) +{ + AVFilterContext *ctx = inlink->dst; + AVFilterLink *outlink = inlink->dst->outputs[0]; + AVFrame *out = NULL; + NetIntRotContext *rot = ctx->priv; + AVBufferRef *out_buffer_ref = rot->out_frames_ref; + AVHWFramesContext *in_frames_context = (AVHWFramesContext *) in->hw_frames_ctx->data; + AVNIDeviceContext *av_ni_device_context = (AVNIDeviceContext *) in_frames_context->device_ctx->hwctx; + ni_retcode_t ni_retcode = NI_RETCODE_SUCCESS; + niFrameSurface1_t *frame_surface = (niFrameSurface1_t *) in->data[3], *frame_surface2 = NULL; + ni_frame_config_t input_frame_config = {0}; + uint32_t scaler_format; + int retcode = 0, rgba_color = 255 /* black opaque */, card_number = ni_get_cardno(in); + int aligned_picture_width, rotated_picture_width, rotated_picture_height; + double angle; + + if (!frame_surface) { + av_log(ctx, AV_LOG_ERROR, "ni rotate filter frame_surface should not be NULL\n"); + return AVERROR(EINVAL); + } + + //skip hardware rotate + if (rot->skip_filter) { + return ff_filter_frame(outlink, in); + } + + if (!rot->initialized) { + ni_retcode = ni_device_session_context_init(&rot->api_ctx); + if (ni_retcode != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "ni rotate filter session context init failed with %d\n", ni_retcode); + retcode = AVERROR(EINVAL); + goto FAIL; + } + + rot->api_ctx.device_handle = rot->api_ctx.blk_io_handle = av_ni_device_context->cards[card_number]; + + rot->api_ctx.hw_id = card_number; + rot->api_ctx.device_type = NI_DEVICE_TYPE_SCALER; + rot->api_ctx.scaler_operation = NI_SCALER_OPCODE_ROTATE; + rot->api_ctx.keep_alive_timeout = rot->keep_alive_timeout; + + ni_retcode = ni_device_session_open(&rot->api_ctx, NI_DEVICE_TYPE_SCALER); + if (ni_retcode != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "ni rotate filter device session open failed with %d\n", ni_retcode); + retcode = ni_retcode; + + /* Close operation will free the device frames */ + ni_device_session_close(&rot->api_ctx, 1, NI_DEVICE_TYPE_SCALER); + ni_device_session_context_clear(&rot->api_ctx); + goto FAIL; + } + + rot->session_opened = true; + + if (!((av_strstart(outlink->dst->filter->name, "ni_quadra", NULL)) || (av_strstart(outlink->dst->filter->name, "hwdownload", NULL)))) { + inlink->dst->extra_hw_frames = (DEFAULT_FRAME_THREAD_QUEUE_SIZE > 1) ? 
DEFAULT_FRAME_THREAD_QUEUE_SIZE : 0; + } + + ni_retcode = init_out_pool(inlink->dst); + if (ni_retcode != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "ni rotate filter init out pool failed with %d\n", ni_retcode); + goto FAIL; + } + + AVHWFramesContext *out_frames_ctx = (AVHWFramesContext *)out_buffer_ref->data; + AVNIFramesContext *out_ni_ctx = (AVNIFramesContext *)out_frames_ctx->hwctx; + ni_cpy_hwframe_ctx(in_frames_context, out_frames_ctx); + ni_device_session_copy(&rot->api_ctx, &out_ni_ctx->api_ctx); + + AVHWFramesContext *pAVHFWCtx = (AVHWFramesContext *) in->hw_frames_ctx->data; + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pAVHFWCtx->sw_format); + + if ((in->color_range == AVCOL_RANGE_JPEG) && !(desc->flags & AV_PIX_FMT_FLAG_RGB)) { + av_log(ctx, AV_LOG_WARNING, "Full color range input, limited color output\n"); + } + + rot->initialized = true; + } + + ni_retcode = ni_frame_buffer_alloc_hwenc(&rot->api_dst_frame.data.frame, + outlink->w, + outlink->h, + 0); + if (ni_retcode != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "ni rotate filter frame buffer alloc hwenc failed with %d\n", ni_retcode); + retcode = AVERROR(ENOMEM); + goto FAIL; + } + + // Input. + + scaler_format = ff_ni_ffmpeg_to_gc620_pix_fmt(in_frames_context->sw_format); + input_frame_config.picture_format = scaler_format; + + input_frame_config.rgba_color = frame_surface->ui32nodeAddress; + input_frame_config.frame_index = frame_surface->ui16FrameIdx; + + aligned_picture_width = FFALIGN(in->width, BUFFER_WIDTH_PIXEL_ALIGNMENT); + + angle = av_expr_eval(rot->angle_expr, rot->var_values, rot); + if (angle == 0.0) { + // input_frame_config.orientation = 0; // initialized to zero, unnecessary assignment + input_frame_config.picture_width = in->width; + input_frame_config.picture_height = in->height; + + input_frame_config.rectangle_width = FFMIN(outlink->w, in->width); + input_frame_config.rectangle_height = FFMIN(outlink->h, in->height); + + rotated_picture_width = in->width; + rotated_picture_height = in->height; + } else if ((angle == -M_PI_2 * 3.0) || (angle == M_PI_2)) { + // -270.0° || 90.0° + input_frame_config.orientation = 1; + input_frame_config.picture_width = aligned_picture_width; + input_frame_config.picture_height = in->height; + + input_frame_config.rectangle_width = FFMIN(outlink->w, in->height); + input_frame_config.rectangle_height = FFMIN(outlink->h, in->width); + + rotated_picture_width = in->height; + rotated_picture_height = aligned_picture_width; + } else if ((angle == -M_PI) || (angle == M_PI)) { + // -180.0° || 180.0° + input_frame_config.orientation = 2; + input_frame_config.picture_width = aligned_picture_width; + input_frame_config.picture_height = in->height; + + input_frame_config.rectangle_width = FFMIN(outlink->w, in->width); + input_frame_config.rectangle_height = FFMIN(outlink->h, in->height); + + rotated_picture_width = aligned_picture_width; + rotated_picture_height = in->height; + } else if ((angle == -M_PI_2) || (angle == M_PI_2 * 3.0)) { + // -90.0° || 270.0° + input_frame_config.orientation = 3; + input_frame_config.picture_width = aligned_picture_width; + input_frame_config.picture_height = in->height; + + input_frame_config.rectangle_width = FFMIN(outlink->w, in->height); + input_frame_config.rectangle_height = FFMIN(outlink->h, in->width); + + rotated_picture_width = in->height; + rotated_picture_height = aligned_picture_width; + } else { + av_log(ctx, AV_LOG_ERROR, "ni rotate filter does not support rotation of %.1f radians\n", angle); + retcode = 
AVERROR(EINVAL); + goto FAIL; + } + + input_frame_config.rectangle_x = + (rotated_picture_width > input_frame_config.rectangle_width) ? + (rotated_picture_width / 2) - (input_frame_config.rectangle_width / 2) : 0; + input_frame_config.rectangle_y = + (rotated_picture_height > input_frame_config.rectangle_height) ? + (rotated_picture_height / 2) - (input_frame_config.rectangle_height / 2) : 0; + if (aligned_picture_width - in->width) { + switch (input_frame_config.orientation) { + case 1: // 90° + input_frame_config.rectangle_y = + (in->width > input_frame_config.rectangle_height) ? + (in->width / 2) - (input_frame_config.rectangle_height / 2) : 0; + break; + case 2: // 180° + input_frame_config.rectangle_x = + aligned_picture_width - in->width + + ((in->width > input_frame_config.rectangle_width) ? + (in->width / 2) - (input_frame_config.rectangle_width / 2) : 0); + break; + case 3: // 270° + input_frame_config.rectangle_y = + aligned_picture_width - in->width + + ((in->width > input_frame_config.rectangle_height) ? + (in->width / 2) - (input_frame_config.rectangle_height / 2) : 0); + break; + default: + break; + } + } + +#ifdef NI_MEASURE_LATENCY + ff_ni_update_benchmark(NULL); +#endif + + // use ni_device_config_frame() instead of ni_device_alloc_frame() + // such that input_frame_config's orientation can be configured + ni_retcode = ni_device_config_frame(&rot->api_ctx, &input_frame_config); + if (ni_retcode != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "ni rotate filter device config input frame failed with %d\n", ni_retcode); + retcode = AVERROR(ENOMEM); + goto FAIL; + } + + // Output. + + if (rot->fillcolor_enable) { + rgba_color = (rot->fillcolor[3] << 24) | + (rot->fillcolor[0] << 16) | + (rot->fillcolor[1] << 8) | + rot->fillcolor[2]; + } + + rot->output_frame_config.picture_width = outlink->w; + rot->output_frame_config.picture_height = outlink->h; + rot->output_frame_config.rectangle_width = input_frame_config.rectangle_width; + rot->output_frame_config.rectangle_height = input_frame_config.rectangle_height; + rot->output_frame_config.rectangle_x = + (rot->output_frame_config.picture_width > rot->output_frame_config.rectangle_width) ? + (rot->output_frame_config.picture_width / 2) - (rot->output_frame_config.rectangle_width / 2) : 0; + rot->output_frame_config.rectangle_y = + (rot->output_frame_config.picture_height > rot->output_frame_config.rectangle_height) ? 
+ (rot->output_frame_config.picture_height / 2) - (rot->output_frame_config.rectangle_height / 2) : 0; + rot->output_frame_config.rgba_color = rgba_color; + + ni_retcode = ni_device_alloc_frame(&rot->api_ctx, + rot->output_frame_config.picture_width, + rot->output_frame_config.picture_height, + scaler_format, + NI_SCALER_FLAG_IO, + rot->output_frame_config.rectangle_width, + rot->output_frame_config.rectangle_height, + rot->output_frame_config.rectangle_x, + rot->output_frame_config.rectangle_y, + rot->output_frame_config.rgba_color, + -1, + NI_DEVICE_TYPE_SCALER); + + if (ni_retcode != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "ni rotate filter device alloc output frame failed with %d\n", ni_retcode); + retcode = AVERROR(ENOMEM); + goto FAIL; + } + + out = av_frame_alloc(); + if (!out) { + av_log(ctx, AV_LOG_ERROR, "ni rotate filter av_frame_alloc returned NULL\n"); + retcode = AVERROR(ENOMEM); + goto FAIL; + } + + av_frame_copy_props(out, in); + + out->width = rot->outw; + out->height = rot->outh; + out->format = AV_PIX_FMT_NI_QUAD; + out->color_range = AVCOL_RANGE_MPEG; + + out->hw_frames_ctx = av_buffer_ref(out_buffer_ref); + out->data[3] = av_malloc(sizeof(niFrameSurface1_t)); + if (!out->data[3]) { + av_log(ctx, AV_LOG_ERROR, "ni rotate filter av_malloc returned NULL\n"); + retcode = AVERROR(ENOMEM); + goto FAIL; + } + memcpy(out->data[3], frame_surface, sizeof(niFrameSurface1_t)); + + ni_retcode = ni_device_session_read_hwdesc(&rot->api_ctx, + &rot->api_dst_frame, + NI_DEVICE_TYPE_SCALER); + if (ni_retcode != NI_RETCODE_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "ni rotate filter read hwdesc failed with %d\n", ni_retcode); + retcode = AVERROR(ENOMEM); + goto FAIL; + } + +#ifdef NI_MEASURE_LATENCY + ff_ni_update_benchmark("ni_quadra_rotate"); +#endif + + frame_surface2 = (niFrameSurface1_t *) rot->api_dst_frame.data.frame.p_data[3]; + + frame_surface = (niFrameSurface1_t *) out->data[3]; + frame_surface->ui16FrameIdx = frame_surface2->ui16FrameIdx; + frame_surface->ui16session_ID = frame_surface2->ui16session_ID; + frame_surface->device_handle = frame_surface2->device_handle; + frame_surface->output_idx = frame_surface2->output_idx; + frame_surface->src_cpu = frame_surface2->src_cpu; + frame_surface->ui32nodeAddress = 0; + frame_surface->dma_buf_fd = 0; + ff_ni_set_bit_depth_and_encoding_type(&frame_surface->bit_depth, + &frame_surface->encoding_type, + in_frames_context->sw_format); + frame_surface->ui16width = out->width; + frame_surface->ui16height = out->height; + + out->buf[0] = av_buffer_create(out->data[3], + sizeof(niFrameSurface1_t), + ff_ni_frame_free, + NULL, + 0); + if (!out->buf[0]) { + av_log(ctx, AV_LOG_ERROR, "ni rotate filter av_buffer_create returned NULL\n"); + retcode = AVERROR(ENOMEM); + goto FAIL; + } + + av_frame_free(&in); + return ff_filter_frame(inlink->dst->outputs[0], out); + +FAIL: + av_frame_free(&in); + if (out) + av_frame_free(&out); + return retcode; +} + +static int activate(AVFilterContext *ctx) +{ + AVFilterLink *inlink = ctx->inputs[0]; + AVFilterLink *outlink = ctx->outputs[0]; + AVFrame *frame = NULL; + int ret = 0; + NetIntRotContext *s = inlink->dst->priv; + + // Forward the status on output link to input link, if the status is set, discard all queued frames + FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink); + + if (ff_inlink_check_available_frame(inlink)) { + if (s->initialized) { + ret = ni_device_session_query_buffer_avail(&s->api_ctx, NI_DEVICE_TYPE_SCALER); + } + + if (ret == NI_RETCODE_ERROR_UNSUPPORTED_FW_VERSION) { + av_log(ctx, 
AV_LOG_WARNING, "No backpressure support in FW\n"); + } else if (ret < 0) { + av_log(ctx, AV_LOG_WARNING, "%s: query ret %d, ready %u inlink framequeue %u available_frame %d outlink framequeue %u frame_wanted %d - return NOT READY\n", + __func__, ret, ctx->ready, ff_inlink_queued_frames(inlink), ff_inlink_check_available_frame(inlink), ff_inlink_queued_frames(outlink), ff_outlink_frame_wanted(outlink)); + return FFERROR_NOT_READY; + } + + ret = ff_inlink_consume_frame(inlink, &frame); + if (ret < 0) + return ret; + + ret = filter_frame(inlink, frame); + if (ret >= 0) { + ff_filter_set_ready(ctx, 300); + } + return ret; + } + + // We did not get a frame from input link, check its status + FF_FILTER_FORWARD_STATUS(inlink, outlink); + + // We have no frames yet from input link and no EOF, so request some. + FF_FILTER_FORWARD_WANTED(outlink, inlink); + + return FFERROR_NOT_READY; +} + +#define OFFSET(x) offsetof(NetIntRotContext, x) +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM) + +static const AVOption ni_rotate_options[] = { + { "angle", "set angle (in radians)", OFFSET(angle_expr_str), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "a", "set angle (in radians)", OFFSET(angle_expr_str), AV_OPT_TYPE_STRING, {.str="0"}, 0, 0, FLAGS }, + { "out_w", "set output width expression", OFFSET(outw_expr_str), AV_OPT_TYPE_STRING, {.str="iw"}, 0, 0, FLAGS }, + { "ow", "set output width expression", OFFSET(outw_expr_str), AV_OPT_TYPE_STRING, {.str="iw"}, 0, 0, FLAGS }, + { "out_h", "set output height expression", OFFSET(outh_expr_str), AV_OPT_TYPE_STRING, {.str="ih"}, 0, 0, FLAGS }, + { "oh", "set output height expression", OFFSET(outh_expr_str), AV_OPT_TYPE_STRING, {.str="ih"}, 0, 0, FLAGS }, + { "fillcolor", "set background fill color", OFFSET(fillcolor_str), AV_OPT_TYPE_STRING, {.str="black"}, 0, 0, FLAGS }, + { "c", "set background fill color", OFFSET(fillcolor_str), AV_OPT_TYPE_STRING, {.str="black"}, 0, 0, FLAGS }, + NI_FILT_OPTION_AUTO_SKIP, + NI_FILT_OPTION_KEEPALIVE, + NI_FILT_OPTION_BUFFER_LIMIT, + { NULL } +}; + +AVFILTER_DEFINE_CLASS(ni_rotate); + +static const AVFilterPad inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .filter_frame = filter_frame, + }, +}; + +static const AVFilterPad outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = config_props, + }, +}; + +FFFilter ff_vf_rotate_ni_quadra = { + .p.name = "ni_quadra_rotate", + .p.description = NULL_IF_CONFIG_SMALL( + "NETINT Quadra rotate the input video v" NI_XCODER_REVISION), + .p.priv_class = &ni_rotate_class, + .priv_size = sizeof(NetIntRotContext), + .init = init, + .uninit = uninit, + .activate = activate, + FILTER_QUERY_FUNC(query_formats), + FILTER_INPUTS(inputs), + FILTER_OUTPUTS(outputs), + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +}; diff --git a/libavfilter/vf_scale_ni.c b/libavfilter/vf_scale_ni.c new file mode 100644 index 0000000000..1388abf590 --- /dev/null +++ b/libavfilter/vf_scale_ni.c @@ -0,0 +1,958 @@ +/* + * Copyright (c) 2007 Bobby Bingham + * Copyright (c) 2020 NetInt + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * scale video filter
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include "nifilter.h"
+#include "filters.h"
+#include "formats.h"
+#include "libavutil/mem.h"
+#include "fftools/ffmpeg_sched.h"
+#include "scale_eval.h"
+#include "video.h"
+#include "libavutil/avstring.h"
+#include "libavutil/internal.h"
+#include "libavutil/mathematics.h"
+#include "libavutil/opt.h"
+#include "libavutil/parseutils.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/avassert.h"
+#include "libavutil/eval.h"
+#include "libswscale/swscale.h"
+
+/* Note: each name must have a matching slot in enum var_name below; the
+ * scale2ref-style "main_*" variables have no var_values slot in this
+ * filter and are therefore not accepted. */
+static const char *const var_names[] = {
+    "in_w",  "iw",
+    "in_h",  "ih",
+    "out_w", "ow",
+    "out_h", "oh",
+    "a",
+    "sar",
+    "dar",
+    "hsub",
+    "vsub",
+    "ohsub",
+    "ovsub",
+    NULL
+};
+
+enum var_name {
+    VAR_IN_W,  VAR_IW,
+    VAR_IN_H,  VAR_IH,
+    VAR_OUT_W, VAR_OW,
+    VAR_OUT_H, VAR_OH,
+    VAR_A,
+    VAR_SAR,
+    VAR_DAR,
+    VAR_HSUB,
+    VAR_VSUB,
+    VAR_OHSUB,
+    VAR_OVSUB,
+    VARS_NB
+};
+
+enum OutputFormat {
+    OUTPUT_FORMAT_YUV420P,
+    OUTPUT_FORMAT_YUYV422,
+    OUTPUT_FORMAT_UYVY422,
+    OUTPUT_FORMAT_NV12,
+    OUTPUT_FORMAT_ARGB,
+    OUTPUT_FORMAT_RGBA,
+    OUTPUT_FORMAT_ABGR,
+    OUTPUT_FORMAT_BGRA,
+    OUTPUT_FORMAT_YUV420P10LE,
+    OUTPUT_FORMAT_NV16,
+    OUTPUT_FORMAT_BGR0,
+    OUTPUT_FORMAT_P010LE,
+    OUTPUT_FORMAT_AUTO,
+    OUTPUT_FORMAT_NB
+};
+
+enum AVPixelFormat ff_output_fmt[] = {
+    AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUYV422, AV_PIX_FMT_UYVY422,
+    AV_PIX_FMT_NV12, AV_PIX_FMT_ARGB, AV_PIX_FMT_RGBA,
+    AV_PIX_FMT_ABGR, AV_PIX_FMT_BGRA, AV_PIX_FMT_YUV420P10LE,
+    AV_PIX_FMT_NV16, AV_PIX_FMT_BGR0, AV_PIX_FMT_P010LE};
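+/*
+ * The "format" option below is used to index this table, so its entries
+ * must stay in the same order as enum OutputFormat above. For example
+ * (assumed command line), format=nv12 selects AV_PIX_FMT_NV12 as the
+ * scaler output:
+ *
+ *   ffmpeg ... -vf "ni_quadra_scale=w=1280:h=720:format=nv12" ...
+ */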
+typedef struct NetIntScaleContext {
+    const AVClass *class;
+    AVDictionary *opts;
+
+    /**
+     * New dimensions. Special values are:
+     *   0 = original width/height
+     *  -1 = keep original aspect
+     *  -N = try to keep aspect but make sure it is divisible by N
+     */
+    int w, h;
+    char *size_str;
+
+    char *w_expr;               ///< width expression string
+    char *h_expr;               ///< height expression string
+
+    char *flags_str;
+
+    char *in_color_matrix;
+    char *out_color_matrix;
+
+    int force_original_aspect_ratio;
+    int force_divisible_by;
+    int format;
+
+    enum AVPixelFormat out_format;
+    AVBufferRef *out_frames_ref;
+    AVBufferRef *out_frames_ref_1;
+
+    ni_session_context_t api_ctx;
+    ni_session_data_io_t api_dst_frame;
+    ni_scaler_params_t params;
+
+    int initialized;
+    int session_opened;
+    int keep_alive_timeout;     /* keep alive timeout setting */
+    int output_compressed;
+
+    int auto_skip;
+    int skip_filter;
+    int autoselect;
+    int buffer_limit;
+    AVExpr *w_pexpr;
+    AVExpr *h_pexpr;
+    double var_values[VARS_NB];
+} NetIntScaleContext;
+
+static int config_props(AVFilterLink *outlink);
+
+static int check_exprs(AVFilterContext *ctx)
+{
+    NetIntScaleContext *scale = ctx->priv;
+    unsigned vars_w[VARS_NB] = { 0 }, vars_h[VARS_NB] = { 0 };
+
+    if (!scale->w_pexpr && !scale->h_pexpr)
+        return AVERROR(EINVAL);
+
+    if (scale->w_pexpr)
+        av_expr_count_vars(scale->w_pexpr, vars_w, VARS_NB);
+    if (scale->h_pexpr)
+        av_expr_count_vars(scale->h_pexpr, vars_h, VARS_NB);
+
+    if (vars_w[VAR_OUT_W] || vars_w[VAR_OW]) {
+        av_log(ctx, AV_LOG_ERROR, "Width expression cannot be self-referencing: '%s'.\n", scale->w_expr);
+        return AVERROR(EINVAL);
+    }
+
+    if (vars_h[VAR_OUT_H] || vars_h[VAR_OH]) {
+        av_log(ctx, AV_LOG_ERROR, "Height expression cannot be self-referencing: '%s'.\n", scale->h_expr);
+        return AVERROR(EINVAL);
+    }
+
+    if ((vars_w[VAR_OUT_H] || vars_w[VAR_OH]) &&
+        (vars_h[VAR_OUT_W] || vars_h[VAR_OW])) {
+        av_log(ctx, AV_LOG_WARNING, "Circular references detected for width '%s' and height '%s' - possibly invalid.\n", scale->w_expr, scale->h_expr);
+    }
+
+    return 0;
+}
+
+static int scale_parse_expr(AVFilterContext *ctx, char *str_expr, AVExpr **pexpr_ptr, const char *var, const char *args)
+{
+    NetIntScaleContext *scale = ctx->priv;
+    int ret, is_inited = 0;
+    char *old_str_expr = NULL;
+    AVExpr *old_pexpr = NULL;
+
+    if (str_expr) {
+        old_str_expr = av_strdup(str_expr);
+        if (!old_str_expr)
+            return AVERROR(ENOMEM);
+        av_opt_set(scale, var, args, 0);
+    }
+
+    if (*pexpr_ptr) {
+        old_pexpr = *pexpr_ptr;
+        *pexpr_ptr = NULL;
+        is_inited = 1;
+    }
+
+    ret = av_expr_parse(pexpr_ptr, args, var_names,
+                        NULL, NULL, NULL, NULL, 0, ctx);
+    if (ret < 0) {
+        av_log(ctx, AV_LOG_ERROR, "Cannot parse expression for %s: '%s'\n", var, args);
+        goto revert;
+    }
+
+    ret = check_exprs(ctx);
+    if (ret < 0)
+        goto revert;
+
+    if (is_inited && (ret = config_props(ctx->outputs[0])) < 0)
+        goto revert;
+
+    av_expr_free(old_pexpr);
+    old_pexpr = NULL;
+    av_freep(&old_str_expr);
+
+    return 0;
+
+revert:
+    av_expr_free(*pexpr_ptr);
+    *pexpr_ptr = NULL;
+    if (old_str_expr) {
+        av_opt_set(scale, var, old_str_expr, 0);
+        av_free(old_str_expr);
+    }
+    if (old_pexpr)
+        *pexpr_ptr = old_pexpr;
+
+    return ret;
+}
+
+static int scale_eval_dimensions(AVFilterContext *ctx)
+{
+    NetIntScaleContext *scale = ctx->priv;
+    const AVFilterLink *inlink = ctx->inputs[0];
+    const AVFilterLink *outlink = ctx->outputs[0];
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    const AVPixFmtDescriptor *out_desc = av_pix_fmt_desc_get(outlink->format);
+    char *expr;
+    int ret;
+    double res;
+    const AVPixFmtDescriptor *main_desc;
+    const
AVFilterLink *main_link; + + scale->var_values[VAR_IN_W] = scale->var_values[VAR_IW] = inlink->w; + scale->var_values[VAR_IN_H] = scale->var_values[VAR_IH] = inlink->h; + scale->var_values[VAR_OUT_W] = scale->var_values[VAR_OW] = NAN; + scale->var_values[VAR_OUT_H] = scale->var_values[VAR_OH] = NAN; + scale->var_values[VAR_A] = (double) inlink->w / inlink->h; + scale->var_values[VAR_SAR] = inlink->sample_aspect_ratio.num ? + (double) inlink->sample_aspect_ratio.num / inlink->sample_aspect_ratio.den : 1; + scale->var_values[VAR_DAR] = scale->var_values[VAR_A] * scale->var_values[VAR_SAR]; + scale->var_values[VAR_HSUB] = 1 << desc->log2_chroma_w; + scale->var_values[VAR_VSUB] = 1 << desc->log2_chroma_h; + scale->var_values[VAR_OHSUB] = 1 << out_desc->log2_chroma_w; + scale->var_values[VAR_OVSUB] = 1 << out_desc->log2_chroma_h; + + res = av_expr_eval(scale->w_pexpr, scale->var_values, NULL); + scale->var_values[VAR_OUT_W] = scale->var_values[VAR_OW] = (int) res == 0 ? inlink->w : (int) res; + + res = av_expr_eval(scale->h_pexpr, scale->var_values, NULL); + if (isnan(res)) { + expr = scale->h_expr; + ret = AVERROR(EINVAL); + goto fail; + } + scale->var_values[VAR_OUT_H] = scale->var_values[VAR_OH] = (int) res == 0 ? inlink->h : (int) res; + + res = av_expr_eval(scale->w_pexpr, scale->var_values, NULL); + if (isnan(res)) { + expr = scale->w_expr; + ret = AVERROR(EINVAL); + goto fail; + } + scale->var_values[VAR_OUT_W] = scale->var_values[VAR_OW] = (int) res == 0 ? inlink->w : (int) res; + + scale->w = (int)scale->var_values[VAR_OUT_W]; + scale->h = (int)scale->var_values[VAR_OUT_H]; + + return 0; + +fail: + av_log(ctx, AV_LOG_ERROR, + "Error when evaluating the expression '%s'.\n", expr); + return ret; +} + +static int query_formats(AVFilterContext *ctx) +{ + static const enum AVPixelFormat pix_fmts[] = + {AV_PIX_FMT_NI_QUAD, AV_PIX_FMT_NONE}; + AVFilterFormats *formats; + + formats = ff_make_format_list(pix_fmts); + + if (!formats) + return AVERROR(ENOMEM); + + return ff_set_common_formats(ctx, formats); +} + +static av_cold int init(AVFilterContext *ctx) +{ + NetIntScaleContext *scale = ctx->priv; + int ret; + + if (scale->size_str && (scale->w_expr || scale->h_expr)) { + av_log(ctx, AV_LOG_ERROR, + "Size and width/height expressions cannot be set at the same time.\n"); + return AVERROR(EINVAL); + } + + if (scale->w_expr && !scale->h_expr) + FFSWAP(char *, scale->w_expr, scale->size_str); + + if (scale->size_str) { + char buf[32]; + + if ((ret = av_parse_video_size(&scale->w, &scale->h, scale->size_str)) < 0) { + av_log(ctx, AV_LOG_ERROR, + "Invalid size '%s'\n", scale->size_str); + return ret; + } + snprintf(buf, sizeof(buf)-1, "%d", scale->w); + av_opt_set(scale, "w", buf, 0); + snprintf(buf, sizeof(buf)-1, "%d", scale->h); + av_opt_set(scale, "h", buf, 0); + } + if (!scale->w_expr) + av_opt_set(scale, "w", "iw", 0); + if (!scale->h_expr) + av_opt_set(scale, "h", "ih", 0); + + ret = scale_parse_expr(ctx, NULL, &scale->w_pexpr, "width", scale->w_expr); + if (ret < 0) + return ret; + + ret = scale_parse_expr(ctx, NULL, &scale->h_pexpr, "height", scale->h_expr); + if (ret < 0) + return ret; + + av_log(ctx, AV_LOG_VERBOSE, "w:%s h:%s\n", scale->w_expr, scale->h_expr); + + return 0; +} + +static av_cold void uninit(AVFilterContext *ctx) +{ + NetIntScaleContext *scale = ctx->priv; + + av_expr_free(scale->w_pexpr); + av_expr_free(scale->h_pexpr); + scale->w_pexpr = scale->h_pexpr = NULL; + av_dict_free(&scale->opts); + + if (scale->api_dst_frame.data.frame.p_buffer) + 
ni_frame_buffer_free(&scale->api_dst_frame.data.frame); + + if (scale->session_opened) { + /* Close operation will free the device frames */ + ni_device_session_close(&scale->api_ctx, 1, NI_DEVICE_TYPE_SCALER); + ni_device_session_context_clear(&scale->api_ctx); + } + + av_buffer_unref(&scale->out_frames_ref); + av_buffer_unref(&scale->out_frames_ref_1); +} + +static int init_out_pool(AVFilterContext *ctx) +{ + NetIntScaleContext *s = ctx->priv; + AVHWFramesContext *out_frames_ctx; + int pool_size = DEFAULT_NI_FILTER_POOL_SIZE; + + out_frames_ctx = (AVHWFramesContext*)s->out_frames_ref->data; + pool_size += ctx->extra_hw_frames > 0 ? ctx->extra_hw_frames : 0; + s->buffer_limit = 1; + + /* Create frame pool on device */ + return ff_ni_build_frame_pool(&s->api_ctx, out_frames_ctx->width, + out_frames_ctx->height, + s->out_format, pool_size, + s->buffer_limit); +} + +static int config_props(AVFilterLink *outlink) +{ + AVFilterContext *ctx = outlink->src; + AVFilterLink *inlink0 = outlink->src->inputs[0]; + AVFilterLink *inlink = outlink->src->inputs[0]; + // AVFilterLink *inlink = outlink->src->inputs[0]; + AVHWFramesContext *in_frames_ctx; + AVHWFramesContext *out_frames_ctx; + NetIntScaleContext *scale = ctx->priv; + int w, h, ret, h_shift, v_shift; + + if ((ret = scale_eval_dimensions(ctx)) < 0) + return ret; + + w = scale->w; + h = scale->h; + + ff_scale_adjust_dimensions(inlink, &w, &h, + scale->force_original_aspect_ratio, + scale->force_divisible_by, 1.f); + + if (w > NI_MAX_RESOLUTION_WIDTH || h > NI_MAX_RESOLUTION_HEIGHT) { + av_log(ctx, AV_LOG_ERROR, "Scaled value (%dx%d) > 8192 not allowed\n", w, h); + return AVERROR(EINVAL); + } + + if ((w <= 0) || (h <= 0)) { + av_log(ctx, AV_LOG_ERROR, "Scaled value (%dx%d) not allowed\n", w, h); + return AVERROR(EINVAL); + } + + FilterLink *li = ff_filter_link(inlink); + if (li->hw_frames_ctx == NULL) { + av_log(ctx, AV_LOG_ERROR, "No hw context provided on input\n"); + return AVERROR(EINVAL); + } + in_frames_ctx = (AVHWFramesContext *)li->hw_frames_ctx->data; + + if (in_frames_ctx->sw_format == AV_PIX_FMT_NI_QUAD_10_TILE_4X4) { + av_log(ctx, AV_LOG_ERROR, "tile4x4 10b not supported for scale!\n"); + return AVERROR(EINVAL); + } + + /* Set the output format */ + if (scale->format == OUTPUT_FORMAT_AUTO) { + scale->out_format = in_frames_ctx->sw_format; + } else { + scale->out_format = ff_output_fmt[scale->format]; + } + if (scale->out_format == AV_PIX_FMT_NI_QUAD_8_TILE_4X4) + scale->output_compressed = 1; + else + scale->output_compressed = 0; + + av_pix_fmt_get_chroma_sub_sample(scale->out_format, &h_shift, &v_shift); + + outlink->w = FFALIGN(w, (1 << h_shift)); + outlink->h = FFALIGN(h, (1 << v_shift)); + + if (inlink0->sample_aspect_ratio.num) { + outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h * inlink0->w, outlink->w * inlink0->h}, inlink0->sample_aspect_ratio); + } else { + outlink->sample_aspect_ratio = inlink0->sample_aspect_ratio; + } + + av_log(ctx, AV_LOG_VERBOSE, + "w:%d h:%d fmt:%s sar:%d/%d -> w:%d h:%d fmt:%s sar:%d/%d\n", + inlink->w, inlink->h, av_get_pix_fmt_name(inlink->format), + inlink->sample_aspect_ratio.num, inlink->sample_aspect_ratio.den, + outlink->w, outlink->h, av_get_pix_fmt_name(outlink->format), + outlink->sample_aspect_ratio.num, outlink->sample_aspect_ratio.den); + + //skip the color range check + if (scale->auto_skip && + inlink->w == outlink->w && + inlink->h == outlink->h && + in_frames_ctx->sw_format == scale->out_format && + ( + (!scale->in_color_matrix && (!scale->out_color_matrix || 
strcmp(scale->out_color_matrix, "bt709") == 0)) ||
+         (!scale->out_color_matrix && (!scale->in_color_matrix || strcmp(scale->in_color_matrix, "bt709") == 0)) ||
+         (scale->in_color_matrix && scale->out_color_matrix && strcmp(scale->in_color_matrix, scale->out_color_matrix) == 0)
+        )
+       ) {
+        // skip hardware scale
+        scale->skip_filter = 1;
+
+        FilterLink *lo = ff_filter_link(outlink);
+        scale->out_frames_ref = av_buffer_ref(li->hw_frames_ctx);
+        if (!scale->out_frames_ref) {
+            return AVERROR(ENOMEM);
+        }
+        av_buffer_unref(&lo->hw_frames_ctx);
+        lo->hw_frames_ctx = av_buffer_ref(scale->out_frames_ref);
+        if (!lo->hw_frames_ctx) {
+            return AVERROR(ENOMEM);
+        }
+        return 0;
+    }
+
+    scale->out_frames_ref = av_hwframe_ctx_alloc(in_frames_ctx->device_ref);
+    if (!scale->out_frames_ref)
+        return AVERROR(ENOMEM);
+
+    out_frames_ctx = (AVHWFramesContext *)scale->out_frames_ref->data;
+
+    out_frames_ctx->format = AV_PIX_FMT_NI_QUAD;
+    out_frames_ctx->width = outlink->w;
+    out_frames_ctx->height = outlink->h;
+    out_frames_ctx->sw_format = scale->out_format;
+    out_frames_ctx->initial_pool_size =
+        NI_SCALE_ID; // Repurposed as identity code
+
+    av_hwframe_ctx_init(scale->out_frames_ref);
+
+    FilterLink *lt = ff_filter_link(ctx->outputs[0]);
+    av_buffer_unref(&lt->hw_frames_ctx);
+    lt->hw_frames_ctx = av_buffer_ref(scale->out_frames_ref);
+    if (!lt->hw_frames_ctx)
+        return AVERROR(ENOMEM);
+
+    return 0;
+}
+
+static int config_props_ref(AVFilterLink *outlink)
+{
+    AVFilterContext *ctx = outlink->src;
+    AVFilterLink *inlink = outlink->src->inputs[1];
+    NetIntScaleContext *scale = ctx->priv;
+
+    outlink->w = inlink->w;
+    outlink->h = inlink->h;
+    outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
+    outlink->time_base = inlink->time_base;
+    outlink->format = inlink->format;
+
+    FilterLink *li = ff_filter_link(inlink);
+    FilterLink *lo = ff_filter_link(outlink);
+    lo->frame_rate = li->frame_rate;
+    scale->out_frames_ref_1 = av_buffer_ref(li->hw_frames_ctx);
+    if (!scale->out_frames_ref_1) {
+        return AVERROR(ENOMEM);
+    }
+    av_buffer_unref(&lo->hw_frames_ctx);
+    lo->hw_frames_ctx = av_buffer_ref(scale->out_frames_ref_1);
+    if (!lo->hw_frames_ctx) {
+        return AVERROR(ENOMEM);
+    }
+
+    av_log(ctx, AV_LOG_VERBOSE,
+           "w:%d h:%d fmt:%s sar:%d/%d -> w:%d h:%d fmt:%s sar:%d/%d\n",
+           inlink->w, inlink->h, av_get_pix_fmt_name(inlink->format),
+           inlink->sample_aspect_ratio.num, inlink->sample_aspect_ratio.den,
+           outlink->w, outlink->h, av_get_pix_fmt_name(outlink->format),
+           outlink->sample_aspect_ratio.num, outlink->sample_aspect_ratio.den);
+
+    return 0;
+}
+
+static int request_frame(AVFilterLink *outlink)
+{
+    return ff_request_frame(outlink->src->inputs[0]);
+}
+
+static int request_frame_ref(AVFilterLink *outlink)
+{
+    return ff_request_frame(outlink->src->inputs[1]);
+}
+
+/* Process a received frame */
+static int filter_frame(AVFilterLink *link, AVFrame *in)
+{
+    NetIntScaleContext *scale = link->dst->priv;
+    AVFilterLink *outlink = link->dst->outputs[0];
+    AVFrame *out = NULL;
+    niFrameSurface1_t *frame_surface, *new_frame_surface;
+    AVHWFramesContext *pAVHFWCtx;
+    AVNIDeviceContext *pAVNIDevCtx;
+    ni_retcode_t retcode;
+    int scaler_format, cardno;
+    uint16_t tempFID;
+    uint16_t options;
+
+    frame_surface = (niFrameSurface1_t *) in->data[3];
+    if (frame_surface == NULL) {
+        return AVERROR(EINVAL);
+    }
+
+    pAVHFWCtx = (AVHWFramesContext *) in->hw_frames_ctx->data;
+    pAVNIDevCtx = (AVNIDeviceContext *)pAVHFWCtx->device_ctx->hwctx;
+    cardno =
ni_get_cardno(in); + + if (scale->skip_filter) { + //skip hardware scale + return ff_filter_frame(link->dst->outputs[0], in); + } + + if (!scale->initialized) { + retcode = ni_device_session_context_init(&scale->api_ctx); + if (retcode < 0) { + av_log(link->dst, AV_LOG_ERROR, + "ni scale filter session context init failure\n"); + goto fail; + } + + scale->api_ctx.device_handle = pAVNIDevCtx->cards[cardno]; + scale->api_ctx.blk_io_handle = pAVNIDevCtx->cards[cardno]; + + scale->api_ctx.hw_id = cardno; + scale->api_ctx.device_type = NI_DEVICE_TYPE_SCALER; + scale->api_ctx.scaler_operation = NI_SCALER_OPCODE_SCALE; + scale->api_ctx.keep_alive_timeout = scale->keep_alive_timeout; + + av_log(link->dst, AV_LOG_INFO, + "Open scaler session to card %d, hdl %d, blk_hdl %d\n", cardno, + scale->api_ctx.device_handle, scale->api_ctx.blk_io_handle); + + retcode = + ni_device_session_open(&scale->api_ctx, NI_DEVICE_TYPE_SCALER); + if (retcode != NI_RETCODE_SUCCESS) { + av_log(link->dst, AV_LOG_ERROR, + "Can't open device session on card %d\n", cardno); + + /* Close operation will free the device frames */ + ni_device_session_close(&scale->api_ctx, 1, NI_DEVICE_TYPE_SCALER); + ni_device_session_context_clear(&scale->api_ctx); + goto fail; + } + + scale->session_opened = 1; + + if (scale->autoselect) { + if (outlink->w <= 540 || outlink->h <= 540) + scale->params.filterblit = 1; + else + scale->params.filterblit = 2; + } + +#if ((LIBXCODER_API_VERSION_MAJOR > 2) || \ + (LIBXCODER_API_VERSION_MAJOR == 2 && LIBXCODER_API_VERSION_MINOR>= 76)) + if (scale->params.scaler_param_b != 0 || scale->params.scaler_param_c != 0.75) { + scale->params.enable_scaler_params = true; + } else { + scale->params.enable_scaler_params = false; + } +#endif + if (scale->params.filterblit) { + retcode = ni_scaler_set_params(&scale->api_ctx, &(scale->params)); + if (retcode < 0) + goto fail; + } + + if (!((av_strstart(outlink->dst->filter->name, "ni_quadra", NULL)) || (av_strstart(outlink->dst->filter->name, "hwdownload", NULL)))) { + link->dst->extra_hw_frames = (DEFAULT_FRAME_THREAD_QUEUE_SIZE > 1) ? DEFAULT_FRAME_THREAD_QUEUE_SIZE : 0; + } + + retcode = init_out_pool(link->dst); + if (retcode < 0) { + av_log(link->dst, AV_LOG_ERROR, + "Internal output allocation failed rc = %d\n", retcode); + goto fail; + } + + AVHWFramesContext *out_frames_ctx = (AVHWFramesContext *)scale->out_frames_ref->data; + AVNIFramesContext *out_ni_ctx = (AVNIFramesContext *)out_frames_ctx->hwctx; + ni_cpy_hwframe_ctx(pAVHFWCtx, out_frames_ctx); + ni_device_session_copy(&scale->api_ctx, &out_ni_ctx->api_ctx); + + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pAVHFWCtx->sw_format); + + if ((in->color_range == AVCOL_RANGE_JPEG) && !(desc->flags & AV_PIX_FMT_FLAG_RGB)) { + av_log(link->dst, AV_LOG_WARNING, + "WARNING: Full color range input, limited color range output\n"); + } + + scale->initialized = 1; + } + + scaler_format = ff_ni_ffmpeg_to_gc620_pix_fmt(pAVHFWCtx->sw_format); + + retcode = ni_frame_buffer_alloc_hwenc(&scale->api_dst_frame.data.frame, + outlink->w, + outlink->h, + 0); + + if (retcode != NI_RETCODE_SUCCESS) { + retcode = AVERROR(ENOMEM); + goto fail; + } + +#ifdef NI_MEASURE_LATENCY + ff_ni_update_benchmark(NULL); +#endif + + options = 0; + if (scale->in_color_matrix && strcmp(scale->in_color_matrix,"bt2020") == 0) + options |= NI_SCALER_FLAG_CS; + options |= (frame_surface->encoding_type == 2) ? NI_SCALER_FLAG_CMP : 0; + + /* + * Allocate device input frame. 
This call won't actually allocate a frame, + * but sends the incoming hardware frame index to the scaler manager + */ + retcode = ni_device_alloc_frame(&scale->api_ctx, + FFALIGN(in->width, 2), + FFALIGN(in->height, 2), + scaler_format, + options, + 0, + 0, + 0, + 0, + 0, + frame_surface->ui16FrameIdx, + NI_DEVICE_TYPE_SCALER); + + if (retcode != NI_RETCODE_SUCCESS) { + av_log(link->dst, AV_LOG_DEBUG, + "Can't assign input frame %d\n", retcode); + retcode = AVERROR(ENOMEM); + goto fail; + } + + scaler_format = ff_ni_ffmpeg_to_gc620_pix_fmt(scale->out_format); + + options = NI_SCALER_FLAG_IO; + if (scale->out_color_matrix && strcmp(scale->out_color_matrix, "bt2020") == 0) + options |= NI_SCALER_FLAG_CS; + options |= (scale->output_compressed) ? NI_SCALER_FLAG_CMP : 0; + + /* Allocate hardware device destination frame. This acquires a frame from the pool */ + retcode = ni_device_alloc_frame(&scale->api_ctx, + FFALIGN(outlink->w,2), + FFALIGN(outlink->h,2), + scaler_format, + options, + 0, + 0, + 0, + 0, + 0, + -1, + NI_DEVICE_TYPE_SCALER); + + if (retcode != NI_RETCODE_SUCCESS) { + av_log(link->dst, AV_LOG_DEBUG, + "Can't allocate device output frame %d\n", retcode); + retcode = AVERROR(ENOMEM); + goto fail; + } + + out = av_frame_alloc(); + if (!out) { + retcode = AVERROR(ENOMEM); + goto fail; + } + + av_frame_copy_props(out,in); + + out->width = outlink->w; + out->height = outlink->h; + + out->format = AV_PIX_FMT_NI_QUAD; + + /* Quadra 2D engine always outputs limited color range */ + out->color_range = AVCOL_RANGE_MPEG; + + /* Reference the new hw frames context */ + out->hw_frames_ctx = av_buffer_ref(scale->out_frames_ref); + + out->data[3] = av_malloc(sizeof(niFrameSurface1_t)); + + if (!out->data[3]) { + retcode = AVERROR(ENOMEM); + goto fail; + } + + /* Copy the frame surface from the incoming frame */ + memcpy(out->data[3], in->data[3], sizeof(niFrameSurface1_t)); + + /* Set the new frame index */ + retcode = ni_device_session_read_hwdesc(&scale->api_ctx, &scale->api_dst_frame, + NI_DEVICE_TYPE_SCALER); + + if (retcode != NI_RETCODE_SUCCESS) { + av_log(link->dst, AV_LOG_ERROR, + "Can't acquire output frame %d\n",retcode); + retcode = AVERROR(ENOMEM); + goto fail; + } + +#ifdef NI_MEASURE_LATENCY + ff_ni_update_benchmark("ni_quadra_scale"); +#endif + + tempFID = frame_surface->ui16FrameIdx; + frame_surface = (niFrameSurface1_t *)out->data[3]; + new_frame_surface = (niFrameSurface1_t *)scale->api_dst_frame.data.frame.p_data[3]; + frame_surface->ui16FrameIdx = new_frame_surface->ui16FrameIdx; + frame_surface->ui16session_ID = new_frame_surface->ui16session_ID; + frame_surface->device_handle = new_frame_surface->device_handle; + frame_surface->output_idx = new_frame_surface->output_idx; + frame_surface->src_cpu = new_frame_surface->src_cpu; + frame_surface->dma_buf_fd = 0; + + ff_ni_set_bit_depth_and_encoding_type(&frame_surface->bit_depth, + &frame_surface->encoding_type, + scale->out_format); + + /* Remove ni-split specific assets */ + frame_surface->ui32nodeAddress = 0; + + frame_surface->ui16width = out->width; + frame_surface->ui16height = out->height; + + av_log(link->dst, AV_LOG_DEBUG, + "vf_scale_ni.c:IN trace ui16FrameIdx = [%d] --> out [%d]\n", + tempFID, frame_surface->ui16FrameIdx); + + out->buf[0] = av_buffer_create(out->data[3], sizeof(niFrameSurface1_t), + ff_ni_frame_free, NULL, 0); + + av_frame_free(&in); + + return ff_filter_frame(link->dst->outputs[0], out); + +fail: + av_frame_free(&in); + if (out) + av_frame_free(&out); + return retcode; +} + +static int 
filter_frame_ref(AVFilterLink *link, AVFrame *in) +{ + AVFilterLink *outlink = link->dst->outputs[1]; + return ff_filter_frame(outlink, in); +} + +static int activate(AVFilterContext *ctx) +{ + AVFilterLink *inlink = ctx->inputs[0]; + AVFilterLink *outlink = ctx->outputs[0]; + AVFrame *frame = NULL; + int ret = 0; + NetIntScaleContext *s = inlink->dst->priv; + + // Forward the status on output link to input link, if the status is set, discard all queued frames + FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink); + + av_log(ctx, AV_LOG_TRACE, "%s: ready %u inlink framequeue %u available_frame %d outlink framequeue %u frame_wanted %d\n", + __func__, ctx->ready, ff_inlink_queued_frames(inlink), ff_inlink_check_available_frame(inlink), ff_inlink_queued_frames(outlink), ff_outlink_frame_wanted(outlink)); + + if (ff_inlink_check_available_frame(inlink)) { + if (s->initialized) { + ret = ni_device_session_query_buffer_avail(&s->api_ctx, NI_DEVICE_TYPE_SCALER); + } + + if (ret == NI_RETCODE_ERROR_UNSUPPORTED_FW_VERSION) { + av_log(ctx, AV_LOG_WARNING, "No backpressure support in FW\n"); + } else if (ret < 0) { + av_log(ctx, AV_LOG_WARNING, "%s: query ret %d, ready %u inlink framequeue %u available_frame %d outlink framequeue %u frame_wanted %d - return NOT READY\n", + __func__, ret, ctx->ready, ff_inlink_queued_frames(inlink), ff_inlink_check_available_frame(inlink), ff_inlink_queued_frames(outlink), ff_outlink_frame_wanted(outlink)); + return FFERROR_NOT_READY; + } + + ret = ff_inlink_consume_frame(inlink, &frame); + if (ret < 0) + return ret; + + ret = filter_frame(inlink, frame); + if (ret >= 0) { + ff_filter_set_ready(ctx, 300); + } + return ret; + } + + // We did not get a frame from input link, check its status + FF_FILTER_FORWARD_STATUS(inlink, outlink); + + // We have no frames yet from input link and no EOF, so request some. 
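+    // (FF_FILTER_FORWARD_WANTED expands to roughly: if the outlink wants a
+    // frame, request one from the inlink via ff_inlink_request_frame() and
+    // return 0; otherwise fall through to FFERROR_NOT_READY below.)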
+ FF_FILTER_FORWARD_WANTED(outlink, inlink); + + return FFERROR_NOT_READY; +} + +#define OFFSET(x) offsetof(NetIntScaleContext, x) +#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_FILTERING_PARAM) + +static const AVOption ni_scale_options[] = { + { "w", "Output video width", OFFSET(w_expr), AV_OPT_TYPE_STRING, .flags = FLAGS }, + { "width", "Output video width", OFFSET(w_expr), AV_OPT_TYPE_STRING, .flags = FLAGS }, + { "h", "Output video height", OFFSET(h_expr), AV_OPT_TYPE_STRING, .flags = FLAGS }, + { "height", "Output video height", OFFSET(h_expr), AV_OPT_TYPE_STRING, .flags = FLAGS }, + { "size", "set video size", OFFSET(size_str), AV_OPT_TYPE_STRING, {.str = NULL}, 0, FLAGS }, + { "s", "set video size", OFFSET(size_str), AV_OPT_TYPE_STRING, {.str = NULL}, 0, FLAGS }, + { "in_color_matrix", "set input YCbCr type", OFFSET(in_color_matrix), AV_OPT_TYPE_STRING, {.str = NULL}, .flags = FLAGS, "color" }, + { "out_color_matrix", "set output YCbCr type", OFFSET(out_color_matrix), AV_OPT_TYPE_STRING, {.str = NULL}, .flags = FLAGS, "color"}, + { "bt709", NULL, 0, AV_OPT_TYPE_CONST, {.str = "bt709"}, 0, 0, FLAGS, "color" }, + { "bt2020", NULL, 0, AV_OPT_TYPE_CONST, {.str = "bt2020"}, 0, 0, FLAGS, "color" }, + { "force_original_aspect_ratio", "decrease or increase w/h if necessary to keep the original AR", OFFSET(force_original_aspect_ratio), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 2, FLAGS, "force_oar" }, + { "format", "set_output_format", OFFSET(format), AV_OPT_TYPE_INT, {.i64=OUTPUT_FORMAT_AUTO}, 0, OUTPUT_FORMAT_NB-1, FLAGS, "format" }, + { "yuv420p", "", 0, AV_OPT_TYPE_CONST, {.i64=OUTPUT_FORMAT_YUV420P}, .flags = FLAGS, .unit = "format" }, + { "yuyv422", "", 0, AV_OPT_TYPE_CONST, {.i64=OUTPUT_FORMAT_YUYV422}, .flags = FLAGS, .unit = "format" }, + { "uyvy422", "", 0, AV_OPT_TYPE_CONST, {.i64=OUTPUT_FORMAT_UYVY422}, .flags = FLAGS, .unit = "format" }, + { "nv12", "", 0, AV_OPT_TYPE_CONST, {.i64=OUTPUT_FORMAT_NV12}, .flags = FLAGS, .unit = "format" }, + { "argb", "", 0, AV_OPT_TYPE_CONST, {.i64=OUTPUT_FORMAT_ARGB}, .flags = FLAGS, .unit = "format" }, + { "rgba", "", 0, AV_OPT_TYPE_CONST, {.i64=OUTPUT_FORMAT_RGBA}, .flags = FLAGS, .unit = "format" }, + { "abgr", "", 0, AV_OPT_TYPE_CONST, {.i64=OUTPUT_FORMAT_ABGR}, .flags = FLAGS, .unit = "format" }, + { "bgra", "", 0, AV_OPT_TYPE_CONST, {.i64=OUTPUT_FORMAT_BGRA}, .flags = FLAGS, .unit = "format" }, + { "yuv420p10le", "", 0, AV_OPT_TYPE_CONST, {.i64=OUTPUT_FORMAT_YUV420P10LE}, .flags = FLAGS, .unit = "format" }, + { "nv16", "", 0, AV_OPT_TYPE_CONST, {.i64=OUTPUT_FORMAT_NV16}, .flags = FLAGS, .unit = "format" }, + { "bgr0", "", 0, AV_OPT_TYPE_CONST, {.i64=OUTPUT_FORMAT_BGR0}, .flags = FLAGS, .unit = "format" }, + { "p010le", "", 0, AV_OPT_TYPE_CONST, {.i64=OUTPUT_FORMAT_P010LE}, .flags = FLAGS, .unit = "format" }, + { "auto", "", 0, AV_OPT_TYPE_CONST, {.i64=OUTPUT_FORMAT_AUTO}, .flags = FLAGS, .unit = "format"}, + { "disable", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 0}, 0, 0, FLAGS, "force_oar" }, + { "decrease", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 1}, 0, 0, FLAGS, "force_oar" }, + { "increase", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 2}, 0, 0, FLAGS, "force_oar" }, + { "force_divisible_by", "enforce that the output resolution is divisible by a defined integer when force_original_aspect_ratio is used", OFFSET(force_divisible_by), AV_OPT_TYPE_INT, {.i64 = 1}, 1, 256, FLAGS }, + { "filterblit", "filterblit enable", OFFSET(params.filterblit), AV_OPT_TYPE_INT, {.i64=0}, 0, 4, FLAGS }, +#if ((LIBXCODER_API_VERSION_MAJOR > 2) || (LIBXCODER_API_VERSION_MAJOR == 2 
&& LIBXCODER_API_VERSION_MINOR>= 76)) + { "param_b", "Parameter B for bicubic", OFFSET(params.scaler_param_b), AV_OPT_TYPE_DOUBLE, {.dbl=0.0}, 0, 1, FLAGS }, + { "param_c", "Parameter C for bicubic", OFFSET(params.scaler_param_c), AV_OPT_TYPE_DOUBLE, {.dbl=0.75}, 0, 1, FLAGS }, +#endif + { "autoselect", "auto select filterblit mode according to resolution", OFFSET(autoselect), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS }, + NI_FILT_OPTION_AUTO_SKIP, + NI_FILT_OPTION_KEEPALIVE, + NI_FILT_OPTION_BUFFER_LIMIT, + { NULL } +}; + +AVFILTER_DEFINE_CLASS(ni_scale); + +static const AVFilterPad inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .filter_frame = filter_frame, + }, +}; + +static const AVFilterPad outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = config_props, + }, +}; + +FFFilter ff_vf_scale_ni_quadra = { + .p.name = "ni_quadra_scale", + .p.description = NULL_IF_CONFIG_SMALL( + "NETINT Quadra video scaler v" NI_XCODER_REVISION), + .p.priv_class = &ni_scale_class, + .priv_size = sizeof(NetIntScaleContext), + .init = init, + .uninit = uninit, + .activate = activate, + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, + FILTER_INPUTS(inputs), + FILTER_OUTPUTS(outputs), + FILTER_QUERY_FUNC(query_formats), +}; diff --git a/libavfilter/vf_split_ni.c b/libavfilter/vf_split_ni.c new file mode 100644 index 0000000000..7537127132 --- /dev/null +++ b/libavfilter/vf_split_ni.c @@ -0,0 +1,529 @@ +/* + * Copyright (c) 2007 Bobby Bingham + * Copyright (c) 2021 NetInt + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
*
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * video splitter
+ */
+
+#include <stdio.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/internal.h"
+#include "libavutil/mem.h"
+#include "libavutil/opt.h"
+#include "nifilter.h"
+#include "version.h"
+#include <ni_device_api.h>
+#include "avfilter_internal.h"
+#include "framequeue.h"
+#include "avfilter.h"
+#include "audio.h"
+#include "formats.h"
+#include "filters.h"
+#include "video.h"
+
+typedef struct NetIntSplitContext {
+    const AVClass *class;
+    bool initialized;
+    int nb_output0;
+    int nb_output1;
+    int nb_output2;
+    int total_outputs;
+    int frame_contexts_applied;
+    ni_split_context_t src_ctx;
+    AVBufferRef *out_frames_ref[3];
+} NetIntSplitContext;
+
+static int config_output(AVFilterLink *link);
+
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVPixelFormat input_pix_fmts[] = {
+        AV_PIX_FMT_YUV420P,
+        AV_PIX_FMT_YUVJ420P,
+        AV_PIX_FMT_YUV420P10LE,
+        AV_PIX_FMT_NV12,
+        AV_PIX_FMT_P010LE,
+        AV_PIX_FMT_NI_QUAD,
+        AV_PIX_FMT_NONE,
+    };
+    static const enum AVPixelFormat output_pix_fmts[] = {
+        AV_PIX_FMT_YUV420P,
+        AV_PIX_FMT_YUVJ420P,
+        AV_PIX_FMT_YUV420P10LE,
+        AV_PIX_FMT_NV12,
+        AV_PIX_FMT_P010LE,
+        AV_PIX_FMT_NI_QUAD,
+        AV_PIX_FMT_NONE,
+    };
+    AVFilterFormats *in_fmts = ff_make_format_list(input_pix_fmts);
+    AVFilterFormats *out_fmts = ff_make_format_list(output_pix_fmts);
+    int ret;
+
+    ret = ff_formats_ref(in_fmts, &ctx->inputs[0]->outcfg.formats);
+    if (ret < 0)
+        return ret;
+
+    return ff_formats_ref(out_fmts, &ctx->outputs[0]->incfg.formats);
+}
+
+static av_cold int split_init(AVFilterContext *ctx)
+{
+    NetIntSplitContext *s = ctx->priv;
+    int i, ret;
+
+    av_log(ctx, AV_LOG_DEBUG, "ni_quadra_split INIT out0,1,2 = %d %d %d ctx->nb_outputs = %d\n",
+           s->nb_output0, s->nb_output1, s->nb_output2,
+           ctx->nb_outputs);
+    if (s->nb_output2 && s->nb_output1 == 0) {
+        // swap them so reordering uses out1 first
+        s->nb_output1 = s->nb_output2;
+        s->nb_output2 = 0;
+        av_log(ctx, AV_LOG_DEBUG, "ni_quadra_split INIT out2 moved to out1\n");
+    }
+
+    s->total_outputs = s->nb_output0 + s->nb_output1 + s->nb_output2;
+
+    for (i = 0; i < s->total_outputs; i++) {
+        char name[32];
+        AVFilterPad pad = { 0 };
+
+        snprintf(name, sizeof(name), "output%d", i);
+        pad.type = ctx->filter->inputs[0].type;
+        pad.name = av_strdup(name);
+        if (!pad.name)
+            return AVERROR(ENOMEM);
+        pad.config_props = config_output;
+
+        if ((ret = ff_append_outpad_free_name(ctx, &pad)) < 0) {
+            av_freep(&pad.name);
+            return ret;
+        }
+    }
+
+    return 0;
+}
+
+static av_cold void split_uninit(AVFilterContext *ctx)
+{
+    int i;
+    NetIntSplitContext *s = ctx->priv;
+
+    for (i = 0; i < ctx->nb_outputs; i++)
+        av_freep(&ctx->output_pads[i].name);
+
+    for (i = 0; i < 3; i++) {
+        if (s->out_frames_ref[i])
+            av_buffer_unref(&s->out_frames_ref[i]);
+    }
+}
+
+static int config_input(AVFilterLink *inlink)
+{
+    AVFilterContext *avctx = inlink->dst;
+    NetIntSplitContext *s = avctx->priv;
+    AVHWFramesContext *ctx;
+    ni_split_context_t *p_split_ctx_dst = &s->src_ctx;
+    AVNIFramesContext *src_ctx;
+    ni_split_context_t *p_split_ctx_src;
+    int i;
+
+    s->frame_contexts_applied = -1;
+    FilterLink *li = ff_filter_link(inlink);
+
+    if (li->hw_frames_ctx ==
NULL) { + for (i = 0; i < 3; i++) { + s->src_ctx.w[i] = inlink->w; + s->src_ctx.h[i] = inlink->h; + s->src_ctx.f[i] = -1; + s->src_ctx.f8b[i] = -1; + } + } else { + ctx = (AVHWFramesContext *)li->hw_frames_ctx->data; + src_ctx = (AVNIFramesContext*) ctx->hwctx; + p_split_ctx_src = &src_ctx->split_ctx; + memcpy(p_split_ctx_dst, p_split_ctx_src, sizeof(ni_split_context_t)); + for (i = 0; i < 3; i++) { + s->frame_contexts_applied = 0; + av_log(avctx, AV_LOG_DEBUG, "[%d] %d x %d f8b %d\n", i, + p_split_ctx_dst->w[i], p_split_ctx_dst->h[i], + p_split_ctx_dst->f8b[i]); + } + if (p_split_ctx_dst->enabled == 0) { + for (i = 0; i < 3; i++) { + s->src_ctx.w[i] = inlink->w; + s->src_ctx.h[i] = inlink->h; + s->src_ctx.f[i] = -1; + s->src_ctx.f8b[i] = -1; + } + } + } + + return 0; +} + +static int init_out_hwctxs(AVFilterContext *ctx) +{ + NetIntSplitContext *s = ctx->priv; + AVHWFramesContext *in_frames_ctx; + AVHWFramesContext *out_frames_ctx[3]; + enum AVPixelFormat out_format; + int i, j; + + FilterLink *li = ff_filter_link(ctx->inputs[0]); + if (li->hw_frames_ctx == NULL) { + av_log(ctx, AV_LOG_ERROR, "No hw context provided on input\n"); + return AVERROR(EINVAL); + } + + in_frames_ctx = (AVHWFramesContext *)li->hw_frames_ctx->data; + + if (s->src_ctx.enabled == 1) { + for (i = 0; i < 3; i++) { + s->out_frames_ref[i] = + av_hwframe_ctx_alloc(in_frames_ctx->device_ref); + if (!s->out_frames_ref[i]) + return AVERROR(ENOMEM); + out_frames_ctx[i] = (AVHWFramesContext *)s->out_frames_ref[i]->data; + + out_frames_ctx[i]->format = AV_PIX_FMT_NI_QUAD; + out_frames_ctx[i]->width = s->src_ctx.w[i]; + out_frames_ctx[i]->height = s->src_ctx.h[i]; + + if (s->src_ctx.f[i] == -1) { + return AVERROR(EINVAL); + } + + switch (s->src_ctx.f[i]) { + case NI_PIXEL_PLANAR_FORMAT_PLANAR: // yuv420p or p10 + out_format = (s->src_ctx.f8b[i] == 1) ? AV_PIX_FMT_YUV420P + : AV_PIX_FMT_YUV420P10LE; + break; + case NI_PIXEL_PLANAR_FORMAT_TILED4X4: // tiled + out_format = (s->src_ctx.f8b[i] == 1) + ? AV_PIX_FMT_NI_QUAD_8_TILE_4X4 + : AV_PIX_FMT_NI_QUAD_10_TILE_4X4; + break; + case NI_PIXEL_PLANAR_FORMAT_SEMIPLANAR: // NV12 + out_format = (s->src_ctx.f8b[i] == 1) ? AV_PIX_FMT_NV12 + : AV_PIX_FMT_P010LE; + break; + default: + av_log(ctx, AV_LOG_ERROR, "PPU%d invalid pixel format %d in hwframe ctx\n", i, s->src_ctx.f[i]); + return AVERROR(EINVAL); + } + out_frames_ctx[i]->sw_format = out_format; + out_frames_ctx[i]->initial_pool_size = -1; // already has its own pool + + /* Don't check return code, this will intentionally fail */ + av_hwframe_ctx_init(s->out_frames_ref[i]); + + ni_cpy_hwframe_ctx(in_frames_ctx, out_frames_ctx[i]); + ((AVNIFramesContext *) out_frames_ctx[i]->hwctx)->split_ctx.enabled = 0; + } + + for (i = 0; i < ctx->nb_outputs; i++) { + FilterLink *lo = ff_filter_link(ctx->outputs[i]); + av_buffer_unref(&lo->hw_frames_ctx); + if (i < s->nb_output0) { + j = 0; + } else if (i < s->nb_output0 + s->nb_output1) { + j = 1; + } else { + j = 2; + } + lo->hw_frames_ctx = av_buffer_ref(s->out_frames_ref[j]); + + av_log(ctx, AV_LOG_DEBUG, "NI:%s:out\n", + (s->src_ctx.f[j] == 0) + ? "semiplanar" + : (s->src_ctx.f[j] == 2) ? 
"tiled" : "planar"); + if (!lo->hw_frames_ctx) + return AVERROR(ENOMEM); + + av_log(ctx, AV_LOG_DEBUG, + "ni_split superframe config_output_hwctx[%d] %p\n", i, + lo->hw_frames_ctx); + } + } else { // no possibility of using extra outputs + for (i = 0; i < ctx->nb_outputs; i++) { + FilterLink *lo = ff_filter_link(ctx->outputs[i]); + av_buffer_unref(&lo->hw_frames_ctx); + if (i < s->nb_output0) { + lo->hw_frames_ctx = av_buffer_ref(li->hw_frames_ctx); + } + if (!lo->hw_frames_ctx) + return AVERROR(ENOMEM); + + av_log(ctx, AV_LOG_DEBUG, "ni_split config_output_hwctx[%d] %p\n", + i, lo->hw_frames_ctx); + } + av_log(ctx, AV_LOG_DEBUG, + "ni_split config_output_hwctx set direct to output\n"); + } + return 0; +} + +static int config_output(AVFilterLink *link) +{ + // config output sets all outputs at a time since there's no + // easy way to track the target output based on inlink. + // fairly trivial assignments here so no performance worries + AVFilterContext *ctx = link->src; + NetIntSplitContext *s = ctx->priv; + int i, ret; + + for (i = 0; i < ctx->nb_outputs; i++) { + if (i < s->nb_output0) { + ctx->outputs[i]->w = s->src_ctx.w[0]; + ctx->outputs[i]->h = s->src_ctx.h[0]; + } else if (i < s->nb_output0 + s->nb_output1) { + ctx->outputs[i]->w = s->src_ctx.w[1]; + ctx->outputs[i]->h = s->src_ctx.h[1]; + } else { + ctx->outputs[i]->w = s->src_ctx.w[2]; + ctx->outputs[i]->h = s->src_ctx.h[2]; + } + av_log(ctx, AV_LOG_DEBUG, + "ni_split config_output[%d] w x h = %d x %d\n", i, + ctx->outputs[i]->w, ctx->outputs[i]->h); + } + if (s->frame_contexts_applied == 0) { + s->frame_contexts_applied = 1; // run once per set ni_split, not per output + ret = init_out_hwctxs(ctx); + if (ret < 0) + return ret; + } + return 0; +} + +static int filter_ni_frame(AVFilterLink *inlink, AVFrame *frame) +{ + AVFilterContext *ctx = inlink->dst; + NetIntSplitContext *s = ctx->priv; + int i, ret = AVERROR_EOF; + int output_index; + niFrameSurface1_t *p_data3; + + if (!s->initialized) { + for (i = 0; i < 3; i++) { + AVHWFramesContext *in_frames_ctx = (AVHWFramesContext *)frame->hw_frames_ctx->data; + AVHWFramesContext *out_frames_ctx = (AVHWFramesContext *)s->out_frames_ref[i]->data; + ni_cpy_hwframe_ctx(in_frames_ctx, out_frames_ctx); + AVNIFramesContext *ni_frames_ctx = (AVNIFramesContext *)out_frames_ctx->hwctx; + ni_frames_ctx->split_ctx.enabled = 0; + } + s->initialized = 1; + } + + for (i = 0; i < ctx->nb_outputs; i++) { + AVFrame *buf_out; + FilterLinkInternal* const li = ff_link_internal(ctx->outputs[i]); + if (li->status_in) + continue; + + buf_out = av_frame_alloc(); + if (!buf_out) { + ret = AVERROR(ENOMEM); + break; + } + av_frame_copy_props(buf_out, frame); + + buf_out->format = frame->format; + + if (i < s->nb_output0) { + output_index = 0; + } else if (i < s->nb_output0 + s->nb_output1) { + if (!frame->buf[1]) { + ret = AVERROR(ENOMEM); + av_frame_free(&buf_out); + break; + } + output_index = 1; + } else { + if (!frame->buf[2]) { + ret = AVERROR(ENOMEM); + av_frame_free(&buf_out); + break; + } + output_index = 2; + } + buf_out->buf[0] = av_buffer_ref(frame->buf[output_index]); + buf_out->hw_frames_ctx = av_buffer_ref(s->out_frames_ref[output_index]); + buf_out->data[3] = buf_out->buf[0]->data; + p_data3 = (niFrameSurface1_t*)((uint8_t*)buf_out->data[3]); + + buf_out->width = ctx->outputs[i]->w = p_data3->ui16width; + buf_out->height = ctx->outputs[i]->h = p_data3->ui16height; + + av_log(ctx, AV_LOG_DEBUG, "output %d supplied WxH = %d x %d FID %d offset %d\n", + i, buf_out->width, buf_out->height, + 
p_data3->ui16FrameIdx, p_data3->ui32nodeAddress);
+
+        ret = ff_filter_frame(ctx->outputs[i], buf_out);
+        if (ret < 0)
+            break;
+    }
+    return ret;
+}
+
+/* basically a clone of the native split filter */
+static int filter_std_frame(AVFilterLink *inlink, AVFrame *frame)
+{
+    AVFilterContext *ctx = inlink->dst;
+    NetIntSplitContext *s = ctx->priv;
+    int i, ret = AVERROR_EOF;
+
+    if (s->nb_output0 < 2) {
+        av_log(ctx, AV_LOG_ERROR, "ni_split must have at least 2 outputs for Standard split!\n");
+        ret = AVERROR(EINVAL);
+        return ret;
+    }
+    if (s->nb_output1) {
+        av_log(ctx, AV_LOG_ERROR, "ni_split output1 or output2 param must not be used for Standard splitting!\n");
+        ret = AVERROR(E2BIG);
+        return ret;
+    }
+
+    for (i = 0; i < ctx->nb_outputs; i++) {
+        AVFrame *buf_out;
+        FilterLinkInternal* const li = ff_link_internal(ctx->outputs[i]);
+        if (li->status_in)
+            continue;
+        buf_out = av_frame_clone(frame);
+        if (!buf_out) {
+            ret = AVERROR(ENOMEM);
+            break;
+        }
+
+        ret = ff_filter_frame(ctx->outputs[i], buf_out);
+        if (ret < 0)
+            break;
+    }
+    return ret;
+}
+
+static int activate(AVFilterContext *ctx)
+{
+    NetIntSplitContext *s = ctx->priv;
+    AVFilterLink *inlink = ctx->inputs[0];
+    AVFrame *frame;
+    int status, ret, nb_eofs = 0;
+    int64_t pts;
+
+    for (int i = 0; i < ctx->nb_outputs; i++) {
+        nb_eofs += ff_outlink_get_status(ctx->outputs[i]) == AVERROR_EOF;
+    }
+
+    if (nb_eofs == ctx->nb_outputs) {
+        ff_inlink_set_status(inlink, AVERROR_EOF);
+        return 0;
+    }
+
+    ret = ff_inlink_consume_frame(inlink, &frame);
+    if (ret < 0) {
+        return ret;
+    }
+    if (ret > 0) {
+        av_log(ctx, AV_LOG_TRACE, "out0,1,2 = %d %d %d total = %d\n",
+               s->nb_output0, s->nb_output1, s->nb_output2,
+               ctx->nb_outputs);
+
+        av_log(ctx, AV_LOG_DEBUG, "ni_split: filter_frame, in format=%d, Sctx %d\n",
+               frame->format,
+               s->src_ctx.enabled);
+
+        if (frame->format == AV_PIX_FMT_NI_QUAD && s->src_ctx.enabled == 1) {
+            ret = filter_ni_frame(inlink, frame);
+        } else {
+            ret = filter_std_frame(inlink, frame);
+        }
+
+        av_frame_free(&frame);
+        if (ret < 0) {
+            return ret;
+        } else {
+            ff_filter_set_ready(ctx, 300);
+        }
+    }
+
+    if (ff_inlink_acknowledge_status(inlink, &status, &pts)) {
+        for (int i = 0; i < ctx->nb_outputs; i++) {
+            if (ff_outlink_get_status(ctx->outputs[i])) {
+                continue;
+            }
+            ff_outlink_set_status(ctx->outputs[i], status, pts);
+        }
+        return 0;
+    }
+
+    for (int i = 0; i < ctx->nb_outputs; i++) {
+        if (ff_outlink_get_status(ctx->outputs[i])) {
+            continue;
+        }
+
+        if (ff_outlink_frame_wanted(ctx->outputs[i])) {
+            ff_inlink_request_frame(inlink);
+            return 0;
+        }
+    }
+
+    return FFERROR_NOT_READY;
+}
+
+#define OFFSET(x) offsetof(NetIntSplitContext, x)
+#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_FILTERING_PARAM)
+
+static const AVOption ni_split_options[] = {
+    { "output0", "Copies of output0", OFFSET(nb_output0), AV_OPT_TYPE_INT, {.i64 = 2}, 0, INT_MAX, FLAGS },
+    { "output1", "Copies of output1", OFFSET(nb_output1), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, FLAGS },
+    { "output2", "Copies of output2", OFFSET(nb_output2), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, FLAGS },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(ni_split);
+
+static const AVFilterPad inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = config_input,
+    },
+};
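+/*
+ * Example (assumed command line): with the decoder's post-processor units
+ * configured for two outputs, take two copies of PPU0 and one of PPU1:
+ *
+ *   ... -vf "ni_quadra_split=output0=2:output1=1[a][b][c]" ...
+ *
+ * For non-hardware input frames the filter falls back to the native split
+ * behaviour, in which case only output0 may be used.
+ */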
+FFFilter ff_vf_split_ni_quadra = {
+    .p.name         = "ni_quadra_split",
+    .p.description  = NULL_IF_CONFIG_SMALL(
+        "NETINT Quadra demux input from decoder post-processor unit (PPU) to N video outputs v" NI_XCODER_REVISION),
+    .p.priv_class   = &ni_split_class,
+    .priv_size      = sizeof(NetIntSplitContext),
+    .init           = split_init,
+    .uninit         = split_uninit,
+    .p.flags        = AVFILTER_FLAG_DYNAMIC_OUTPUTS,
+    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+    .activate       = activate,
+    FILTER_INPUTS(inputs),
+    FILTER_QUERY_FUNC(query_formats),
+};
-- 
2.25.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".