From: Niklas Haas via ffmpeg-devel <ffmpeg-devel@ffmpeg.org>
To: ffmpeg-devel@ffmpeg.org
Cc: Niklas Haas <code@ffmpeg.org>
Subject: [FFmpeg-devel] [PR] swscale: some API usage improvements; and a contentious union (PR #22297)
Date: Thu, 26 Feb 2026 18:07:23 -0000
Message-ID: <177212924425.25.7171214361113436911@29965ddac10e> (raw)
PR #22297 opened by Niklas Haas (haasn)
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/22297
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/22297.patch
I have no particularly strong feelings about the second and third commits in this series, so feel free to reject. The first commit though is an API safety improvement for sure.
>From 61476f77b07a0474d22892cd4b894aefc0bcec6d Mon Sep 17 00:00:00 2001
From: Niklas Haas <git@haasn.dev>
Date: Thu, 26 Feb 2026 18:46:04 +0100
Subject: [PATCH 1/3] swscale: explicitly track if a context is "legacy" or not
The legacy API is defined by sws_init_context(), sws_scale() etc., whereas
the "modern" API is defined by just using sws_scale_frame() without a prior
init call.
This int allows us to cleanly distinguish the type of context, paving the
way for some minor refactoring.
As an immediate benefit, we now gain a bunch of explicit error checks to
ensure the API is used correctly (i.e. sws_scale() not called before
sws_init_context()).
Sponsored-by: Sovereign Tech Fund
Signed-off-by: Niklas Haas <git@haasn.dev>
---
libswscale/swscale.c | 15 +++++++++++++--
libswscale/swscale_internal.h | 2 ++
libswscale/utils.c | 12 ++++++++++--
3 files changed, 25 insertions(+), 4 deletions(-)
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 5dbd93e0a2..6cf91116cd 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -1208,6 +1208,8 @@ static int scale_internal(SwsContext *sws,
void sws_frame_end(SwsContext *sws)
{
SwsInternal *c = sws_internal(sws);
+ if (!c->is_legacy_init)
+ return;
av_frame_unref(c->frame_src);
av_frame_unref(c->frame_dst);
c->src_ranges.nb_ranges = 0;
@@ -1217,6 +1219,8 @@ int sws_frame_start(SwsContext *sws, AVFrame *dst, const AVFrame *src)
{
SwsInternal *c = sws_internal(sws);
int ret, allocated = 0;
+ if (!c->is_legacy_init)
+ return AVERROR(EINVAL);
ret = av_frame_ref(c->frame_src, src);
if (ret < 0)
@@ -1249,6 +1253,8 @@ int sws_send_slice(SwsContext *sws, unsigned int slice_start,
{
SwsInternal *c = sws_internal(sws);
int ret;
+ if (!c->is_legacy_init)
+ return AVERROR(EINVAL);
ret = ff_range_add(&c->src_ranges, slice_start, slice_height);
if (ret < 0)
@@ -1272,6 +1278,8 @@ int sws_receive_slice(SwsContext *sws, unsigned int slice_start,
SwsInternal *c = sws_internal(sws);
unsigned int align = sws_receive_slice_alignment(sws);
uint8_t *dst[4];
+ if (!c->is_legacy_init)
+ return AVERROR(EINVAL);
/* wait until complete input has been received */
if (!(c->src_ranges.nb_ranges == 1 &&
@@ -1379,9 +1387,9 @@ int sws_scale_frame(SwsContext *sws, AVFrame *dst, const AVFrame *src)
if (!src || !dst)
return AVERROR(EINVAL);
- if (c->frame_src) {
+ if (c->is_legacy_init) {
/* Context has been initialized with explicit values, fall back to
- * legacy API */
+ * legacy API behavior. */
ret = sws_frame_start(sws, dst, src);
if (ret < 0)
return ret;
@@ -1552,6 +1560,9 @@ int attribute_align_arg sws_scale(SwsContext *sws,
const int dstStride[])
{
SwsInternal *c = sws_internal(sws);
+ if (!c->is_legacy_init)
+ return AVERROR(EINVAL);
+
if (c->nb_slice_ctx) {
sws = c->slice_ctx[0];
c = sws_internal(sws);
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 81ec5ef0cc..e70b3ccbff 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -704,6 +704,8 @@ struct SwsInternal {
// Hardware specific private data
void *hw_priv;
+
+ int is_legacy_init;
};
//FIXME check init (where 0)
diff --git a/libswscale/utils.c b/libswscale/utils.c
index 42b49a5cd5..8ce277b06e 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -854,6 +854,8 @@ int sws_setColorspaceDetails(SwsContext *sws, const int inv_table[4],
const AVPixFmtDescriptor *desc_dst;
const AVPixFmtDescriptor *desc_src;
int ret, need_reinit = 0;
+ if (!c || !c->is_legacy_init)
+ return AVERROR(EINVAL);
if (c->nb_slice_ctx) {
int parent_ret = 0;
@@ -1009,8 +1011,8 @@ int sws_getColorspaceDetails(SwsContext *sws, int **inv_table,
int *brightness, int *contrast, int *saturation)
{
SwsInternal *c = sws_internal(sws);
- if (!c)
- return -1;
+ if (!c || !c->is_legacy_init)
+ return AVERROR(EINVAL);
if (c->nb_slice_ctx) {
return sws_getColorspaceDetails(c->slice_ctx[0], inv_table, srcRange,
@@ -1899,6 +1901,7 @@ av_cold int sws_init_context(SwsContext *sws, SwsFilter *srcFilter,
enum AVPixelFormat src_format, dst_format;
int ret;
+ c->is_legacy_init = 1;
c->frame_src = av_frame_alloc();
c->frame_dst = av_frame_alloc();
if (!c->frame_src || !c->frame_dst)
@@ -2270,6 +2273,11 @@ void sws_freeContext(SwsContext *sws)
for (i = 0; i < FF_ARRAY_ELEMS(c->graph); i++)
ff_sws_graph_free(&c->graph[i]);
+ if (!c->is_legacy_init) {
+ av_free(c);
+ return;
+ }
+
for (i = 0; i < c->nb_slice_ctx; i++)
sws_freeContext(c->slice_ctx[i]);
av_freep(&c->slice_ctx);
--
2.52.0
>From 9242e034b45d142b6e75e1183f3d3778a00113f3 Mon Sep 17 00:00:00 2001
From: Niklas Haas <git@haasn.dev>
Date: Thu, 26 Feb 2026 18:55:13 +0100
Subject: [PATCH 2/3] swscale/utils: reset non-legacy/dynamic fields on legacy
init
Clear out any leftover "dynamic API" state when sws_init_context() is
called, in the unlikely case that an API user switches to the legacy API
after using the modern one.
Sponsored-by: Sovereign Tech Fund
Signed-off-by: Niklas Haas <git@haasn.dev>
---
libswscale/utils.c | 29 ++++++++++++++++++++++-------
1 file changed, 22 insertions(+), 7 deletions(-)
diff --git a/libswscale/utils.c b/libswscale/utils.c
index 8ce277b06e..125b580aef 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -1893,6 +1893,24 @@ static int context_init_threaded(SwsContext *sws,
return 0;
}
+/**
+ * Clear out any state from the modern non-legacy API, and reset the context
+ * back to a state as if sws_scale_frame() had never been used with it.
+ */
+static void reset_dynamic_state(SwsContext *sws)
+{
+ SwsInternal *c = sws_internal(sws);
+ if (c->is_legacy_init)
+ return;
+
+ for (int i = 0; i < FF_ARRAY_ELEMS(c->graph); i++)
+ ff_sws_graph_free(&c->graph[i]);
+
+#if CONFIG_VULKAN
+ ff_sws_vk_uninit(sws);
+#endif
+}
+
av_cold int sws_init_context(SwsContext *sws, SwsFilter *srcFilter,
SwsFilter *dstFilter)
{
@@ -1901,6 +1919,8 @@ av_cold int sws_init_context(SwsContext *sws, SwsFilter *srcFilter,
enum AVPixelFormat src_format, dst_format;
int ret;
+ /* Clear any state related to the modern non-legacy API */
+ reset_dynamic_state(sws);
c->is_legacy_init = 1;
c->frame_src = av_frame_alloc();
c->frame_dst = av_frame_alloc();
@@ -2266,18 +2286,13 @@ void sws_freeContext(SwsContext *sws)
if (!c)
return;
-#if CONFIG_VULKAN
- ff_sws_vk_uninit(sws);
-#endif
-
- for (i = 0; i < FF_ARRAY_ELEMS(c->graph); i++)
- ff_sws_graph_free(&c->graph[i]);
-
if (!c->is_legacy_init) {
+ reset_dynamic_state(sws);
av_free(c);
return;
}
+ /* Uninit legacy API fields */
for (i = 0; i < c->nb_slice_ctx; i++)
sws_freeContext(c->slice_ctx[i]);
av_freep(&c->slice_ctx);
--
2.52.0
>From 319054d748dab70e7732cd338da167c85c681b12 Mon Sep 17 00:00:00 2001
From: Niklas Haas <git@haasn.dev>
Date: Thu, 26 Feb 2026 18:33:35 +0100
Subject: [PATCH 3/3] swscale: define legacy and modern context internals as
union
The previous commits ensure that we don't mix up these APIs.
The main benefits include:
1. Clearer code organization and communication of purpose
2. Better cache locality for the new code, which no longer needs to
read from the end of a ~40 kB struct to access newly added fields.
3. Ability to extend the non-legacy context internals without constantly
breaking the hard-coded ASM offsets or needlessly growing this struct.
I've allowed myself the luxury of not double-indenting the nested union/state
blocks.
Sponsored-by: Sovereign Tech Fund
Signed-off-by: Niklas Haas <git@haasn.dev>
---
libswscale/swscale_internal.h | 601 +++++++++++++++++-----------------
libswscale/x86/output.asm | 2 +-
2 files changed, 306 insertions(+), 297 deletions(-)
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index e70b3ccbff..c9f00dc789 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -334,131 +334,142 @@ struct SwsInternal {
/* Parent context (for slice contexts) */
SwsContext *parent;
- AVSliceThread *slicethread;
- SwsContext **slice_ctx;
- int *slice_err;
- int nb_slice_ctx;
+ int is_legacy_init;
+ union {
- /* Scaling graph, reinitialized dynamically as needed. */
- SwsGraph *graph[2]; /* top, bottom fields */
+ /* Dynamic graph-based API (sws_scale_frame() without init) */
+ struct {
+ /* Scaling graph, reinitialized dynamically as needed. */
+ SwsGraph *graph[2]; /* top, bottom fields */
- // values passed to current sws_receive_slice() call
- int dst_slice_start;
- int dst_slice_height;
+ /* Hardware specific private data. */
+ void *hw_priv;
+ };
- /**
- * Note that src, dst, srcStride, dstStride will be copied in the
- * sws_scale() wrapper so they can be freely modified here.
- */
- SwsFunc convert_unscaled;
- int chrSrcW; ///< Width of source chroma planes.
- int chrSrcH; ///< Height of source chroma planes.
- int chrDstW; ///< Width of destination chroma planes.
- int chrDstH; ///< Height of destination chroma planes.
- int lumXInc, chrXInc;
- int lumYInc, chrYInc;
- int dstFormatBpp; ///< Number of bits per pixel of the destination pixel format.
- int srcFormatBpp; ///< Number of bits per pixel of the source pixel format.
- int dstBpc, srcBpc;
- int chrSrcHSubSample; ///< Binary logarithm of horizontal subsampling factor between luma/alpha and chroma planes in source image.
- int chrSrcVSubSample; ///< Binary logarithm of vertical subsampling factor between luma/alpha and chroma planes in source image.
- int chrDstHSubSample; ///< Binary logarithm of horizontal subsampling factor between luma/alpha and chroma planes in destination image.
- int chrDstVSubSample; ///< Binary logarithm of vertical subsampling factor between luma/alpha and chroma planes in destination image.
- int vChrDrop; ///< Binary logarithm of extra vertical subsampling factor in source image chroma planes specified by user.
- int sliceDir; ///< Direction that slices are fed to the scaler (1 = top-to-bottom, -1 = bottom-to-top).
+ /* Legacy static API (sws_init_context(), sws_scale()) */
+ struct {
+ AVSliceThread *slicethread;
+ SwsContext **slice_ctx;
+ int *slice_err;
+ int nb_slice_ctx;
- AVFrame *frame_src;
- AVFrame *frame_dst;
+ // values passed to current sws_receive_slice() call
+ int dst_slice_start;
+ int dst_slice_height;
- RangeList src_ranges;
+ /**
+ * Note that src, dst, srcStride, dstStride will be copied in the
+ * sws_scale() wrapper so they can be freely modified here.
+ */
+ SwsFunc convert_unscaled;
+ int chrSrcW; ///< Width of source chroma planes.
+ int chrSrcH; ///< Height of source chroma planes.
+ int chrDstW; ///< Width of destination chroma planes.
+ int chrDstH; ///< Height of destination chroma planes.
+ int lumXInc, chrXInc;
+ int lumYInc, chrYInc;
+ int dstFormatBpp; ///< Number of bits per pixel of the destination pixel format.
+ int srcFormatBpp; ///< Number of bits per pixel of the source pixel format.
+ int dstBpc, srcBpc;
+ int chrSrcHSubSample; ///< Binary logarithm of horizontal subsampling factor between luma/alpha and chroma planes in source image.
+ int chrSrcVSubSample; ///< Binary logarithm of vertical subsampling factor between luma/alpha and chroma planes in source image.
+ int chrDstHSubSample; ///< Binary logarithm of horizontal subsampling factor between luma/alpha and chroma planes in destination image.
+ int chrDstVSubSample; ///< Binary logarithm of vertical subsampling factor between luma/alpha and chroma planes in destination image.
+ int vChrDrop; ///< Binary logarithm of extra vertical subsampling factor in source image chroma planes specified by user.
+ int sliceDir; ///< Direction that slices are fed to the scaler (1 = top-to-bottom, -1 = bottom-to-top).
- /* The cascaded_* fields allow splitting a scaler task into multiple
- * sequential steps, this is for example used to limit the maximum
- * downscaling factor that needs to be supported in one scaler.
- */
- SwsContext *cascaded_context[3];
- int cascaded_tmpStride[2][4];
- uint8_t *cascaded_tmp[2][4];
- int cascaded_mainindex;
+ AVFrame *frame_src;
+ AVFrame *frame_dst;
- double gamma_value;
- int is_internal_gamma;
- uint16_t *gamma;
- uint16_t *inv_gamma;
+ RangeList src_ranges;
- int numDesc;
- int descIndex[2];
- int numSlice;
- struct SwsSlice *slice;
- struct SwsFilterDescriptor *desc;
+ /* The cascaded_* fields allow splitting a scaler task into multiple
+ * sequential steps, this is for example used to limit the maximum
+ * downscaling factor that needs to be supported in one scaler.
+ */
+ SwsContext *cascaded_context[3];
+ int cascaded_tmpStride[2][4];
+ uint8_t *cascaded_tmp[2][4];
+ int cascaded_mainindex;
- uint32_t pal_yuv[256];
- uint32_t pal_rgb[256];
+ double gamma_value;
+ int is_internal_gamma;
+ uint16_t *gamma;
+ uint16_t *inv_gamma;
- float uint2float_lut[256];
+ int numDesc;
+ int descIndex[2];
+ int numSlice;
+ struct SwsSlice *slice;
+ struct SwsFilterDescriptor *desc;
- /**
- * @name Scaled horizontal lines ring buffer.
- * The horizontal scaler keeps just enough scaled lines in a ring buffer
- * so they may be passed to the vertical scaler. The pointers to the
- * allocated buffers for each line are duplicated in sequence in the ring
- * buffer to simplify indexing and avoid wrapping around between lines
- * inside the vertical scaler code. The wrapping is done before the
- * vertical scaler is called.
- */
- //@{
- int lastInLumBuf; ///< Last scaled horizontal luma/alpha line from source in the ring buffer.
- int lastInChrBuf; ///< Last scaled horizontal chroma line from source in the ring buffer.
- //@}
+ uint32_t pal_yuv[256];
+ uint32_t pal_rgb[256];
- uint8_t *formatConvBuffer;
- int needAlpha;
+ float uint2float_lut[256];
- /**
- * @name Horizontal and vertical filters.
- * To better understand the following fields, here is a pseudo-code of
- * their usage in filtering a horizontal line:
- * @code
- * for (i = 0; i < width; i++) {
- * dst[i] = 0;
- * for (j = 0; j < filterSize; j++)
- * dst[i] += src[ filterPos[i] + j ] * filter[ filterSize * i + j ];
- * dst[i] >>= FRAC_BITS; // The actual implementation is fixed-point.
- * }
- * @endcode
- */
- //@{
- int16_t *hLumFilter; ///< Array of horizontal filter coefficients for luma/alpha planes.
- int16_t *hChrFilter; ///< Array of horizontal filter coefficients for chroma planes.
- int16_t *vLumFilter; ///< Array of vertical filter coefficients for luma/alpha planes.
- int16_t *vChrFilter; ///< Array of vertical filter coefficients for chroma planes.
- int32_t *hLumFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for luma/alpha planes.
- int32_t *hChrFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for chroma planes.
- int32_t *vLumFilterPos; ///< Array of vertical filter starting positions for each dst[i] for luma/alpha planes.
- int32_t *vChrFilterPos; ///< Array of vertical filter starting positions for each dst[i] for chroma planes.
- int hLumFilterSize; ///< Horizontal filter size for luma/alpha pixels.
- int hChrFilterSize; ///< Horizontal filter size for chroma pixels.
- int vLumFilterSize; ///< Vertical filter size for luma/alpha pixels.
- int vChrFilterSize; ///< Vertical filter size for chroma pixels.
- //@}
+ /**
+ * @name Scaled horizontal lines ring buffer.
+ * The horizontal scaler keeps just enough scaled lines in a ring buffer
+ * so they may be passed to the vertical scaler. The pointers to the
+ * allocated buffers for each line are duplicated in sequence in the ring
+ * buffer to simplify indexing and avoid wrapping around between lines
+ * inside the vertical scaler code. The wrapping is done before the
+ * vertical scaler is called.
+ */
+ //@{
+ int lastInLumBuf; ///< Last scaled horizontal luma/alpha line from source in the ring buffer.
+ int lastInChrBuf; ///< Last scaled horizontal chroma line from source in the ring buffer.
+ //@}
- int lumMmxextFilterCodeSize; ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code size for luma/alpha planes.
- int chrMmxextFilterCodeSize; ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code size for chroma planes.
- uint8_t *lumMmxextFilterCode; ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code for luma/alpha planes.
- uint8_t *chrMmxextFilterCode; ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code for chroma planes.
+ uint8_t *formatConvBuffer;
+ int needAlpha;
- int canMMXEXTBeUsed;
- int warned_unuseable_bilinear;
+ /**
+ * @name Horizontal and vertical filters.
+ * To better understand the following fields, here is a pseudo-code of
+ * their usage in filtering a horizontal line:
+ * @code
+ * for (i = 0; i < width; i++) {
+ * dst[i] = 0;
+ * for (j = 0; j < filterSize; j++)
+ * dst[i] += src[ filterPos[i] + j ] * filter[ filterSize * i + j ];
+ * dst[i] >>= FRAC_BITS; // The actual implementation is fixed-point.
+ * }
+ * @endcode
+ */
+ //@{
+ int16_t *hLumFilter; ///< Array of horizontal filter coefficients for luma/alpha planes.
+ int16_t *hChrFilter; ///< Array of horizontal filter coefficients for chroma planes.
+ int16_t *vLumFilter; ///< Array of vertical filter coefficients for luma/alpha planes.
+ int16_t *vChrFilter; ///< Array of vertical filter coefficients for chroma planes.
+ int32_t *hLumFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for luma/alpha planes.
+ int32_t *hChrFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for chroma planes.
+ int32_t *vLumFilterPos; ///< Array of vertical filter starting positions for each dst[i] for luma/alpha planes.
+ int32_t *vChrFilterPos; ///< Array of vertical filter starting positions for each dst[i] for chroma planes.
+ int hLumFilterSize; ///< Horizontal filter size for luma/alpha pixels.
+ int hChrFilterSize; ///< Horizontal filter size for chroma pixels.
+ int vLumFilterSize; ///< Vertical filter size for luma/alpha pixels.
+ int vChrFilterSize; ///< Vertical filter size for chroma pixels.
+ //@}
- int dstY; ///< Last destination vertical line output from last slice.
- void *yuvTable; // pointer to the yuv->rgb table start so it can be freed()
- // alignment ensures the offset can be added in a single
- // instruction on e.g. ARM
- DECLARE_ALIGNED(16, int, table_gV)[256 + 2*YUVRGB_TABLE_HEADROOM];
- uint8_t *table_rV[256 + 2*YUVRGB_TABLE_HEADROOM];
- uint8_t *table_gU[256 + 2*YUVRGB_TABLE_HEADROOM];
- uint8_t *table_bU[256 + 2*YUVRGB_TABLE_HEADROOM];
- DECLARE_ALIGNED(16, int32_t, input_rgb2yuv_table)[16+40*4]; // This table can contain both C and SIMD formatted values, the C vales are always at the XY_IDX points
+ int lumMmxextFilterCodeSize; ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code size for luma/alpha planes.
+ int chrMmxextFilterCodeSize; ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code size for chroma planes.
+ uint8_t *lumMmxextFilterCode; ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code for luma/alpha planes.
+ uint8_t *chrMmxextFilterCode; ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code for chroma planes.
+
+ int canMMXEXTBeUsed;
+ int warned_unuseable_bilinear;
+
+ int dstY; ///< Last destination vertical line output from last slice.
+ void *yuvTable; // pointer to the yuv->rgb table start so it can be freed()
+ // alignment ensures the offset can be added in a single
+ // instruction on e.g. ARM
+ DECLARE_ALIGNED(16, int, table_gV)[256 + 2*YUVRGB_TABLE_HEADROOM];
+ uint8_t *table_rV[256 + 2*YUVRGB_TABLE_HEADROOM];
+ uint8_t *table_gU[256 + 2*YUVRGB_TABLE_HEADROOM];
+ uint8_t *table_bU[256 + 2*YUVRGB_TABLE_HEADROOM];
+ DECLARE_ALIGNED(16, int32_t, input_rgb2yuv_table)[16+40*4]; // This table can contain both C and SIMD formatted values, the C vales are always at the XY_IDX points
#define RY_IDX 0
#define GY_IDX 1
#define BY_IDX 2
@@ -470,22 +481,22 @@ struct SwsInternal {
#define BV_IDX 8
#define RGB2YUV_SHIFT 15
- int *dither_error[4];
+ int *dither_error[4];
- //Colorspace stuff
- int contrast, brightness, saturation; // for sws_getColorspaceDetails
- int srcColorspaceTable[4];
- int dstColorspaceTable[4];
- int src0Alpha;
- int dst0Alpha;
- int srcXYZ;
- int dstXYZ;
- int yuv2rgb_y_offset;
- int yuv2rgb_y_coeff;
- int yuv2rgb_v2r_coeff;
- int yuv2rgb_v2g_coeff;
- int yuv2rgb_u2g_coeff;
- int yuv2rgb_u2b_coeff;
+ //Colorspace stuff
+ int contrast, brightness, saturation; // for sws_getColorspaceDetails
+ int srcColorspaceTable[4];
+ int dstColorspaceTable[4];
+ int src0Alpha;
+ int dst0Alpha;
+ int srcXYZ;
+ int dstXYZ;
+ int yuv2rgb_y_offset;
+ int yuv2rgb_y_coeff;
+ int yuv2rgb_v2r_coeff;
+ int yuv2rgb_v2g_coeff;
+ int yuv2rgb_u2g_coeff;
+ int yuv2rgb_u2b_coeff;
#define RED_DITHER "0*8"
#define GREEN_DITHER "1*8"
@@ -513,199 +524,196 @@ struct SwsInternal {
#define DITHER32 "11*8+4*4*"AV_STRINGIFY(MAX_FILTER_SIZE)"*3+80"
#define DITHER32_INT (11*8+4*4*MAX_FILTER_SIZE*3+80) // value equal to above, used for checking that the struct hasn't been changed by mistake
- DECLARE_ALIGNED(8, uint64_t, redDither);
- DECLARE_ALIGNED(8, uint64_t, greenDither);
- DECLARE_ALIGNED(8, uint64_t, blueDither);
+ DECLARE_ALIGNED(8, uint64_t, redDither);
+ DECLARE_ALIGNED(8, uint64_t, greenDither);
+ DECLARE_ALIGNED(8, uint64_t, blueDither);
- DECLARE_ALIGNED(8, uint64_t, yCoeff);
- DECLARE_ALIGNED(8, uint64_t, vrCoeff);
- DECLARE_ALIGNED(8, uint64_t, ubCoeff);
- DECLARE_ALIGNED(8, uint64_t, vgCoeff);
- DECLARE_ALIGNED(8, uint64_t, ugCoeff);
- DECLARE_ALIGNED(8, uint64_t, yOffset);
- DECLARE_ALIGNED(8, uint64_t, uOffset);
- DECLARE_ALIGNED(8, uint64_t, vOffset);
- int32_t lumMmxFilter[4 * MAX_FILTER_SIZE];
- int32_t chrMmxFilter[4 * MAX_FILTER_SIZE];
- int dstW_mmx;
- DECLARE_ALIGNED(8, uint64_t, esp);
- DECLARE_ALIGNED(8, uint64_t, vRounder);
- DECLARE_ALIGNED(8, uint64_t, u_temp);
- DECLARE_ALIGNED(8, uint64_t, v_temp);
- DECLARE_ALIGNED(8, uint64_t, y_temp);
- int32_t alpMmxFilter[4 * MAX_FILTER_SIZE];
- // alignment of these values is not necessary, but merely here
- // to maintain the same offset across x8632 and x86-64. Once we
- // use proper offset macros in the asm, they can be removed.
- DECLARE_ALIGNED(8, ptrdiff_t, uv_off); ///< offset (in pixels) between u and v planes
- DECLARE_ALIGNED(8, ptrdiff_t, uv_offx2); ///< offset (in bytes) between u and v planes
- DECLARE_ALIGNED(8, uint16_t, dither16)[8];
- DECLARE_ALIGNED(8, uint32_t, dither32)[8];
+ DECLARE_ALIGNED(8, uint64_t, yCoeff);
+ DECLARE_ALIGNED(8, uint64_t, vrCoeff);
+ DECLARE_ALIGNED(8, uint64_t, ubCoeff);
+ DECLARE_ALIGNED(8, uint64_t, vgCoeff);
+ DECLARE_ALIGNED(8, uint64_t, ugCoeff);
+ DECLARE_ALIGNED(8, uint64_t, yOffset);
+ DECLARE_ALIGNED(8, uint64_t, uOffset);
+ DECLARE_ALIGNED(8, uint64_t, vOffset);
+ int32_t lumMmxFilter[4 * MAX_FILTER_SIZE];
+ int32_t chrMmxFilter[4 * MAX_FILTER_SIZE];
+ int dstW_mmx;
+ DECLARE_ALIGNED(8, uint64_t, esp);
+ DECLARE_ALIGNED(8, uint64_t, vRounder);
+ DECLARE_ALIGNED(8, uint64_t, u_temp);
+ DECLARE_ALIGNED(8, uint64_t, v_temp);
+ DECLARE_ALIGNED(8, uint64_t, y_temp);
+ int32_t alpMmxFilter[4 * MAX_FILTER_SIZE];
+ // alignment of these values is not necessary, but merely here
+ // to maintain the same offset across x8632 and x86-64. Once we
+ // use proper offset macros in the asm, they can be removed.
+ DECLARE_ALIGNED(8, ptrdiff_t, uv_off); ///< offset (in pixels) between u and v planes
+ DECLARE_ALIGNED(8, ptrdiff_t, uv_offx2); ///< offset (in bytes) between u and v planes
+ DECLARE_ALIGNED(8, uint16_t, dither16)[8];
+ DECLARE_ALIGNED(8, uint32_t, dither32)[8];
- const uint8_t *chrDither8, *lumDither8;
+ const uint8_t *chrDither8, *lumDither8;
#if HAVE_ALTIVEC
- vector signed short CY;
- vector signed short CRV;
- vector signed short CBU;
- vector signed short CGU;
- vector signed short CGV;
- vector signed short OY;
- vector unsigned short CSHIFT;
- vector signed short *vYCoeffsBank, *vCCoeffsBank;
+ vector signed short CY;
+ vector signed short CRV;
+ vector signed short CBU;
+ vector signed short CGU;
+ vector signed short CGV;
+ vector signed short OY;
+ vector unsigned short CSHIFT;
+ vector signed short *vYCoeffsBank, *vCCoeffsBank;
#endif
- int use_mmx_vfilter;
+ int use_mmx_vfilter;
/* pre defined color-spaces gamma */
#define XYZ_GAMMA (2.6)
#define RGB_GAMMA (2.2)
- SwsColorFunc xyz12Torgb48;
- SwsColorFunc rgb48Toxyz12;
- SwsColorXform xyz2rgb;
- SwsColorXform rgb2xyz;
+ SwsColorFunc xyz12Torgb48;
+ SwsColorFunc rgb48Toxyz12;
+ SwsColorXform xyz2rgb;
+ SwsColorXform rgb2xyz;
- /* function pointers for swscale() */
- yuv2planar1_fn yuv2plane1;
- yuv2planarX_fn yuv2planeX;
- yuv2interleavedX_fn yuv2nv12cX;
- yuv2packed1_fn yuv2packed1;
- yuv2packed2_fn yuv2packed2;
- yuv2packedX_fn yuv2packedX;
- yuv2anyX_fn yuv2anyX;
+ /* function pointers for swscale() */
+ yuv2planar1_fn yuv2plane1;
+ yuv2planarX_fn yuv2planeX;
+ yuv2interleavedX_fn yuv2nv12cX;
+ yuv2packed1_fn yuv2packed1;
+ yuv2packed2_fn yuv2packed2;
+ yuv2packedX_fn yuv2packedX;
+ yuv2anyX_fn yuv2anyX;
- /// Opaque data pointer passed to all input functions.
- void *input_opaque;
+ /// Opaque data pointer passed to all input functions.
+ void *input_opaque;
- planar1_YV12_fn lumToYV12;
- planar1_YV12_fn alpToYV12;
- planar2_YV12_fn chrToYV12;
+ planar1_YV12_fn lumToYV12;
+ planar1_YV12_fn alpToYV12;
+ planar2_YV12_fn chrToYV12;
- /**
- * Functions to read planar input, such as planar RGB, and convert
- * internally to Y/UV/A.
- */
- /** @{ */
- planarX_YV12_fn readLumPlanar;
- planarX_YV12_fn readAlpPlanar;
- planarX2_YV12_fn readChrPlanar;
- /** @} */
+ /**
+ * Functions to read planar input, such as planar RGB, and convert
+ * internally to Y/UV/A.
+ */
+ /** @{ */
+ planarX_YV12_fn readLumPlanar;
+ planarX_YV12_fn readAlpPlanar;
+ planarX2_YV12_fn readChrPlanar;
+ /** @} */
- /**
- * Scale one horizontal line of input data using a bilinear filter
- * to produce one line of output data. Compared to SwsInternal->hScale(),
- * please take note of the following caveats when using these:
- * - Scaling is done using only 7 bits instead of 14-bit coefficients.
- * - You can use no more than 5 input pixels to produce 4 output
- * pixels. Therefore, this filter should not be used for downscaling
- * by more than ~20% in width (because that equals more than 5/4th
- * downscaling and thus more than 5 pixels input per 4 pixels output).
- * - In general, bilinear filters create artifacts during downscaling
- * (even when <20%), because one output pixel will span more than one
- * input pixel, and thus some pixels will need edges of both neighbor
- * pixels to interpolate the output pixel. Since you can use at most
- * two input pixels per output pixel in bilinear scaling, this is
- * impossible and thus downscaling by any size will create artifacts.
- * To enable this type of scaling, set SWS_FLAG_FAST_BILINEAR
- * in SwsInternal->flags.
- */
- /** @{ */
- void (*hyscale_fast)(SwsInternal *c,
- int16_t *dst, int dstWidth,
- const uint8_t *src, int srcW, int xInc);
- void (*hcscale_fast)(SwsInternal *c,
- int16_t *dst1, int16_t *dst2, int dstWidth,
- const uint8_t *src1, const uint8_t *src2,
- int srcW, int xInc);
- /** @} */
+ /**
+ * Scale one horizontal line of input data using a bilinear filter
+ * to produce one line of output data. Compared to SwsInternal->hScale(),
+ * please take note of the following caveats when using these:
+ * - Scaling is done using only 7 bits instead of 14-bit coefficients.
+ * - You can use no more than 5 input pixels to produce 4 output
+ * pixels. Therefore, this filter should not be used for downscaling
+ * by more than ~20% in width (because that equals more than 5/4th
+ * downscaling and thus more than 5 pixels input per 4 pixels output).
+ * - In general, bilinear filters create artifacts during downscaling
+ * (even when <20%), because one output pixel will span more than one
+ * input pixel, and thus some pixels will need edges of both neighbor
+ * pixels to interpolate the output pixel. Since you can use at most
+ * two input pixels per output pixel in bilinear scaling, this is
+ * impossible and thus downscaling by any size will create artifacts.
+ * To enable this type of scaling, set SWS_FLAG_FAST_BILINEAR
+ * in SwsInternal->flags.
+ */
+ /** @{ */
+ void (*hyscale_fast)(SwsInternal *c,
+ int16_t *dst, int dstWidth,
+ const uint8_t *src, int srcW, int xInc);
+ void (*hcscale_fast)(SwsInternal *c,
+ int16_t *dst1, int16_t *dst2, int dstWidth,
+ const uint8_t *src1, const uint8_t *src2,
+ int srcW, int xInc);
+ /** @} */
- /**
- * Scale one horizontal line of input data using a filter over the input
- * lines, to produce one (differently sized) line of output data.
- *
- * @param dst pointer to destination buffer for horizontally scaled
- * data. If the number of bits per component of one
- * destination pixel (SwsInternal->dstBpc) is <= 10, data
- * will be 15 bpc in 16 bits (int16_t) width. Else (i.e.
- * SwsInternal->dstBpc == 16), data will be 19bpc in
- * 32 bits (int32_t) width.
- * @param dstW width of destination image
- * @param src pointer to source data to be scaled. If the number of
- * bits per component of a source pixel (SwsInternal->srcBpc)
- * is 8, this is 8bpc in 8 bits (uint8_t) width. Else
- * (i.e. SwsInternal->dstBpc > 8), this is native depth
- * in 16 bits (uint16_t) width. In other words, for 9-bit
- * YUV input, this is 9bpc, for 10-bit YUV input, this is
- * 10bpc, and for 16-bit RGB or YUV, this is 16bpc.
- * @param filter filter coefficients to be used per output pixel for
- * scaling. This contains 14bpp filtering coefficients.
- * Guaranteed to contain dstW * filterSize entries.
- * @param filterPos position of the first input pixel to be used for
- * each output pixel during scaling. Guaranteed to
- * contain dstW entries.
- * @param filterSize the number of input coefficients to be used (and
- * thus the number of input pixels to be used) for
- * creating a single output pixel. Is aligned to 4
- * (and input coefficients thus padded with zeroes)
- * to simplify creating SIMD code.
- */
- /** @{ */
- void (*hyScale)(SwsInternal *c, int16_t *dst, int dstW,
- const uint8_t *src, const int16_t *filter,
- const int32_t *filterPos, int filterSize);
- void (*hcScale)(SwsInternal *c, int16_t *dst, int dstW,
- const uint8_t *src, const int16_t *filter,
- const int32_t *filterPos, int filterSize);
- /** @} */
+ /**
+ * Scale one horizontal line of input data using a filter over the input
+ * lines, to produce one (differently sized) line of output data.
+ *
+ * @param dst pointer to destination buffer for horizontally scaled
+ * data. If the number of bits per component of one
+ * destination pixel (SwsInternal->dstBpc) is <= 10, data
+ * will be 15 bpc in 16 bits (int16_t) width. Else (i.e.
+ * SwsInternal->dstBpc == 16), data will be 19bpc in
+ * 32 bits (int32_t) width.
+ * @param dstW width of destination image
+ * @param src pointer to source data to be scaled. If the number of
+ * bits per component of a source pixel (SwsInternal->srcBpc)
+ * is 8, this is 8bpc in 8 bits (uint8_t) width. Else
+ * (i.e. SwsInternal->dstBpc > 8), this is native depth
+ * in 16 bits (uint16_t) width. In other words, for 9-bit
+ * YUV input, this is 9bpc, for 10-bit YUV input, this is
+ * 10bpc, and for 16-bit RGB or YUV, this is 16bpc.
+ * @param filter filter coefficients to be used per output pixel for
+ * scaling. This contains 14bpp filtering coefficients.
+ * Guaranteed to contain dstW * filterSize entries.
+ * @param filterPos position of the first input pixel to be used for
+ * each output pixel during scaling. Guaranteed to
+ * contain dstW entries.
+ * @param filterSize the number of input coefficients to be used (and
+ * thus the number of input pixels to be used) for
+ * creating a single output pixel. Is aligned to 4
+ * (and input coefficients thus padded with zeroes)
+ * to simplify creating SIMD code.
+ */
+ /** @{ */
+ void (*hyScale)(SwsInternal *c, int16_t *dst, int dstW,
+ const uint8_t *src, const int16_t *filter,
+ const int32_t *filterPos, int filterSize);
+ void (*hcScale)(SwsInternal *c, int16_t *dst, int dstW,
+ const uint8_t *src, const int16_t *filter,
+ const int32_t *filterPos, int filterSize);
+ /** @} */
- /**
- * Color range conversion functions if needed.
- * If SwsInternal->dstBpc is > 14:
- * - int16_t *dst (data is 15 bpc)
- * - uint16_t coeff
- * - int32_t offset
- * Otherwise (SwsInternal->dstBpc is <= 14):
- * - int32_t *dst (data is 19 bpc)
- * - uint32_t coeff
- * - int64_t offset
- */
- /** @{ */
- void (*lumConvertRange)(int16_t *dst, int width,
- uint32_t coeff, int64_t offset);
- void (*chrConvertRange)(int16_t *dst1, int16_t *dst2, int width,
- uint32_t coeff, int64_t offset);
- /** @} */
+ /**
+ * Color range conversion functions if needed.
+ * If SwsInternal->dstBpc is > 14:
+ * - int16_t *dst (data is 15 bpc)
+ * - uint16_t coeff
+ * - int32_t offset
+ * Otherwise (SwsInternal->dstBpc is <= 14):
+ * - int32_t *dst (data is 19 bpc)
+ * - uint32_t coeff
+ * - int64_t offset
+ */
+ /** @{ */
+ void (*lumConvertRange)(int16_t *dst, int width,
+ uint32_t coeff, int64_t offset);
+ void (*chrConvertRange)(int16_t *dst1, int16_t *dst2, int width,
+ uint32_t coeff, int64_t offset);
+ /** @} */
- uint32_t lumConvertRange_coeff;
- uint32_t chrConvertRange_coeff;
- int64_t lumConvertRange_offset;
- int64_t chrConvertRange_offset;
+ uint32_t lumConvertRange_coeff;
+ uint32_t chrConvertRange_coeff;
+ int64_t lumConvertRange_offset;
+ int64_t chrConvertRange_offset;
- int needs_hcscale; ///< Set if there are chroma planes to be converted.
+ int needs_hcscale; ///< Set if there are chroma planes to be converted.
- // scratch buffer for converting packed rgb0 sources
- // filled with a copy of the input frame + fully opaque alpha,
- // then passed as input to further conversion
- uint8_t *rgb0_scratch;
- unsigned int rgb0_scratch_allocated;
+ // scratch buffer for converting packed rgb0 sources
+ // filled with a copy of the input frame + fully opaque alpha,
+ // then passed as input to further conversion
+ uint8_t *rgb0_scratch;
+ unsigned int rgb0_scratch_allocated;
- // scratch buffer for converting XYZ sources
- // filled with the input converted to rgb48
- // then passed as input to further conversion
- uint8_t *xyz_scratch;
- unsigned int xyz_scratch_allocated;
+ // scratch buffer for converting XYZ sources
+ // filled with the input converted to rgb48
+ // then passed as input to further conversion
+ uint8_t *xyz_scratch;
+ unsigned int xyz_scratch_allocated;
- unsigned int dst_slice_align;
- atomic_int stride_unaligned_warned;
- atomic_int data_unaligned_warned;
- int color_conversion_warned;
+ unsigned int dst_slice_align;
+ atomic_int stride_unaligned_warned;
+ atomic_int data_unaligned_warned;
+ int color_conversion_warned;
- Half2FloatTables *h2f_tables;
-
- // Hardware specific private data
- void *hw_priv;
-
- int is_legacy_init;
+ Half2FloatTables *h2f_tables;
+ };
+ };
};
//FIXME check init (where 0)
@@ -714,8 +722,9 @@ static_assert(offsetof(SwsInternal, redDither) + DITHER32_INT == offsetof(SwsInt
#if ARCH_X86_64
/* x86 yuv2gbrp uses the SwsInternal for yuv coefficients
- if struct offsets change the asm needs to be updated too */
-static_assert(offsetof(SwsInternal, yuv2rgb_y_offset) == 40348,
+ * if struct offsets change the asm needs to be updated too
+ * See: libswscale/x86/output.asm */
+static_assert(offsetof(SwsInternal, yuv2rgb_y_offset) == 40332,
"yuv2rgb_y_offset must be updated in x86 asm");
#endif
diff --git a/libswscale/x86/output.asm b/libswscale/x86/output.asm
index f2e884780a..7a1e5d9bc1 100644
--- a/libswscale/x86/output.asm
+++ b/libswscale/x86/output.asm
@@ -582,7 +582,7 @@ yuv2nv12cX_fn yuv2nv21
%if ARCH_X86_64
struc SwsInternal
- .padding: resb 40348 ; offsetof(SwsInternal, yuv2rgb_y_offset)
+ .padding: resb 40332 ; offsetof(SwsInternal, yuv2rgb_y_offset)
.yuv2rgb_y_offset: resd 1
.yuv2rgb_y_coeff: resd 1
.yuv2rgb_v2r_coeff: resd 1
--
2.52.0
_______________________________________________
ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org
To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org
reply other threads: [~2026-02-26 18:08 UTC|newest]
Thread overview: [no followups] expand [flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=177212924425.25.7171214361113436911@29965ddac10e \
--to=ffmpeg-devel@ffmpeg.org \
--cc=code@ffmpeg.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git