[FFmpeg-devel] [PR] swscale: some API usage improvements; and a contentious union (PR #22297)

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed

From: Niklas Haas via ffmpeg-devel <ffmpeg-devel@ffmpeg.org>
To: ffmpeg-devel@ffmpeg.org
Cc: Niklas Haas <code@ffmpeg.org>
Subject: [FFmpeg-devel] [PR] swscale: some API usage improvements; and a contentious union (PR #22297)
Date: Thu, 26 Feb 2026 18:07:23 -0000
Message-ID: <177212924425.25.7171214361113436911@29965ddac10e> (raw)

PR #22297 opened by Niklas Haas (haasn)
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/22297
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/22297.patch

I have no particularly strong feelings about the second and third commits in this series, so feel free to reject. The first commit though is an API safety improvement for sure.


>From 61476f77b07a0474d22892cd4b894aefc0bcec6d Mon Sep 17 00:00:00 2001
From: Niklas Haas <git@haasn.dev>
Date: Thu, 26 Feb 2026 18:46:04 +0100
Subject: [PATCH 1/3] swscale: explicitly track if a context is "legacy" or not

The legacy API is defined by sws_init_context(), sws_scale() etc., whereas
the "modern" API is defined by just using sws_scale_frame() without prior
init call.

This int allows us to cleanly distinguish the type of context, paving the
way for some minor refactoring.

As an immediate benefit, we now gain a bunch of explicit error checks to
ensure the API is used correctly (i.e. sws_scale() not called before
sws_init_context()).

Sponsored-by: Sovereign Tech Fund
Signed-off-by: Niklas Haas <git@haasn.dev>
---
 libswscale/swscale.c          | 15 +++++++++++++--
 libswscale/swscale_internal.h |  2 ++
 libswscale/utils.c            | 12 ++++++++++--
 3 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 5dbd93e0a2..6cf91116cd 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -1208,6 +1208,8 @@ static int scale_internal(SwsContext *sws,
 void sws_frame_end(SwsContext *sws)
 {
     SwsInternal *c = sws_internal(sws);
+    if (!c->is_legacy_init)
+        return;
     av_frame_unref(c->frame_src);
     av_frame_unref(c->frame_dst);
     c->src_ranges.nb_ranges = 0;
@@ -1217,6 +1219,8 @@ int sws_frame_start(SwsContext *sws, AVFrame *dst, const AVFrame *src)
 {
     SwsInternal *c = sws_internal(sws);
     int ret, allocated = 0;
+    if (!c->is_legacy_init)
+        return AVERROR(EINVAL);
 
     ret = av_frame_ref(c->frame_src, src);
     if (ret < 0)
@@ -1249,6 +1253,8 @@ int sws_send_slice(SwsContext *sws, unsigned int slice_start,
 {
     SwsInternal *c = sws_internal(sws);
     int ret;
+    if (!c->is_legacy_init)
+        return AVERROR(EINVAL);
 
     ret = ff_range_add(&c->src_ranges, slice_start, slice_height);
     if (ret < 0)
@@ -1272,6 +1278,8 @@ int sws_receive_slice(SwsContext *sws, unsigned int slice_start,
     SwsInternal *c = sws_internal(sws);
     unsigned int align = sws_receive_slice_alignment(sws);
     uint8_t *dst[4];
+    if (!c->is_legacy_init)
+        return AVERROR(EINVAL);
 
     /* wait until complete input has been received */
     if (!(c->src_ranges.nb_ranges == 1        &&
@@ -1379,9 +1387,9 @@ int sws_scale_frame(SwsContext *sws, AVFrame *dst, const AVFrame *src)
     if (!src || !dst)
         return AVERROR(EINVAL);
 
-    if (c->frame_src) {
+    if (c->is_legacy_init) {
         /* Context has been initialized with explicit values, fall back to
-         * legacy API */
+         * legacy API behavior. */
         ret = sws_frame_start(sws, dst, src);
         if (ret < 0)
             return ret;
@@ -1552,6 +1560,9 @@ int attribute_align_arg sws_scale(SwsContext *sws,
                                   const int dstStride[])
 {
     SwsInternal *c = sws_internal(sws);
+    if (!c->is_legacy_init)
+        return AVERROR(EINVAL);
+
     if (c->nb_slice_ctx) {
         sws = c->slice_ctx[0];
         c = sws_internal(sws);
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 81ec5ef0cc..e70b3ccbff 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -704,6 +704,8 @@ struct SwsInternal {
 
     // Hardware specific private data
     void *hw_priv;
+
+    int is_legacy_init;
 };
 //FIXME check init (where 0)
 
diff --git a/libswscale/utils.c b/libswscale/utils.c
index 42b49a5cd5..8ce277b06e 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -854,6 +854,8 @@ int sws_setColorspaceDetails(SwsContext *sws, const int inv_table[4],
     const AVPixFmtDescriptor *desc_dst;
     const AVPixFmtDescriptor *desc_src;
     int ret, need_reinit = 0;
+    if (!c || !c->is_legacy_init)
+        return AVERROR(EINVAL);
 
     if (c->nb_slice_ctx) {
         int parent_ret = 0;
@@ -1009,8 +1011,8 @@ int sws_getColorspaceDetails(SwsContext *sws, int **inv_table,
                              int *brightness, int *contrast, int *saturation)
 {
     SwsInternal *c = sws_internal(sws);
-    if (!c)
-        return -1;
+    if (!c || !c->is_legacy_init)
+        return AVERROR(EINVAL);
 
     if (c->nb_slice_ctx) {
         return sws_getColorspaceDetails(c->slice_ctx[0], inv_table, srcRange,
@@ -1899,6 +1901,7 @@ av_cold int sws_init_context(SwsContext *sws, SwsFilter *srcFilter,
     enum AVPixelFormat src_format, dst_format;
     int ret;
 
+    c->is_legacy_init = 1;
     c->frame_src = av_frame_alloc();
     c->frame_dst = av_frame_alloc();
     if (!c->frame_src || !c->frame_dst)
@@ -2270,6 +2273,11 @@ void sws_freeContext(SwsContext *sws)
     for (i = 0; i < FF_ARRAY_ELEMS(c->graph); i++)
         ff_sws_graph_free(&c->graph[i]);
 
+    if (!c->is_legacy_init) {
+        av_free(c);
+        return;
+    }
+
     for (i = 0; i < c->nb_slice_ctx; i++)
         sws_freeContext(c->slice_ctx[i]);
     av_freep(&c->slice_ctx);
-- 
2.52.0


>From 9242e034b45d142b6e75e1183f3d3778a00113f3 Mon Sep 17 00:00:00 2001
From: Niklas Haas <git@haasn.dev>
Date: Thu, 26 Feb 2026 18:55:13 +0100
Subject: [PATCH 2/3] swscale/utils: reset non-legacy/dynamic fields on legacy
 init

Clear out any leftover "dynamic API" state when sws_init_context() is called,
in the unlikely case that an API user switches to the legacy API after using
the modern one.

Sponsored-by: Sovereign Tech Fund
Signed-off-by: Niklas Haas <git@haasn.dev>
---
 libswscale/utils.c | 29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/libswscale/utils.c b/libswscale/utils.c
index 8ce277b06e..125b580aef 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -1893,6 +1893,24 @@ static int context_init_threaded(SwsContext *sws,
     return 0;
 }
 
+/**
+ * Clear out any state from the modern non-legacy API, and reset the context
+ * back to a state as if sws_scale_frame() had never been used with it.
+ */
+static void reset_dynamic_state(SwsContext *sws)
+{
+    SwsInternal *c = sws_internal(sws);
+    if (c->is_legacy_init)
+        return;
+
+    for (int i = 0; i < FF_ARRAY_ELEMS(c->graph); i++)
+        ff_sws_graph_free(&c->graph[i]);
+
+#if CONFIG_VULKAN
+    ff_sws_vk_uninit(sws);
+#endif
+}
+
 av_cold int sws_init_context(SwsContext *sws, SwsFilter *srcFilter,
                              SwsFilter *dstFilter)
 {
@@ -1901,6 +1919,8 @@ av_cold int sws_init_context(SwsContext *sws, SwsFilter *srcFilter,
     enum AVPixelFormat src_format, dst_format;
     int ret;
 
+    /* Clear any state related to the modern non-legacy API */
+    reset_dynamic_state(sws);
     c->is_legacy_init = 1;
     c->frame_src = av_frame_alloc();
     c->frame_dst = av_frame_alloc();
@@ -2266,18 +2286,13 @@ void sws_freeContext(SwsContext *sws)
     if (!c)
         return;
 
-#if CONFIG_VULKAN
-    ff_sws_vk_uninit(sws);
-#endif
-
-    for (i = 0; i < FF_ARRAY_ELEMS(c->graph); i++)
-        ff_sws_graph_free(&c->graph[i]);
-
     if (!c->is_legacy_init) {
+        reset_dynamic_state(sws);
         av_free(c);
         return;
     }
 
+    /* Uninit legacy API fields */
     for (i = 0; i < c->nb_slice_ctx; i++)
         sws_freeContext(c->slice_ctx[i]);
     av_freep(&c->slice_ctx);
-- 
2.52.0


>From 319054d748dab70e7732cd338da167c85c681b12 Mon Sep 17 00:00:00 2001
From: Niklas Haas <git@haasn.dev>
Date: Thu, 26 Feb 2026 18:33:35 +0100
Subject: [PATCH 3/3] swscale: define legacy and modern context internals as
 union

The previous commits ensure that we don't mix up these APIs.

The main benefits include:

1. Clearer code organization and communication of purpose

2. Better cache locality for the new code, which now doesn't need to
   read from the end of a ~40 kB struct to access newly added fields.

3. Ability to extend the non-legacy context internals without constantly
   breaking the hard-coded ASM offsets or needlessly growing this struct.

I've allowed myself the luxury of not double-indenting the nested union/state
blocks.

Sponsored-by: Sovereign Tech Fund
Signed-off-by: Niklas Haas <git@haasn.dev>
---
 libswscale/swscale_internal.h | 601 +++++++++++++++++-----------------
 libswscale/x86/output.asm     |   2 +-
 2 files changed, 306 insertions(+), 297 deletions(-)

diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index e70b3ccbff..c9f00dc789 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -334,131 +334,142 @@ struct SwsInternal {
     /* Parent context (for slice contexts) */
     SwsContext *parent;
 
-    AVSliceThread      *slicethread;
-    SwsContext        **slice_ctx;
-    int                *slice_err;
-    int              nb_slice_ctx;
+    int is_legacy_init;
+    union {
 
-    /* Scaling graph, reinitialized dynamically as needed. */
-    SwsGraph *graph[2]; /* top, bottom fields */
+    /* Dynamic graph-based API (sws_scale_frame() without init) */
+    struct {
+        /* Scaling graph, reinitialized dynamically as needed. */
+        SwsGraph *graph[2]; /* top, bottom fields */
 
-    // values passed to current sws_receive_slice() call
-    int dst_slice_start;
-    int dst_slice_height;
+        /* Hardware specific private data. */
+        void *hw_priv;
+    };
 
-    /**
-     * Note that src, dst, srcStride, dstStride will be copied in the
-     * sws_scale() wrapper so they can be freely modified here.
-     */
-    SwsFunc convert_unscaled;
-    int chrSrcW;                  ///< Width  of source      chroma     planes.
-    int chrSrcH;                  ///< Height of source      chroma     planes.
-    int chrDstW;                  ///< Width  of destination chroma     planes.
-    int chrDstH;                  ///< Height of destination chroma     planes.
-    int lumXInc, chrXInc;
-    int lumYInc, chrYInc;
-    int dstFormatBpp;             ///< Number of bits per pixel of the destination pixel format.
-    int srcFormatBpp;             ///< Number of bits per pixel of the source      pixel format.
-    int dstBpc, srcBpc;
-    int chrSrcHSubSample;         ///< Binary logarithm of horizontal subsampling factor between luma/alpha and chroma planes in source      image.
-    int chrSrcVSubSample;         ///< Binary logarithm of vertical   subsampling factor between luma/alpha and chroma planes in source      image.
-    int chrDstHSubSample;         ///< Binary logarithm of horizontal subsampling factor between luma/alpha and chroma planes in destination image.
-    int chrDstVSubSample;         ///< Binary logarithm of vertical   subsampling factor between luma/alpha and chroma planes in destination image.
-    int vChrDrop;                 ///< Binary logarithm of extra vertical subsampling factor in source image chroma planes specified by user.
-    int sliceDir;                 ///< Direction that slices are fed to the scaler (1 = top-to-bottom, -1 = bottom-to-top).
+    /* Legacy static API (sws_init_context(), sws_scale()) */
+    struct {
+        AVSliceThread      *slicethread;
+        SwsContext        **slice_ctx;
+        int                *slice_err;
+        int              nb_slice_ctx;
 
-    AVFrame *frame_src;
-    AVFrame *frame_dst;
+        // values passed to current sws_receive_slice() call
+        int dst_slice_start;
+        int dst_slice_height;
 
-    RangeList src_ranges;
+        /**
+        * Note that src, dst, srcStride, dstStride will be copied in the
+        * sws_scale() wrapper so they can be freely modified here.
+        */
+        SwsFunc convert_unscaled;
+        int chrSrcW;                  ///< Width  of source      chroma     planes.
+        int chrSrcH;                  ///< Height of source      chroma     planes.
+        int chrDstW;                  ///< Width  of destination chroma     planes.
+        int chrDstH;                  ///< Height of destination chroma     planes.
+        int lumXInc, chrXInc;
+        int lumYInc, chrYInc;
+        int dstFormatBpp;             ///< Number of bits per pixel of the destination pixel format.
+        int srcFormatBpp;             ///< Number of bits per pixel of the source      pixel format.
+        int dstBpc, srcBpc;
+        int chrSrcHSubSample;         ///< Binary logarithm of horizontal subsampling factor between luma/alpha and chroma planes in source      image.
+        int chrSrcVSubSample;         ///< Binary logarithm of vertical   subsampling factor between luma/alpha and chroma planes in source      image.
+        int chrDstHSubSample;         ///< Binary logarithm of horizontal subsampling factor between luma/alpha and chroma planes in destination image.
+        int chrDstVSubSample;         ///< Binary logarithm of vertical   subsampling factor between luma/alpha and chroma planes in destination image.
+        int vChrDrop;                 ///< Binary logarithm of extra vertical subsampling factor in source image chroma planes specified by user.
+        int sliceDir;                 ///< Direction that slices are fed to the scaler (1 = top-to-bottom, -1 = bottom-to-top).
 
-    /* The cascaded_* fields allow splitting a scaler task into multiple
-     * sequential steps, this is for example used to limit the maximum
-     * downscaling factor that needs to be supported in one scaler.
-     */
-    SwsContext *cascaded_context[3];
-    int cascaded_tmpStride[2][4];
-    uint8_t *cascaded_tmp[2][4];
-    int cascaded_mainindex;
+        AVFrame *frame_src;
+        AVFrame *frame_dst;
 
-    double gamma_value;
-    int is_internal_gamma;
-    uint16_t *gamma;
-    uint16_t *inv_gamma;
+        RangeList src_ranges;
 
-    int numDesc;
-    int descIndex[2];
-    int numSlice;
-    struct SwsSlice *slice;
-    struct SwsFilterDescriptor *desc;
+        /* The cascaded_* fields allow splitting a scaler task into multiple
+        * sequential steps, this is for example used to limit the maximum
+        * downscaling factor that needs to be supported in one scaler.
+        */
+        SwsContext *cascaded_context[3];
+        int cascaded_tmpStride[2][4];
+        uint8_t *cascaded_tmp[2][4];
+        int cascaded_mainindex;
 
-    uint32_t pal_yuv[256];
-    uint32_t pal_rgb[256];
+        double gamma_value;
+        int is_internal_gamma;
+        uint16_t *gamma;
+        uint16_t *inv_gamma;
 
-    float uint2float_lut[256];
+        int numDesc;
+        int descIndex[2];
+        int numSlice;
+        struct SwsSlice *slice;
+        struct SwsFilterDescriptor *desc;
 
-    /**
-     * @name Scaled horizontal lines ring buffer.
-     * The horizontal scaler keeps just enough scaled lines in a ring buffer
-     * so they may be passed to the vertical scaler. The pointers to the
-     * allocated buffers for each line are duplicated in sequence in the ring
-     * buffer to simplify indexing and avoid wrapping around between lines
-     * inside the vertical scaler code. The wrapping is done before the
-     * vertical scaler is called.
-     */
-    //@{
-    int lastInLumBuf;             ///< Last scaled horizontal luma/alpha line from source in the ring buffer.
-    int lastInChrBuf;             ///< Last scaled horizontal chroma     line from source in the ring buffer.
-    //@}
+        uint32_t pal_yuv[256];
+        uint32_t pal_rgb[256];
 
-    uint8_t *formatConvBuffer;
-    int needAlpha;
+        float uint2float_lut[256];
 
-    /**
-     * @name Horizontal and vertical filters.
-     * To better understand the following fields, here is a pseudo-code of
-     * their usage in filtering a horizontal line:
-     * @code
-     * for (i = 0; i < width; i++) {
-     *     dst[i] = 0;
-     *     for (j = 0; j < filterSize; j++)
-     *         dst[i] += src[ filterPos[i] + j ] * filter[ filterSize * i + j ];
-     *     dst[i] >>= FRAC_BITS; // The actual implementation is fixed-point.
-     * }
-     * @endcode
-     */
-    //@{
-    int16_t *hLumFilter;          ///< Array of horizontal filter coefficients for luma/alpha planes.
-    int16_t *hChrFilter;          ///< Array of horizontal filter coefficients for chroma     planes.
-    int16_t *vLumFilter;          ///< Array of vertical   filter coefficients for luma/alpha planes.
-    int16_t *vChrFilter;          ///< Array of vertical   filter coefficients for chroma     planes.
-    int32_t *hLumFilterPos;       ///< Array of horizontal filter starting positions for each dst[i] for luma/alpha planes.
-    int32_t *hChrFilterPos;       ///< Array of horizontal filter starting positions for each dst[i] for chroma     planes.
-    int32_t *vLumFilterPos;       ///< Array of vertical   filter starting positions for each dst[i] for luma/alpha planes.
-    int32_t *vChrFilterPos;       ///< Array of vertical   filter starting positions for each dst[i] for chroma     planes.
-    int hLumFilterSize;           ///< Horizontal filter size for luma/alpha pixels.
-    int hChrFilterSize;           ///< Horizontal filter size for chroma     pixels.
-    int vLumFilterSize;           ///< Vertical   filter size for luma/alpha pixels.
-    int vChrFilterSize;           ///< Vertical   filter size for chroma     pixels.
-    //@}
+        /**
+        * @name Scaled horizontal lines ring buffer.
+        * The horizontal scaler keeps just enough scaled lines in a ring buffer
+        * so they may be passed to the vertical scaler. The pointers to the
+        * allocated buffers for each line are duplicated in sequence in the ring
+        * buffer to simplify indexing and avoid wrapping around between lines
+        * inside the vertical scaler code. The wrapping is done before the
+        * vertical scaler is called.
+        */
+        //@{
+        int lastInLumBuf;             ///< Last scaled horizontal luma/alpha line from source in the ring buffer.
+        int lastInChrBuf;             ///< Last scaled horizontal chroma     line from source in the ring buffer.
+        //@}
 
-    int lumMmxextFilterCodeSize;  ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code size for luma/alpha planes.
-    int chrMmxextFilterCodeSize;  ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code size for chroma planes.
-    uint8_t *lumMmxextFilterCode; ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code for luma/alpha planes.
-    uint8_t *chrMmxextFilterCode; ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code for chroma planes.
+        uint8_t *formatConvBuffer;
+        int needAlpha;
 
-    int canMMXEXTBeUsed;
-    int warned_unuseable_bilinear;
+        /**
+        * @name Horizontal and vertical filters.
+        * To better understand the following fields, here is a pseudo-code of
+        * their usage in filtering a horizontal line:
+        * @code
+        * for (i = 0; i < width; i++) {
+        *     dst[i] = 0;
+        *     for (j = 0; j < filterSize; j++)
+        *         dst[i] += src[ filterPos[i] + j ] * filter[ filterSize * i + j ];
+        *     dst[i] >>= FRAC_BITS; // The actual implementation is fixed-point.
+        * }
+        * @endcode
+        */
+        //@{
+        int16_t *hLumFilter;          ///< Array of horizontal filter coefficients for luma/alpha planes.
+        int16_t *hChrFilter;          ///< Array of horizontal filter coefficients for chroma     planes.
+        int16_t *vLumFilter;          ///< Array of vertical   filter coefficients for luma/alpha planes.
+        int16_t *vChrFilter;          ///< Array of vertical   filter coefficients for chroma     planes.
+        int32_t *hLumFilterPos;       ///< Array of horizontal filter starting positions for each dst[i] for luma/alpha planes.
+        int32_t *hChrFilterPos;       ///< Array of horizontal filter starting positions for each dst[i] for chroma     planes.
+        int32_t *vLumFilterPos;       ///< Array of vertical   filter starting positions for each dst[i] for luma/alpha planes.
+        int32_t *vChrFilterPos;       ///< Array of vertical   filter starting positions for each dst[i] for chroma     planes.
+        int hLumFilterSize;           ///< Horizontal filter size for luma/alpha pixels.
+        int hChrFilterSize;           ///< Horizontal filter size for chroma     pixels.
+        int vLumFilterSize;           ///< Vertical   filter size for luma/alpha pixels.
+        int vChrFilterSize;           ///< Vertical   filter size for chroma     pixels.
+        //@}
 
-    int dstY;                     ///< Last destination vertical line output from last slice.
-    void *yuvTable;             // pointer to the yuv->rgb table start so it can be freed()
-    // alignment ensures the offset can be added in a single
-    // instruction on e.g. ARM
-    DECLARE_ALIGNED(16, int, table_gV)[256 + 2*YUVRGB_TABLE_HEADROOM];
-    uint8_t *table_rV[256 + 2*YUVRGB_TABLE_HEADROOM];
-    uint8_t *table_gU[256 + 2*YUVRGB_TABLE_HEADROOM];
-    uint8_t *table_bU[256 + 2*YUVRGB_TABLE_HEADROOM];
-    DECLARE_ALIGNED(16, int32_t, input_rgb2yuv_table)[16+40*4]; // This table can contain both C and SIMD formatted values, the C vales are always at the XY_IDX points
+        int lumMmxextFilterCodeSize;  ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code size for luma/alpha planes.
+        int chrMmxextFilterCodeSize;  ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code size for chroma planes.
+        uint8_t *lumMmxextFilterCode; ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code for luma/alpha planes.
+        uint8_t *chrMmxextFilterCode; ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code for chroma planes.
+
+        int canMMXEXTBeUsed;
+        int warned_unuseable_bilinear;
+
+        int dstY;                     ///< Last destination vertical line output from last slice.
+        void *yuvTable;             // pointer to the yuv->rgb table start so it can be freed()
+        // alignment ensures the offset can be added in a single
+        // instruction on e.g. ARM
+        DECLARE_ALIGNED(16, int, table_gV)[256 + 2*YUVRGB_TABLE_HEADROOM];
+        uint8_t *table_rV[256 + 2*YUVRGB_TABLE_HEADROOM];
+        uint8_t *table_gU[256 + 2*YUVRGB_TABLE_HEADROOM];
+        uint8_t *table_bU[256 + 2*YUVRGB_TABLE_HEADROOM];
+        DECLARE_ALIGNED(16, int32_t, input_rgb2yuv_table)[16+40*4]; // This table can contain both C and SIMD formatted values, the C vales are always at the XY_IDX points
 #define RY_IDX 0
 #define GY_IDX 1
 #define BY_IDX 2
@@ -470,22 +481,22 @@ struct SwsInternal {
 #define BV_IDX 8
 #define RGB2YUV_SHIFT 15
 
-    int *dither_error[4];
+        int *dither_error[4];
 
-    //Colorspace stuff
-    int contrast, brightness, saturation;    // for sws_getColorspaceDetails
-    int srcColorspaceTable[4];
-    int dstColorspaceTable[4];
-    int src0Alpha;
-    int dst0Alpha;
-    int srcXYZ;
-    int dstXYZ;
-    int yuv2rgb_y_offset;
-    int yuv2rgb_y_coeff;
-    int yuv2rgb_v2r_coeff;
-    int yuv2rgb_v2g_coeff;
-    int yuv2rgb_u2g_coeff;
-    int yuv2rgb_u2b_coeff;
+        //Colorspace stuff
+        int contrast, brightness, saturation;    // for sws_getColorspaceDetails
+        int srcColorspaceTable[4];
+        int dstColorspaceTable[4];
+        int src0Alpha;
+        int dst0Alpha;
+        int srcXYZ;
+        int dstXYZ;
+        int yuv2rgb_y_offset;
+        int yuv2rgb_y_coeff;
+        int yuv2rgb_v2r_coeff;
+        int yuv2rgb_v2g_coeff;
+        int yuv2rgb_u2g_coeff;
+        int yuv2rgb_u2b_coeff;
 
 #define RED_DITHER            "0*8"
 #define GREEN_DITHER          "1*8"
@@ -513,199 +524,196 @@ struct SwsInternal {
 #define DITHER32              "11*8+4*4*"AV_STRINGIFY(MAX_FILTER_SIZE)"*3+80"
 #define DITHER32_INT          (11*8+4*4*MAX_FILTER_SIZE*3+80) // value equal to above, used for checking that the struct hasn't been changed by mistake
 
-    DECLARE_ALIGNED(8, uint64_t, redDither);
-    DECLARE_ALIGNED(8, uint64_t, greenDither);
-    DECLARE_ALIGNED(8, uint64_t, blueDither);
+        DECLARE_ALIGNED(8, uint64_t, redDither);
+        DECLARE_ALIGNED(8, uint64_t, greenDither);
+        DECLARE_ALIGNED(8, uint64_t, blueDither);
 
-    DECLARE_ALIGNED(8, uint64_t, yCoeff);
-    DECLARE_ALIGNED(8, uint64_t, vrCoeff);
-    DECLARE_ALIGNED(8, uint64_t, ubCoeff);
-    DECLARE_ALIGNED(8, uint64_t, vgCoeff);
-    DECLARE_ALIGNED(8, uint64_t, ugCoeff);
-    DECLARE_ALIGNED(8, uint64_t, yOffset);
-    DECLARE_ALIGNED(8, uint64_t, uOffset);
-    DECLARE_ALIGNED(8, uint64_t, vOffset);
-    int32_t lumMmxFilter[4 * MAX_FILTER_SIZE];
-    int32_t chrMmxFilter[4 * MAX_FILTER_SIZE];
-    int dstW_mmx;
-    DECLARE_ALIGNED(8, uint64_t, esp);
-    DECLARE_ALIGNED(8, uint64_t, vRounder);
-    DECLARE_ALIGNED(8, uint64_t, u_temp);
-    DECLARE_ALIGNED(8, uint64_t, v_temp);
-    DECLARE_ALIGNED(8, uint64_t, y_temp);
-    int32_t alpMmxFilter[4 * MAX_FILTER_SIZE];
-    // alignment of these values is not necessary, but merely here
-    // to maintain the same offset across x8632 and x86-64. Once we
-    // use proper offset macros in the asm, they can be removed.
-    DECLARE_ALIGNED(8, ptrdiff_t, uv_off); ///< offset (in pixels) between u and v planes
-    DECLARE_ALIGNED(8, ptrdiff_t, uv_offx2); ///< offset (in bytes) between u and v planes
-    DECLARE_ALIGNED(8, uint16_t, dither16)[8];
-    DECLARE_ALIGNED(8, uint32_t, dither32)[8];
+        DECLARE_ALIGNED(8, uint64_t, yCoeff);
+        DECLARE_ALIGNED(8, uint64_t, vrCoeff);
+        DECLARE_ALIGNED(8, uint64_t, ubCoeff);
+        DECLARE_ALIGNED(8, uint64_t, vgCoeff);
+        DECLARE_ALIGNED(8, uint64_t, ugCoeff);
+        DECLARE_ALIGNED(8, uint64_t, yOffset);
+        DECLARE_ALIGNED(8, uint64_t, uOffset);
+        DECLARE_ALIGNED(8, uint64_t, vOffset);
+        int32_t lumMmxFilter[4 * MAX_FILTER_SIZE];
+        int32_t chrMmxFilter[4 * MAX_FILTER_SIZE];
+        int dstW_mmx;
+        DECLARE_ALIGNED(8, uint64_t, esp);
+        DECLARE_ALIGNED(8, uint64_t, vRounder);
+        DECLARE_ALIGNED(8, uint64_t, u_temp);
+        DECLARE_ALIGNED(8, uint64_t, v_temp);
+        DECLARE_ALIGNED(8, uint64_t, y_temp);
+        int32_t alpMmxFilter[4 * MAX_FILTER_SIZE];
+        // alignment of these values is not necessary, but merely here
+        // to maintain the same offset across x8632 and x86-64. Once we
+        // use proper offset macros in the asm, they can be removed.
+        DECLARE_ALIGNED(8, ptrdiff_t, uv_off); ///< offset (in pixels) between u and v planes
+        DECLARE_ALIGNED(8, ptrdiff_t, uv_offx2); ///< offset (in bytes) between u and v planes
+        DECLARE_ALIGNED(8, uint16_t, dither16)[8];
+        DECLARE_ALIGNED(8, uint32_t, dither32)[8];
 
-    const uint8_t *chrDither8, *lumDither8;
+        const uint8_t *chrDither8, *lumDither8;
 
 #if HAVE_ALTIVEC
-    vector signed short   CY;
-    vector signed short   CRV;
-    vector signed short   CBU;
-    vector signed short   CGU;
-    vector signed short   CGV;
-    vector signed short   OY;
-    vector unsigned short CSHIFT;
-    vector signed short  *vYCoeffsBank, *vCCoeffsBank;
+        vector signed short   CY;
+        vector signed short   CRV;
+        vector signed short   CBU;
+        vector signed short   CGU;
+        vector signed short   CGV;
+        vector signed short   OY;
+        vector unsigned short CSHIFT;
+        vector signed short  *vYCoeffsBank, *vCCoeffsBank;
 #endif
 
-    int use_mmx_vfilter;
+        int use_mmx_vfilter;
 
 /* pre defined color-spaces gamma */
 #define XYZ_GAMMA (2.6)
 #define RGB_GAMMA (2.2)
-    SwsColorFunc  xyz12Torgb48;
-    SwsColorFunc  rgb48Toxyz12;
-    SwsColorXform xyz2rgb;
-    SwsColorXform rgb2xyz;
+        SwsColorFunc  xyz12Torgb48;
+        SwsColorFunc  rgb48Toxyz12;
+        SwsColorXform xyz2rgb;
+        SwsColorXform rgb2xyz;
 
-    /* function pointers for swscale() */
-    yuv2planar1_fn yuv2plane1;
-    yuv2planarX_fn yuv2planeX;
-    yuv2interleavedX_fn yuv2nv12cX;
-    yuv2packed1_fn yuv2packed1;
-    yuv2packed2_fn yuv2packed2;
-    yuv2packedX_fn yuv2packedX;
-    yuv2anyX_fn yuv2anyX;
+        /* function pointers for swscale() */
+        yuv2planar1_fn yuv2plane1;
+        yuv2planarX_fn yuv2planeX;
+        yuv2interleavedX_fn yuv2nv12cX;
+        yuv2packed1_fn yuv2packed1;
+        yuv2packed2_fn yuv2packed2;
+        yuv2packedX_fn yuv2packedX;
+        yuv2anyX_fn yuv2anyX;
 
-    /// Opaque data pointer passed to all input functions.
-    void *input_opaque;
+        /// Opaque data pointer passed to all input functions.
+        void *input_opaque;
 
-    planar1_YV12_fn lumToYV12;
-    planar1_YV12_fn alpToYV12;
-    planar2_YV12_fn chrToYV12;
+        planar1_YV12_fn lumToYV12;
+        planar1_YV12_fn alpToYV12;
+        planar2_YV12_fn chrToYV12;
 
-    /**
-     * Functions to read planar input, such as planar RGB, and convert
-     * internally to Y/UV/A.
-     */
-    /** @{ */
-    planarX_YV12_fn  readLumPlanar;
-    planarX_YV12_fn  readAlpPlanar;
-    planarX2_YV12_fn readChrPlanar;
-    /** @} */
+        /**
+        * Functions to read planar input, such as planar RGB, and convert
+        * internally to Y/UV/A.
+        */
+        /** @{ */
+        planarX_YV12_fn  readLumPlanar;
+        planarX_YV12_fn  readAlpPlanar;
+        planarX2_YV12_fn readChrPlanar;
+        /** @} */
 
-    /**
-     * Scale one horizontal line of input data using a bilinear filter
-     * to produce one line of output data. Compared to SwsInternal->hScale(),
-     * please take note of the following caveats when using these:
-     * - Scaling is done using only 7 bits instead of 14-bit coefficients.
-     * - You can use no more than 5 input pixels to produce 4 output
-     *   pixels. Therefore, this filter should not be used for downscaling
-     *   by more than ~20% in width (because that equals more than 5/4th
-     *   downscaling and thus more than 5 pixels input per 4 pixels output).
-     * - In general, bilinear filters create artifacts during downscaling
-     *   (even when <20%), because one output pixel will span more than one
-     *   input pixel, and thus some pixels will need edges of both neighbor
-     *   pixels to interpolate the output pixel. Since you can use at most
-     *   two input pixels per output pixel in bilinear scaling, this is
-     *   impossible and thus downscaling by any size will create artifacts.
-     * To enable this type of scaling, set SWS_FLAG_FAST_BILINEAR
-     * in SwsInternal->flags.
-     */
-    /** @{ */
-    void (*hyscale_fast)(SwsInternal *c,
-                         int16_t *dst, int dstWidth,
-                         const uint8_t *src, int srcW, int xInc);
-    void (*hcscale_fast)(SwsInternal *c,
-                         int16_t *dst1, int16_t *dst2, int dstWidth,
-                         const uint8_t *src1, const uint8_t *src2,
-                         int srcW, int xInc);
-    /** @} */
+        /**
+        * Scale one horizontal line of input data using a bilinear filter
+        * to produce one line of output data. Compared to SwsInternal->hScale(),
+        * please take note of the following caveats when using these:
+        * - Scaling is done using only 7 bits instead of 14-bit coefficients.
+        * - You can use no more than 5 input pixels to produce 4 output
+        *   pixels. Therefore, this filter should not be used for downscaling
+        *   by more than ~20% in width (because that equals more than 5/4th
+        *   downscaling and thus more than 5 pixels input per 4 pixels output).
+        * - In general, bilinear filters create artifacts during downscaling
+        *   (even when <20%), because one output pixel will span more than one
+        *   input pixel, and thus some pixels will need edges of both neighbor
+        *   pixels to interpolate the output pixel. Since you can use at most
+        *   two input pixels per output pixel in bilinear scaling, this is
+        *   impossible and thus downscaling by any size will create artifacts.
+        * To enable this type of scaling, set SWS_FLAG_FAST_BILINEAR
+        * in SwsInternal->flags.
+        */
+        /** @{ */
+        void (*hyscale_fast)(SwsInternal *c,
+                            int16_t *dst, int dstWidth,
+                            const uint8_t *src, int srcW, int xInc);
+        void (*hcscale_fast)(SwsInternal *c,
+                            int16_t *dst1, int16_t *dst2, int dstWidth,
+                            const uint8_t *src1, const uint8_t *src2,
+                            int srcW, int xInc);
+        /** @} */
 
-    /**
-     * Scale one horizontal line of input data using a filter over the input
-     * lines, to produce one (differently sized) line of output data.
-     *
-     * @param dst        pointer to destination buffer for horizontally scaled
-     *                   data. If the number of bits per component of one
-     *                   destination pixel (SwsInternal->dstBpc) is <= 10, data
-     *                   will be 15 bpc in 16 bits (int16_t) width. Else (i.e.
-     *                   SwsInternal->dstBpc == 16), data will be 19bpc in
-     *                   32 bits (int32_t) width.
-     * @param dstW       width of destination image
-     * @param src        pointer to source data to be scaled. If the number of
-     *                   bits per component of a source pixel (SwsInternal->srcBpc)
-     *                   is 8, this is 8bpc in 8 bits (uint8_t) width. Else
-     *                   (i.e. SwsInternal->dstBpc > 8), this is native depth
-     *                   in 16 bits (uint16_t) width. In other words, for 9-bit
-     *                   YUV input, this is 9bpc, for 10-bit YUV input, this is
-     *                   10bpc, and for 16-bit RGB or YUV, this is 16bpc.
-     * @param filter     filter coefficients to be used per output pixel for
-     *                   scaling. This contains 14bpp filtering coefficients.
-     *                   Guaranteed to contain dstW * filterSize entries.
-     * @param filterPos  position of the first input pixel to be used for
-     *                   each output pixel during scaling. Guaranteed to
-     *                   contain dstW entries.
-     * @param filterSize the number of input coefficients to be used (and
-     *                   thus the number of input pixels to be used) for
-     *                   creating a single output pixel. Is aligned to 4
-     *                   (and input coefficients thus padded with zeroes)
-     *                   to simplify creating SIMD code.
-     */
-    /** @{ */
-    void (*hyScale)(SwsInternal *c, int16_t *dst, int dstW,
-                    const uint8_t *src, const int16_t *filter,
-                    const int32_t *filterPos, int filterSize);
-    void (*hcScale)(SwsInternal *c, int16_t *dst, int dstW,
-                    const uint8_t *src, const int16_t *filter,
-                    const int32_t *filterPos, int filterSize);
-    /** @} */
+        /**
+        * Scale one horizontal line of input data using a filter over the input
+        * lines, to produce one (differently sized) line of output data.
+        *
+        * @param dst        pointer to destination buffer for horizontally scaled
+        *                   data. If the number of bits per component of one
+        *                   destination pixel (SwsInternal->dstBpc) is <= 10, data
+        *                   will be 15 bpc in 16 bits (int16_t) width. Else (i.e.
+        *                   SwsInternal->dstBpc == 16), data will be 19bpc in
+        *                   32 bits (int32_t) width.
+        * @param dstW       width of destination image
+        * @param src        pointer to source data to be scaled. If the number of
+        *                   bits per component of a source pixel (SwsInternal->srcBpc)
+        *                   is 8, this is 8bpc in 8 bits (uint8_t) width. Else
+        *                   (i.e. SwsInternal->srcBpc > 8), this is native depth
+        *                   in 16 bits (uint16_t) width. In other words, for 9-bit
+        *                   YUV input, this is 9bpc, for 10-bit YUV input, this is
+        *                   10bpc, and for 16-bit RGB or YUV, this is 16bpc.
+        * @param filter     filter coefficients to be used per output pixel for
+        *                   scaling. This contains 14bpp filtering coefficients.
+        *                   Guaranteed to contain dstW * filterSize entries.
+        * @param filterPos  position of the first input pixel to be used for
+        *                   each output pixel during scaling. Guaranteed to
+        *                   contain dstW entries.
+        * @param filterSize the number of input coefficients to be used (and
+        *                   thus the number of input pixels to be used) for
+        *                   creating a single output pixel. Is aligned to 4
+        *                   (and input coefficients thus padded with zeroes)
+        *                   to simplify creating SIMD code.
+        */
+        /** @{ */
+        void (*hyScale)(SwsInternal *c, int16_t *dst, int dstW,
+                        const uint8_t *src, const int16_t *filter,
+                        const int32_t *filterPos, int filterSize);
+        void (*hcScale)(SwsInternal *c, int16_t *dst, int dstW,
+                        const uint8_t *src, const int16_t *filter,
+                        const int32_t *filterPos, int filterSize);
+        /** @} */
 
-    /**
-     * Color range conversion functions if needed.
-     * If SwsInternal->dstBpc is > 14:
-     * - int16_t *dst (data is 15 bpc)
-     * - uint16_t coeff
-     * - int32_t offset
-     * Otherwise (SwsInternal->dstBpc is <= 14):
-     * - int32_t *dst (data is 19 bpc)
-     * - uint32_t coeff
-     * - int64_t offset
-     */
-    /** @{ */
-    void (*lumConvertRange)(int16_t *dst, int width,
-                            uint32_t coeff, int64_t offset);
-    void (*chrConvertRange)(int16_t *dst1, int16_t *dst2, int width,
-                            uint32_t coeff, int64_t offset);
-    /** @} */
+        /**
+        * Color range conversion functions if needed.
+        * If SwsInternal->dstBpc is <= 14:
+        * - int16_t *dst (data is 15 bpc)
+        * - uint16_t coeff
+        * - int32_t offset
+        * Otherwise (SwsInternal->dstBpc is > 14):
+        * - int32_t *dst (data is 19 bpc)
+        * - uint32_t coeff
+        * - int64_t offset
+        */
+        /** @{ */
+        void (*lumConvertRange)(int16_t *dst, int width,
+                                uint32_t coeff, int64_t offset);
+        void (*chrConvertRange)(int16_t *dst1, int16_t *dst2, int width,
+                                uint32_t coeff, int64_t offset);
+        /** @} */
 
-    uint32_t lumConvertRange_coeff;
-    uint32_t chrConvertRange_coeff;
-    int64_t  lumConvertRange_offset;
-    int64_t  chrConvertRange_offset;
+        uint32_t lumConvertRange_coeff;
+        uint32_t chrConvertRange_coeff;
+        int64_t  lumConvertRange_offset;
+        int64_t  chrConvertRange_offset;
 
-    int needs_hcscale; ///< Set if there are chroma planes to be converted.
+        int needs_hcscale; ///< Set if there are chroma planes to be converted.
 
-    // scratch buffer for converting packed rgb0 sources
-    // filled with a copy of the input frame + fully opaque alpha,
-    // then passed as input to further conversion
-    uint8_t     *rgb0_scratch;
-    unsigned int rgb0_scratch_allocated;
+        // scratch buffer for converting packed rgb0 sources
+        // filled with a copy of the input frame + fully opaque alpha,
+        // then passed as input to further conversion
+        uint8_t     *rgb0_scratch;
+        unsigned int rgb0_scratch_allocated;
 
-    // scratch buffer for converting XYZ sources
-    // filled with the input converted to rgb48
-    // then passed as input to further conversion
-    uint8_t     *xyz_scratch;
-    unsigned int xyz_scratch_allocated;
+        // scratch buffer for converting XYZ sources
+        // filled with the input converted to rgb48
+        // then passed as input to further conversion
+        uint8_t     *xyz_scratch;
+        unsigned int xyz_scratch_allocated;
 
-    unsigned int dst_slice_align;
-    atomic_int   stride_unaligned_warned;
-    atomic_int   data_unaligned_warned;
-    int          color_conversion_warned;
+        unsigned int dst_slice_align;
+        atomic_int   stride_unaligned_warned;
+        atomic_int   data_unaligned_warned;
+        int          color_conversion_warned;
 
-    Half2FloatTables *h2f_tables;
-
-    // Hardware specific private data
-    void *hw_priv;
-
-    int is_legacy_init;
+        Half2FloatTables *h2f_tables;
+    };
+    };
 };
 //FIXME check init (where 0)
 
@@ -714,8 +722,9 @@ static_assert(offsetof(SwsInternal, redDither) + DITHER32_INT == offsetof(SwsInt
 
 #if ARCH_X86_64
 /* x86 yuv2gbrp uses the SwsInternal for yuv coefficients
-   if struct offsets change the asm needs to be updated too */
-static_assert(offsetof(SwsInternal, yuv2rgb_y_offset) == 40348,
+ * if struct offsets change the asm needs to be updated too
+ * See: libswscale/x86/output.asm */
+static_assert(offsetof(SwsInternal, yuv2rgb_y_offset) == 40332,
               "yuv2rgb_y_offset must be updated in x86 asm");
 #endif
 
diff --git a/libswscale/x86/output.asm b/libswscale/x86/output.asm
index f2e884780a..7a1e5d9bc1 100644
--- a/libswscale/x86/output.asm
+++ b/libswscale/x86/output.asm
@@ -582,7 +582,7 @@ yuv2nv12cX_fn yuv2nv21
 
 %if ARCH_X86_64
 struc SwsInternal
-    .padding:           resb 40348 ; offsetof(SwsInternal, yuv2rgb_y_offset)
+    .padding:           resb 40332 ; offsetof(SwsInternal, yuv2rgb_y_offset)
     .yuv2rgb_y_offset:  resd 1
     .yuv2rgb_y_coeff:   resd 1
     .yuv2rgb_v2r_coeff: resd 1
-- 
2.52.0

_______________________________________________
ffmpeg-devel mailing list -- ffmpeg-devel@ffmpeg.org
To unsubscribe send an email to ffmpeg-devel-leave@ffmpeg.org

                 reply	other threads:[~2026-02-26 18:08 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=177212924425.25.7171214361113436911@29965ddac10e \
    --to=ffmpeg-devel@ffmpeg.org \
    --cc=code@ffmpeg.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git