From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> To: ffmpeg-devel@ffmpeg.org Cc: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> Subject: [FFmpeg-devel] [PATCH 3/3] swscale/output: Don't call av_pix_fmt_desc_get() in a loop Date: Thu, 8 Sep 2022 05:31:49 +0200 Message-ID: <GV1P250MB073751CB4FFB76280DD1A6708F409@GV1P250MB0737.EURP250.PROD.OUTLOOK.COM> (raw) In-Reply-To: <GV1P250MB07370A2874066BF522C7670A8F409@GV1P250MB0737.EURP250.PROD.OUTLOOK.COM> Up until now, libswscale/output.c used a macro to write an output pixel which involved a call to av_pix_fmt_desc_get() to find out whether the input pixel format is BE or LE despite this being known at compile-time (there are templates per pixfmt). Even worse, these calls are made in a loop, so that e.g. there are eight calls to av_pix_fmt_desc_get() for every pixel processed in yuv2rgba64_X_c_template() for 64bit RGB formats. This commit modifies these macros to ensure that isBE() is evaluated at compile-time. This saved 41184B of .text for me (GCC 11.2, -O3). Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> --- This must be the lowest-hanging fruit in the whole codebase. Two other question: Why do all these functions in swscale_internal.h take an enum AVPixelFormat instead of accepting an AVPixFmtDescriptor? And would making av_pix_fmt_desc_get() av_const be beneficial? libswscale/output.c | 101 +++++++++++++++++++++++++------------------- 1 file changed, 58 insertions(+), 43 deletions(-) diff --git a/libswscale/output.c b/libswscale/output.c index 40a4476c6d..590334eb57 100644 --- a/libswscale/output.c +++ b/libswscale/output.c @@ -919,7 +919,7 @@ YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, AV_PIX_FMT_UYVY422) #define R_B ((target == AV_PIX_FMT_RGB48LE || target == AV_PIX_FMT_RGB48BE || target == AV_PIX_FMT_RGBA64LE || target == AV_PIX_FMT_RGBA64BE) ? R : B) #define B_R ((target == AV_PIX_FMT_RGB48LE || target == AV_PIX_FMT_RGB48BE || target == AV_PIX_FMT_RGBA64LE || target == AV_PIX_FMT_RGBA64BE) ? B : R) #define output_pixel(pos, val) \ - if (isBE(target)) { \ + if (is_be) { \ AV_WB16(pos, val); \ } else { \ AV_WL16(pos, val); \ @@ -931,7 +931,8 @@ yuv2ya16_X_c_template(SwsContext *c, const int16_t *lumFilter, const int16_t *chrFilter, const int32_t **unused_chrUSrc, const int32_t **unused_chrVSrc, int unused_chrFilterSize, const int32_t **alpSrc, uint16_t *dest, int dstW, - int y, enum AVPixelFormat target, int unused_hasAlpha, int unused_eightbytes) + int y, enum AVPixelFormat target, + int unused_hasAlpha, int unused_eightbytes, int is_be) { int hasAlpha = !!alpSrc; int i; @@ -968,7 +969,8 @@ yuv2ya16_2_c_template(SwsContext *c, const int32_t *buf[2], const int32_t *unused_ubuf[2], const int32_t *unused_vbuf[2], const int32_t *abuf[2], uint16_t *dest, int dstW, int yalpha, int unused_uvalpha, int y, - enum AVPixelFormat target, int unused_hasAlpha, int unused_eightbytes) + enum AVPixelFormat target, int unused_hasAlpha, + int unused_eightbytes, int is_be) { int hasAlpha = abuf && abuf[0] && abuf[1]; const int32_t *buf0 = buf[0], *buf1 = buf[1], @@ -999,7 +1001,8 @@ static av_always_inline void yuv2ya16_1_c_template(SwsContext *c, const int32_t *buf0, const int32_t *unused_ubuf[2], const int32_t *unused_vbuf[2], const int32_t *abuf0, uint16_t *dest, int dstW, - int unused_uvalpha, int y, enum AVPixelFormat target, int unused_hasAlpha, int unused_eightbytes) + int unused_uvalpha, int y, enum AVPixelFormat target, + int unused_hasAlpha, int unused_eightbytes, int is_be) { int hasAlpha = !!abuf0; int i; @@ -1027,7 +1030,8 @@ yuv2rgba64_X_c_template(SwsContext *c, const int16_t *lumFilter, const int16_t *chrFilter, const int32_t **chrUSrc, const int32_t **chrVSrc, int chrFilterSize, const int32_t **alpSrc, uint16_t *dest, int dstW, - int y, enum AVPixelFormat target, int hasAlpha, int eightbytes) + int y, enum AVPixelFormat target, int hasAlpha, int eightbytes, + int is_be) { int i; int A1 = 0xffff<<14, A2 = 0xffff<<14; @@ -1108,7 +1112,8 @@ yuv2rgba64_2_c_template(SwsContext *c, const int32_t *buf[2], const int32_t *ubuf[2], const int32_t *vbuf[2], const int32_t *abuf[2], uint16_t *dest, int dstW, int yalpha, int uvalpha, int y, - enum AVPixelFormat target, int hasAlpha, int eightbytes) + enum AVPixelFormat target, int hasAlpha, int eightbytes, + int is_be) { const int32_t *buf0 = buf[0], *buf1 = buf[1], *ubuf0 = ubuf[0], *ubuf1 = ubuf[1], @@ -1172,7 +1177,8 @@ static av_always_inline void yuv2rgba64_1_c_template(SwsContext *c, const int32_t *buf0, const int32_t *ubuf[2], const int32_t *vbuf[2], const int32_t *abuf0, uint16_t *dest, int dstW, - int uvalpha, int y, enum AVPixelFormat target, int hasAlpha, int eightbytes) + int uvalpha, int y, enum AVPixelFormat target, + int hasAlpha, int eightbytes, int is_be) { const int32_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0]; int i; @@ -1277,7 +1283,8 @@ yuv2rgba64_full_X_c_template(SwsContext *c, const int16_t *lumFilter, const int16_t *chrFilter, const int32_t **chrUSrc, const int32_t **chrVSrc, int chrFilterSize, const int32_t **alpSrc, uint16_t *dest, int dstW, - int y, enum AVPixelFormat target, int hasAlpha, int eightbytes) + int y, enum AVPixelFormat target, int hasAlpha, + int eightbytes, int is_be) { int i; int A = 0xffff<<14; @@ -1340,7 +1347,8 @@ yuv2rgba64_full_2_c_template(SwsContext *c, const int32_t *buf[2], const int32_t *ubuf[2], const int32_t *vbuf[2], const int32_t *abuf[2], uint16_t *dest, int dstW, int yalpha, int uvalpha, int y, - enum AVPixelFormat target, int hasAlpha, int eightbytes) + enum AVPixelFormat target, int hasAlpha, int eightbytes, + int is_be) { const int32_t *buf0 = buf[0], *buf1 = buf[1], *ubuf0 = ubuf[0], *ubuf1 = ubuf[1], @@ -1391,7 +1399,8 @@ static av_always_inline void yuv2rgba64_full_1_c_template(SwsContext *c, const int32_t *buf0, const int32_t *ubuf[2], const int32_t *vbuf[2], const int32_t *abuf0, uint16_t *dest, int dstW, - int uvalpha, int y, enum AVPixelFormat target, int hasAlpha, int eightbytes) + int uvalpha, int y, enum AVPixelFormat target, + int hasAlpha, int eightbytes, int is_be) { const int32_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0]; int i; @@ -1468,7 +1477,11 @@ yuv2rgba64_full_1_c_template(SwsContext *c, const int32_t *buf0, #undef r_b #undef b_r -#define YUV2PACKED16WRAPPER(name, base, ext, fmt, hasAlpha, eightbytes) \ +#define IS_BE_LE 0 +#define IS_BE_BE 1 +#define IS_BE(BE_LE) IS_BE_ ## BE_LE + +#define YUV2PACKED16WRAPPER_0(name, base, ext, fmt, is_be, hasAlpha, eightbytes) \ static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \ const int16_t **_lumSrc, int lumFilterSize, \ const int16_t *chrFilter, const int16_t **_chrUSrc, \ @@ -1483,7 +1496,7 @@ static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \ uint16_t *dest = (uint16_t *) _dest; \ name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \ chrFilter, chrUSrc, chrVSrc, chrFilterSize, \ - alpSrc, dest, dstW, y, fmt, hasAlpha, eightbytes); \ + alpSrc, dest, dstW, y, fmt, hasAlpha, eightbytes, is_be); \ } \ \ static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \ @@ -1497,7 +1510,7 @@ static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \ **abuf = (const int32_t **) _abuf; \ uint16_t *dest = (uint16_t *) _dest; \ name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \ - dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha, eightbytes); \ + dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha, eightbytes, is_be); \ } \ \ static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \ @@ -1511,36 +1524,38 @@ static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \ *abuf0 = (const int32_t *) _abuf0; \ uint16_t *dest = (uint16_t *) _dest; \ name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \ - dstW, uvalpha, y, fmt, hasAlpha, eightbytes); \ -} - -YUV2PACKED16WRAPPER(yuv2, rgba64, rgb48be, AV_PIX_FMT_RGB48BE, 0, 0) -YUV2PACKED16WRAPPER(yuv2, rgba64, rgb48le, AV_PIX_FMT_RGB48LE, 0, 0) -YUV2PACKED16WRAPPER(yuv2, rgba64, bgr48be, AV_PIX_FMT_BGR48BE, 0, 0) -YUV2PACKED16WRAPPER(yuv2, rgba64, bgr48le, AV_PIX_FMT_BGR48LE, 0, 0) -YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64be, AV_PIX_FMT_RGBA64BE, 1, 1) -YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64le, AV_PIX_FMT_RGBA64LE, 1, 1) -YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64be, AV_PIX_FMT_RGBA64BE, 0, 1) -YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64le, AV_PIX_FMT_RGBA64LE, 0, 1) -YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64be, AV_PIX_FMT_BGRA64BE, 1, 1) -YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64le, AV_PIX_FMT_BGRA64LE, 1, 1) -YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64be, AV_PIX_FMT_BGRA64BE, 0, 1) -YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64le, AV_PIX_FMT_BGRA64LE, 0, 1) -YUV2PACKED16WRAPPER(yuv2, ya16, ya16be, AV_PIX_FMT_YA16BE, 1, 0) -YUV2PACKED16WRAPPER(yuv2, ya16, ya16le, AV_PIX_FMT_YA16LE, 1, 0) - -YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48be_full, AV_PIX_FMT_RGB48BE, 0, 0) -YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48le_full, AV_PIX_FMT_RGB48LE, 0, 0) -YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgr48be_full, AV_PIX_FMT_BGR48BE, 0, 0) -YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgr48le_full, AV_PIX_FMT_BGR48LE, 0, 0) -YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgba64be_full, AV_PIX_FMT_RGBA64BE, 1, 1) -YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgba64le_full, AV_PIX_FMT_RGBA64LE, 1, 1) -YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgbx64be_full, AV_PIX_FMT_RGBA64BE, 0, 1) -YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgbx64le_full, AV_PIX_FMT_RGBA64LE, 0, 1) -YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgra64be_full, AV_PIX_FMT_BGRA64BE, 1, 1) -YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgra64le_full, AV_PIX_FMT_BGRA64LE, 1, 1) -YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgrx64be_full, AV_PIX_FMT_BGRA64BE, 0, 1) -YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgrx64le_full, AV_PIX_FMT_BGRA64LE, 0, 1) + dstW, uvalpha, y, fmt, hasAlpha, eightbytes, is_be); \ +} +#define YUV2PACKED16WRAPPER(name, base, ext, fmt, endianness, hasAlpha, eightbytes) \ + YUV2PACKED16WRAPPER_0(name, base, ext, fmt ## endianness, IS_BE(endianness), hasAlpha, eightbytes) + +YUV2PACKED16WRAPPER(yuv2, rgba64, rgb48be, AV_PIX_FMT_RGB48, BE, 0, 0) +YUV2PACKED16WRAPPER(yuv2, rgba64, rgb48le, AV_PIX_FMT_RGB48, LE, 0, 0) +YUV2PACKED16WRAPPER(yuv2, rgba64, bgr48be, AV_PIX_FMT_BGR48, BE, 0, 0) +YUV2PACKED16WRAPPER(yuv2, rgba64, bgr48le, AV_PIX_FMT_BGR48, LE, 0, 0) +YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64be, AV_PIX_FMT_RGBA64, BE, 1, 1) +YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64le, AV_PIX_FMT_RGBA64, LE, 1, 1) +YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64be, AV_PIX_FMT_RGBA64, BE, 0, 1) +YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64le, AV_PIX_FMT_RGBA64, LE, 0, 1) +YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64be, AV_PIX_FMT_BGRA64, BE, 1, 1) +YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64le, AV_PIX_FMT_BGRA64, LE, 1, 1) +YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64be, AV_PIX_FMT_BGRA64, BE, 0, 1) +YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64le, AV_PIX_FMT_BGRA64, LE, 0, 1) +YUV2PACKED16WRAPPER(yuv2, ya16, ya16be, AV_PIX_FMT_YA16, BE, 1, 0) +YUV2PACKED16WRAPPER(yuv2, ya16, ya16le, AV_PIX_FMT_YA16, LE, 1, 0) + +YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48be_full, AV_PIX_FMT_RGB48, BE, 0, 0) +YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48le_full, AV_PIX_FMT_RGB48, LE, 0, 0) +YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgr48be_full, AV_PIX_FMT_BGR48, BE, 0, 0) +YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgr48le_full, AV_PIX_FMT_BGR48, LE, 0, 0) +YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgba64be_full, AV_PIX_FMT_RGBA64, BE, 1, 1) +YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgba64le_full, AV_PIX_FMT_RGBA64, LE, 1, 1) +YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgbx64be_full, AV_PIX_FMT_RGBA64, BE, 0, 1) +YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgbx64le_full, AV_PIX_FMT_RGBA64, LE, 0, 1) +YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgra64be_full, AV_PIX_FMT_BGRA64, BE, 1, 1) +YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgra64le_full, AV_PIX_FMT_BGRA64, LE, 1, 1) +YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgrx64be_full, AV_PIX_FMT_BGRA64, BE, 0, 1) +YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgrx64le_full, AV_PIX_FMT_BGRA64, LE, 0, 1) /* * Write out 2 RGB pixels in the target pixel format. This function takes a -- 2.34.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next prev parent reply other threads:[~2022-09-08 3:32 UTC|newest] Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top 2022-09-08 2:31 [FFmpeg-devel] [PATCH 1/2] swscale/input: Remove spec-incompliant '; ' Andreas Rheinhardt 2022-09-08 2:38 ` [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get() Andreas Rheinhardt 2022-09-08 17:39 ` Michael Niedermayer 2022-09-08 19:38 ` Andreas Rheinhardt 2022-09-08 20:25 ` Michael Niedermayer 2022-09-08 21:44 ` Andreas Rheinhardt 2022-09-09 14:55 ` Michael Niedermayer 2022-09-09 18:15 ` Andreas Rheinhardt 2022-09-10 15:29 ` Michael Niedermayer 2022-09-10 16:34 ` Andreas Rheinhardt 2022-09-10 17:06 ` Michael Niedermayer 2022-09-09 15:52 ` Michael Niedermayer 2022-09-08 3:31 ` Andreas Rheinhardt [this message] 2022-09-16 8:39 ` [FFmpeg-devel] [PATCH 3/3] swscale/output: Don't call av_pix_fmt_desc_get() in a loop Paul B Mahol 2022-09-08 3:44 ` [FFmpeg-devel] [PATCH 1/2] swscale/input: Remove spec-incompliant '; ' Philip Langdale
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=GV1P250MB073751CB4FFB76280DD1A6708F409@GV1P250MB0737.EURP250.PROD.OUTLOOK.COM \ --to=andreas.rheinhardt@outlook.com \ --cc=ffmpeg-devel@ffmpeg.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel This inbox may be cloned and mirrored by anyone: git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git # If you have public-inbox 1.1+ installed, you may # initialize and index your mirror using the following commands: public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \ ffmpegdev@gitmailbox.com public-inbox-index ffmpegdev Example config snippet for mirrors. AGPL code for this site: git clone https://public-inbox.org/public-inbox.git