* [FFmpeg-devel] [PATCH v3 2/4] avcodec/bswapdsp: rename bswap_buf to bswap32_buf
2022-12-13 2:42 [FFmpeg-devel] [PATCH v3 1/4] avcodec/bswapdsp: remove unused cextern mindmark
@ 2022-12-13 2:42 ` mindmark
2022-12-13 2:42 ` [FFmpeg-devel] [PATCH v3 3/4] avcodec/bswapdsp: add documentation mindmark
` (2 subsequent siblings)
3 siblings, 0 replies; 9+ messages in thread
From: mindmark @ 2022-12-13 2:42 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Mark Reid
From: Mark Reid <mindmark@gmail.com>
---
libavcodec/4xm.c | 8 ++++----
libavcodec/alsdec.c | 6 +++---
libavcodec/apedec.c | 4 ++--
libavcodec/asvdec.c | 4 ++--
libavcodec/asvenc.c | 4 ++--
libavcodec/bswapdsp.c | 8 ++++----
libavcodec/bswapdsp.h | 2 +-
libavcodec/eatqi.c | 4 ++--
libavcodec/fraps.c | 4 ++--
libavcodec/huffyuvdec.c | 8 ++++----
libavcodec/huffyuvenc.c | 2 +-
libavcodec/imm4.c | 6 +++---
libavcodec/mimic.c | 6 +++---
libavcodec/motionpixels.c | 4 ++--
libavcodec/mpc7.c | 4 ++--
libavcodec/rawdec.c | 2 +-
libavcodec/riscv/bswapdsp_init.c | 4 ++--
libavcodec/truemotion2.c | 4 ++--
libavcodec/truespeech.c | 2 +-
libavcodec/utvideodec.c | 12 ++++++------
libavcodec/utvideoenc.c | 6 +++---
libavcodec/x86/bswapdsp.asm | 2 +-
libavcodec/x86/bswapdsp_init.c | 6 +++---
libavcodec/ylc.c | 12 ++++++------
tests/checkasm/bswapdsp.c | 2 +-
25 files changed, 63 insertions(+), 63 deletions(-)
diff --git a/libavcodec/4xm.c b/libavcodec/4xm.c
index 5636fdef2d..a26b2f9004 100644
--- a/libavcodec/4xm.c
+++ b/libavcodec/4xm.c
@@ -469,8 +469,8 @@ static int decode_p_frame(FourXContext *f, const uint8_t *buf, int length)
bitstream_size);
if (!f->bitstream_buffer)
return AVERROR(ENOMEM);
- f->bbdsp.bswap_buf(f->bitstream_buffer, (const uint32_t *) (buf + extra),
- bitstream_size / 4);
+ f->bbdsp.bswap32_buf(f->bitstream_buffer, (const uint32_t *) (buf + extra),
+ bitstream_size / 4);
init_get_bits(&f->gb, f->bitstream_buffer, 8 * bitstream_size);
wordstream_offset = extra + bitstream_size;
@@ -813,8 +813,8 @@ static int decode_i_frame(FourXContext *f, const uint8_t *buf, int length)
prestream_size);
if (!f->bitstream_buffer)
return AVERROR(ENOMEM);
- f->bbdsp.bswap_buf(f->bitstream_buffer, (const uint32_t *) prestream,
- prestream_size / 4);
+ f->bbdsp.bswap32_buf(f->bitstream_buffer, (const uint32_t *) prestream,
+ prestream_size / 4);
init_get_bits(&f->pre_gb, f->bitstream_buffer, 8 * prestream_size);
f->last_dc = 0 * 128 * 8 * 8;
diff --git a/libavcodec/alsdec.c b/libavcodec/alsdec.c
index 4605b2248f..6f401cf890 100644
--- a/libavcodec/alsdec.c
+++ b/libavcodec/alsdec.c
@@ -1899,9 +1899,9 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *frame,
sample++)
*dest++ = av_bswap16(src[sample]);
} else {
- ctx->bdsp.bswap_buf((uint32_t *) ctx->crc_buffer,
- (uint32_t *) frame->data[0],
- ctx->cur_frame_length * channels);
+ ctx->bdsp.bswap32_buf((uint32_t *) ctx->crc_buffer,
+ (uint32_t *) frame->data[0],
+ ctx->cur_frame_length * channels);
}
crc_source = ctx->crc_buffer;
} else {
diff --git a/libavcodec/apedec.c b/libavcodec/apedec.c
index c08d13d6c2..b7856c82e5 100644
--- a/libavcodec/apedec.c
+++ b/libavcodec/apedec.c
@@ -1499,8 +1499,8 @@ static int ape_decode_frame(AVCodecContext *avctx, AVFrame *frame,
av_fast_padded_malloc(&s->data, &s->data_size, buf_size);
if (!s->data)
return AVERROR(ENOMEM);
- s->bdsp.bswap_buf((uint32_t *) s->data, (const uint32_t *) buf,
- buf_size >> 2);
+ s->bdsp.bswap32_buf((uint32_t *) s->data, (const uint32_t *) buf,
+ buf_size >> 2);
memset(s->data + (buf_size & ~3), 0, buf_size & 3);
s->ptr = s->data;
s->data_end = s->data + buf_size;
diff --git a/libavcodec/asvdec.c b/libavcodec/asvdec.c
index 699aab9f8f..25dab7473f 100644
--- a/libavcodec/asvdec.c
+++ b/libavcodec/asvdec.c
@@ -253,8 +253,8 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p,
if (!a->bitstream_buffer)
return AVERROR(ENOMEM);
- c->bbdsp.bswap_buf((uint32_t *) a->bitstream_buffer,
- (const uint32_t *) buf, buf_size / 4);
+ c->bbdsp.bswap32_buf((uint32_t *) a->bitstream_buffer,
+ (const uint32_t *) buf, buf_size / 4);
ret = init_get_bits8(&a->gb, a->bitstream_buffer, buf_size);
} else {
ret = init_get_bits8_le(&a->gb, buf, buf_size);
diff --git a/libavcodec/asvenc.c b/libavcodec/asvenc.c
index 9da7cbb986..2bf67d686c 100644
--- a/libavcodec/asvenc.c
+++ b/libavcodec/asvenc.c
@@ -309,8 +309,8 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
size = (put_bytes_output(&a->pb) + 3) / 4;
if (avctx->codec_id == AV_CODEC_ID_ASV1) {
- c->bbdsp.bswap_buf((uint32_t *) pkt->data,
- (uint32_t *) pkt->data, size);
+ c->bbdsp.bswap32_buf((uint32_t *) pkt->data,
+ (uint32_t *) pkt->data, size);
}
pkt->size = size * 4;
diff --git a/libavcodec/bswapdsp.c b/libavcodec/bswapdsp.c
index f0ea2b55c5..432f53efbf 100644
--- a/libavcodec/bswapdsp.c
+++ b/libavcodec/bswapdsp.c
@@ -22,11 +22,11 @@
#include "libavutil/bswap.h"
#include "bswapdsp.h"
-static void bswap_buf(uint32_t *dst, const uint32_t *src, int w)
+static void bswap32_buf(uint32_t *dst, const uint32_t *src, int len)
{
int i;
- for (i = 0; i + 8 <= w; i += 8) {
+ for (i = 0; i + 8 <= len; i += 8) {
dst[i + 0] = av_bswap32(src[i + 0]);
dst[i + 1] = av_bswap32(src[i + 1]);
dst[i + 2] = av_bswap32(src[i + 2]);
@@ -36,7 +36,7 @@ static void bswap_buf(uint32_t *dst, const uint32_t *src, int w)
dst[i + 6] = av_bswap32(src[i + 6]);
dst[i + 7] = av_bswap32(src[i + 7]);
}
- for (; i < w; i++)
+ for (; i < len; i++)
dst[i + 0] = av_bswap32(src[i + 0]);
}
@@ -48,7 +48,7 @@ static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len)
av_cold void ff_bswapdsp_init(BswapDSPContext *c)
{
- c->bswap_buf = bswap_buf;
+ c->bswap32_buf = bswap32_buf;
c->bswap16_buf = bswap16_buf;
#if ARCH_RISCV
diff --git a/libavcodec/bswapdsp.h b/libavcodec/bswapdsp.h
index 6f4db66115..10519cfd2e 100644
--- a/libavcodec/bswapdsp.h
+++ b/libavcodec/bswapdsp.h
@@ -22,7 +22,7 @@
#include <stdint.h>
typedef struct BswapDSPContext {
- void (*bswap_buf)(uint32_t *dst, const uint32_t *src, int w);
+ void (*bswap32_buf)(uint32_t *dst, const uint32_t *src, int len);
void (*bswap16_buf)(uint16_t *dst, const uint16_t *src, int len);
} BswapDSPContext;
diff --git a/libavcodec/eatqi.c b/libavcodec/eatqi.c
index e4f12b3db2..ec4bf02720 100644
--- a/libavcodec/eatqi.c
+++ b/libavcodec/eatqi.c
@@ -148,8 +148,8 @@ static int tqi_decode_frame(AVCodecContext *avctx, AVFrame *frame,
buf_end - buf);
if (!t->bitstream_buf)
return AVERROR(ENOMEM);
- t->bsdsp.bswap_buf(t->bitstream_buf, (const uint32_t *) buf,
- (buf_end - buf) / 4);
+ t->bsdsp.bswap32_buf(t->bitstream_buf, (const uint32_t *) buf,
+ (buf_end - buf) / 4);
init_get_bits(&t->gb, t->bitstream_buf, 8 * (buf_end - buf));
t->last_dc[0] =
diff --git a/libavcodec/fraps.c b/libavcodec/fraps.c
index 4c4c46b602..1a0f52ebe6 100644
--- a/libavcodec/fraps.c
+++ b/libavcodec/fraps.c
@@ -106,8 +106,8 @@ static int fraps2_decode_plane(FrapsContext *s, uint8_t *dst, int stride, int w,
/* we have built Huffman table and are ready to decode plane */
/* convert bits so they may be used by standard bitreader */
- s->bdsp.bswap_buf((uint32_t *) s->tmpbuf,
- (const uint32_t *) src, size >> 2);
+ s->bdsp.bswap32_buf((uint32_t *) s->tmpbuf,
+ (const uint32_t *) src, size >> 2);
if ((ret = init_get_bits8(&gb, s->tmpbuf, size)) < 0)
return ret;
diff --git a/libavcodec/huffyuvdec.c b/libavcodec/huffyuvdec.c
index 7d3515cc88..f5793d2eec 100644
--- a/libavcodec/huffyuvdec.c
+++ b/libavcodec/huffyuvdec.c
@@ -1239,8 +1239,8 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p,
if (!s->bitstream_buffer)
return AVERROR(ENOMEM);
- s->bdsp.bswap_buf((uint32_t *) s->bitstream_buffer,
- (const uint32_t *) buf, buf_size / 4);
+ s->bdsp.bswap32_buf((uint32_t *) s->bitstream_buffer,
+ (const uint32_t *) buf, buf_size / 4);
if ((ret = ff_thread_get_buffer(avctx, p, 0)) < 0)
return ret;
@@ -1282,8 +1282,8 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p,
return AVERROR_INVALIDDATA;
y_offset = height - (slice + 1) * slice_height;
- s->bdsp.bswap_buf((uint32_t *)s->bitstream_buffer,
- (const uint32_t *)(buf + slice_offset), slice_size / 4);
+ s->bdsp.bswap32_buf((uint32_t *)s->bitstream_buffer,
+ (const uint32_t *)(buf + slice_offset), slice_size / 4);
} else {
y_offset = 0;
slice_offset = 0;
diff --git a/libavcodec/huffyuvenc.c b/libavcodec/huffyuvenc.c
index db274e37ad..b26f2a8fd6 100644
--- a/libavcodec/huffyuvenc.c
+++ b/libavcodec/huffyuvenc.c
@@ -1018,7 +1018,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
avctx->stats_out[0] = '\0';
if (!(s->avctx->flags2 & AV_CODEC_FLAG2_NO_OUTPUT)) {
flush_put_bits(&s->pb);
- s->bdsp.bswap_buf((uint32_t *) pkt->data, (uint32_t *) pkt->data, size);
+ s->bdsp.bswap32_buf((uint32_t *) pkt->data, (uint32_t *) pkt->data, size);
}
s->picture_number++;
diff --git a/libavcodec/imm4.c b/libavcodec/imm4.c
index ccec5dff43..1372afacfd 100644
--- a/libavcodec/imm4.c
+++ b/libavcodec/imm4.c
@@ -368,9 +368,9 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *frame,
if (!s->bitstream)
return AVERROR(ENOMEM);
- s->bdsp.bswap_buf((uint32_t *)s->bitstream,
- (uint32_t *)avpkt->data,
- (avpkt->size + 3) >> 2);
+ s->bdsp.bswap32_buf((uint32_t *)s->bitstream,
+ (uint32_t *)avpkt->data,
+ (avpkt->size + 3) >> 2);
if ((ret = init_get_bits8(gb, s->bitstream, FFALIGN(avpkt->size, 4))) < 0)
return ret;
diff --git a/libavcodec/mimic.c b/libavcodec/mimic.c
index 891471b30e..6cc46b6c38 100644
--- a/libavcodec/mimic.c
+++ b/libavcodec/mimic.c
@@ -411,9 +411,9 @@ static int mimic_decode_frame(AVCodecContext *avctx, AVFrame *rframe,
if (!ctx->swap_buf)
return AVERROR(ENOMEM);
- ctx->bbdsp.bswap_buf(ctx->swap_buf,
- (const uint32_t *) (buf + MIMIC_HEADER_SIZE),
- swap_buf_size >> 2);
+ ctx->bbdsp.bswap32_buf(ctx->swap_buf,
+ (const uint32_t *) (buf + MIMIC_HEADER_SIZE),
+ swap_buf_size >> 2);
init_get_bits(&ctx->gb, ctx->swap_buf, swap_buf_size << 3);
res = decode(ctx, quality, num_coeffs, !is_pframe);
diff --git a/libavcodec/motionpixels.c b/libavcodec/motionpixels.c
index 4141c5a495..9199c0eaa4 100644
--- a/libavcodec/motionpixels.c
+++ b/libavcodec/motionpixels.c
@@ -294,8 +294,8 @@ static int mp_decode_frame(AVCodecContext *avctx, AVFrame *rframe,
av_fast_padded_malloc(&mp->bswapbuf, &mp->bswapbuf_size, buf_size);
if (!mp->bswapbuf)
return AVERROR(ENOMEM);
- mp->bdsp.bswap_buf((uint32_t *) mp->bswapbuf, (const uint32_t *) buf,
- buf_size / 4);
+ mp->bdsp.bswap32_buf((uint32_t *) mp->bswapbuf, (const uint32_t *) buf,
+ buf_size / 4);
if (buf_size & 3)
memcpy(mp->bswapbuf + (buf_size & ~3), buf + (buf_size & ~3), buf_size & 3);
init_get_bits(&gb, mp->bswapbuf, buf_size * 8);
diff --git a/libavcodec/mpc7.c b/libavcodec/mpc7.c
index d2745366c2..cb15de88e1 100644
--- a/libavcodec/mpc7.c
+++ b/libavcodec/mpc7.c
@@ -93,7 +93,7 @@ static av_cold int mpc7_decode_init(AVCodecContext * avctx)
av_lfg_init(&c->rnd, 0xDEADBEEF);
ff_bswapdsp_init(&c->bdsp);
ff_mpadsp_init(&c->mpadsp);
- c->bdsp.bswap_buf((uint32_t *) buf, (const uint32_t *) avctx->extradata, 4);
+ c->bdsp.bswap32_buf((uint32_t *) buf, (const uint32_t *) avctx->extradata, 4);
init_get_bits(&gb, buf, 128);
c->IS = get_bits1(&gb);
@@ -211,7 +211,7 @@ static int mpc7_decode_frame(AVCodecContext *avctx, AVFrame *frame,
av_fast_padded_malloc(&c->bits, &c->buf_size, buf_size);
if (!c->bits)
return AVERROR(ENOMEM);
- c->bdsp.bswap_buf((uint32_t *) c->bits, (const uint32_t *) buf,
+ c->bdsp.bswap32_buf((uint32_t *) c->bits, (const uint32_t *) buf,
buf_size >> 2);
if ((ret = init_get_bits8(&gb, c->bits, buf_size)) < 0)
return ret;
diff --git a/libavcodec/rawdec.c b/libavcodec/rawdec.c
index c20c317fed..e15e72eb14 100644
--- a/libavcodec/rawdec.c
+++ b/libavcodec/rawdec.c
@@ -321,7 +321,7 @@ static int raw_decode(AVCodecContext *avctx, AVFrame *frame,
if (swap == 16)
context->bbdsp.bswap16_buf(context->bitstream_buf, (const uint16_t*)buf, buf_size / 2);
else if (swap == 32)
- context->bbdsp.bswap_buf(context->bitstream_buf, (const uint32_t*)buf, buf_size / 4);
+ context->bbdsp.bswap32_buf(context->bitstream_buf, (const uint32_t*)buf, buf_size / 4);
else
return AVERROR_INVALIDDATA;
buf = context->bitstream_buf;
diff --git a/libavcodec/riscv/bswapdsp_init.c b/libavcodec/riscv/bswapdsp_init.c
index abe84ec1f7..4d229c53e3 100644
--- a/libavcodec/riscv/bswapdsp_init.c
+++ b/libavcodec/riscv/bswapdsp_init.c
@@ -35,11 +35,11 @@ av_cold void ff_bswapdsp_init_riscv(BswapDSPContext *c)
#if (__riscv_xlen >= 64)
if (cpu_flags & AV_CPU_FLAG_RVB_BASIC)
- c->bswap_buf = ff_bswap32_buf_rvb;
+ c->bswap32_buf = ff_bswap32_buf_rvb;
#endif
#if HAVE_RVV
if (cpu_flags & AV_CPU_FLAG_RVV_I32) {
- c->bswap_buf = ff_bswap32_buf_rvv;
+ c->bswap32_buf = ff_bswap32_buf_rvv;
c->bswap16_buf = ff_bswap16_buf_rvv;
}
#endif
diff --git a/libavcodec/truemotion2.c b/libavcodec/truemotion2.c
index b168b9cda1..772504d65b 100644
--- a/libavcodec/truemotion2.c
+++ b/libavcodec/truemotion2.c
@@ -907,8 +907,8 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *rframe,
if ((ret = ff_reget_buffer(avctx, p, 0)) < 0)
return ret;
- l->bdsp.bswap_buf((uint32_t *) l->buffer, (const uint32_t *) buf,
- buf_size >> 2);
+ l->bdsp.bswap32_buf((uint32_t *) l->buffer, (const uint32_t *) buf,
+ buf_size >> 2);
if ((ret = tm2_read_header(l, l->buffer)) < 0) {
return ret;
diff --git a/libavcodec/truespeech.c b/libavcodec/truespeech.c
index 454121cc75..5689db675c 100644
--- a/libavcodec/truespeech.c
+++ b/libavcodec/truespeech.c
@@ -82,7 +82,7 @@ static void truespeech_read_frame(TSContext *dec, const uint8_t *input)
{
GetBitContext gb;
- dec->bdsp.bswap_buf((uint32_t *) dec->buffer, (const uint32_t *) input, 8);
+ dec->bdsp.bswap32_buf((uint32_t *) dec->buffer, (const uint32_t *) input, 8);
init_get_bits(&gb, dec->buffer, 32 * 8);
dec->vector[7] = ts_codebook[7][get_bits(&gb, 3)];
diff --git a/libavcodec/utvideodec.c b/libavcodec/utvideodec.c
index 83120d1b22..7b943faed8 100644
--- a/libavcodec/utvideodec.c
+++ b/libavcodec/utvideodec.c
@@ -150,9 +150,9 @@ static int decode_plane10(UtvideoContext *c, int plane_no,
}
memset(c->slice_bits + slice_size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
- c->bdsp.bswap_buf((uint32_t *) c->slice_bits,
- (uint32_t *)(src + slice_data_start + c->slices * 4),
- (slice_data_end - slice_data_start + 3) >> 2);
+ c->bdsp.bswap32_buf((uint32_t *) c->slice_bits,
+ (uint32_t *)(src + slice_data_start + c->slices * 4),
+ (slice_data_end - slice_data_start + 3) >> 2);
init_get_bits(&gb, c->slice_bits, slice_size * 8);
prev = 0x200;
@@ -311,9 +311,9 @@ static int decode_plane(UtvideoContext *c, int plane_no,
}
memset(c->slice_bits + slice_size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
- c->bdsp.bswap_buf((uint32_t *) c->slice_bits,
- (uint32_t *)(src + slice_data_start + c->slices * 4),
- (slice_data_end - slice_data_start + 3) >> 2);
+ c->bdsp.bswap32_buf((uint32_t *) c->slice_bits,
+ (uint32_t *)(src + slice_data_start + c->slices * 4),
+ (slice_data_end - slice_data_start + 3) >> 2);
init_get_bits(&gb, c->slice_bits, slice_size * 8);
prev = 0x80;
diff --git a/libavcodec/utvideoenc.c b/libavcodec/utvideoenc.c
index d4388da8ba..0cfec16ec3 100644
--- a/libavcodec/utvideoenc.c
+++ b/libavcodec/utvideoenc.c
@@ -490,9 +490,9 @@ static int encode_plane(AVCodecContext *avctx, const uint8_t *src,
slice_len = offset - slice_len;
/* Byteswap the written huffman codes */
- c->bdsp.bswap_buf((uint32_t *) c->slice_bits,
- (uint32_t *) c->slice_bits,
- slice_len >> 2);
+ c->bdsp.bswap32_buf((uint32_t *) c->slice_bits,
+ (uint32_t *) c->slice_bits,
+ slice_len >> 2);
/* Write the offset to the stream */
bytestream2_put_le32(pb, offset);
diff --git a/libavcodec/x86/bswapdsp.asm b/libavcodec/x86/bswapdsp.asm
index 2aa235e13c..4e6918ea5a 100644
--- a/libavcodec/x86/bswapdsp.asm
+++ b/libavcodec/x86/bswapdsp.asm
@@ -97,7 +97,7 @@ SECTION .text
add r0, 16
%endmacro
-; void ff_bswap_buf(uint32_t *dst, const uint32_t *src, int w);
+; void ff_bswap32_buf(uint32_t *dst, const uint32_t *src, int w);
%macro BSWAP32_BUF 0
%if cpuflag(ssse3)||cpuflag(avx2)
cglobal bswap32_buf, 3,4,3
diff --git a/libavcodec/x86/bswapdsp_init.c b/libavcodec/x86/bswapdsp_init.c
index 877bab1a2c..b00a3efd2f 100644
--- a/libavcodec/x86/bswapdsp_init.c
+++ b/libavcodec/x86/bswapdsp_init.c
@@ -32,9 +32,9 @@ av_cold void ff_bswapdsp_init_x86(BswapDSPContext *c)
int cpu_flags = av_get_cpu_flags();
if (EXTERNAL_SSE2(cpu_flags))
- c->bswap_buf = ff_bswap32_buf_sse2;
+ c->bswap32_buf = ff_bswap32_buf_sse2;
if (EXTERNAL_SSSE3(cpu_flags))
- c->bswap_buf = ff_bswap32_buf_ssse3;
+ c->bswap32_buf = ff_bswap32_buf_ssse3;
if (EXTERNAL_AVX2_FAST(cpu_flags))
- c->bswap_buf = ff_bswap32_buf_avx2;
+ c->bswap32_buf = ff_bswap32_buf_avx2;
}
diff --git a/libavcodec/ylc.c b/libavcodec/ylc.c
index 29c10f05da..8eace8c2e2 100644
--- a/libavcodec/ylc.c
+++ b/libavcodec/ylc.c
@@ -311,9 +311,9 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p,
memcpy(s->buffer, avpkt->data + toffset, boffset - toffset);
memset(s->buffer + boffset - toffset, 0, AV_INPUT_BUFFER_PADDING_SIZE);
- s->bdsp.bswap_buf((uint32_t *) s->buffer,
- (uint32_t *) s->buffer,
- (boffset - toffset + 3) >> 2);
+ s->bdsp.bswap32_buf((uint32_t *) s->buffer,
+ (uint32_t *) s->buffer,
+ (boffset - toffset + 3) >> 2);
if ((ret = init_get_bits8(&gb, s->buffer, boffset - toffset)) < 0)
return ret;
@@ -332,9 +332,9 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p,
memcpy(s->buffer, avpkt->data + boffset, avpkt->size - boffset);
memset(s->buffer + avpkt->size - boffset, 0, AV_INPUT_BUFFER_PADDING_SIZE);
- s->bdsp.bswap_buf((uint32_t *) s->buffer,
- (uint32_t *) s->buffer,
- (avpkt->size - boffset) >> 2);
+ s->bdsp.bswap32_buf((uint32_t *) s->buffer,
+ (uint32_t *) s->buffer,
+ (avpkt->size - boffset) >> 2);
if ((ret = init_get_bits8(&gb, s->buffer, avpkt->size - boffset)) < 0)
return ret;
diff --git a/tests/checkasm/bswapdsp.c b/tests/checkasm/bswapdsp.c
index d789e90de3..8ac6b54ae2 100644
--- a/tests/checkasm/bswapdsp.c
+++ b/tests/checkasm/bswapdsp.c
@@ -67,7 +67,7 @@ void checkasm_check_bswapdsp(void)
ff_bswapdsp_init(&h);
- if (check_func(h.bswap_buf, "bswap_buf"))
+ if (check_func(h.bswap32_buf, "bswap32_buf"))
check_bswap(uint32_t);
if (check_func(h.bswap16_buf, "bswap16_buf"))
--
2.31.1.windows.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 9+ messages in thread
* [FFmpeg-devel] [PATCH v3 4/4] swscale/bswapdsp: copy over bswapdsp from avcodec
2022-12-13 2:42 [FFmpeg-devel] [PATCH v3 1/4] avcodec/bswapdsp: remove unused cextern mindmark
2022-12-13 2:42 ` [FFmpeg-devel] [PATCH v3 2/4] avcodec/bswapdsp: rename bswap_buf to bswap32_buf mindmark
2022-12-13 2:42 ` [FFmpeg-devel] [PATCH v3 3/4] avcodec/bswapdsp: add documentation mindmark
@ 2022-12-13 2:42 ` mindmark
2022-12-26 21:31 ` [FFmpeg-devel] [PATCH v3 1/4] avcodec/bswapdsp: remove unused cextern Mark Reid
3 siblings, 0 replies; 9+ messages in thread
From: mindmark @ 2022-12-13 2:42 UTC (permalink / raw)
To: ffmpeg-devel; +Cc: Mark Reid
From: Mark Reid <mindmark@gmail.com>
There are some places in input.c that could use it too,
but they aren't currently being passed the SwsContext.
---
libswscale/Makefile | 1 +
libswscale/bswapdsp.c | 59 ++++++++++++
libswscale/bswapdsp.h | 66 +++++++++++++
libswscale/output.c | 36 +++----
libswscale/riscv/Makefile | 7 +-
libswscale/riscv/bswapdsp_init.c | 46 +++++++++
libswscale/riscv/bswapdsp_rvb.S | 68 +++++++++++++
libswscale/riscv/bswapdsp_rvv.S | 62 ++++++++++++
libswscale/swscale_internal.h | 3 +
libswscale/swscale_unscaled.c | 26 ++---
libswscale/utils.c | 2 +
libswscale/x86/Makefile | 6 +-
libswscale/x86/bswapdsp.asm | 157 +++++++++++++++++++++++++++++++
libswscale/x86/bswapdsp_init.c | 40 ++++++++
14 files changed, 537 insertions(+), 42 deletions(-)
create mode 100644 libswscale/bswapdsp.c
create mode 100644 libswscale/bswapdsp.h
create mode 100644 libswscale/riscv/bswapdsp_init.c
create mode 100644 libswscale/riscv/bswapdsp_rvb.S
create mode 100644 libswscale/riscv/bswapdsp_rvv.S
create mode 100644 libswscale/x86/bswapdsp.asm
create mode 100644 libswscale/x86/bswapdsp_init.c
diff --git a/libswscale/Makefile b/libswscale/Makefile
index 757997b401..4a916739c3 100644
--- a/libswscale/Makefile
+++ b/libswscale/Makefile
@@ -6,6 +6,7 @@ HEADERS = swscale.h \
version_major.h \
OBJS = alphablend.o \
+ bswapdsp.o \
hscale.o \
hscale_fast_bilinear.o \
gamma.o \
diff --git a/libswscale/bswapdsp.c b/libswscale/bswapdsp.c
new file mode 100644
index 0000000000..a164d89a76
--- /dev/null
+++ b/libswscale/bswapdsp.c
@@ -0,0 +1,59 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/bswap.h"
+#include "bswapdsp.h"
+
+static void bswap32_buf(uint32_t *dst, const uint32_t *src, int len)
+{
+ int i;
+
+ for (i = 0; i + 8 <= len; i += 8) {
+ dst[i + 0] = av_bswap32(src[i + 0]);
+ dst[i + 1] = av_bswap32(src[i + 1]);
+ dst[i + 2] = av_bswap32(src[i + 2]);
+ dst[i + 3] = av_bswap32(src[i + 3]);
+ dst[i + 4] = av_bswap32(src[i + 4]);
+ dst[i + 5] = av_bswap32(src[i + 5]);
+ dst[i + 6] = av_bswap32(src[i + 6]);
+ dst[i + 7] = av_bswap32(src[i + 7]);
+ }
+ for (; i < len; i++)
+ dst[i + 0] = av_bswap32(src[i + 0]);
+}
+
+static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len)
+{
+ while (len--)
+ *dst++ = av_bswap16(*src++);
+}
+
+av_cold void ff_sws_bswapdsp_init(BswapDSPContext *c)
+{
+ c->bswap32_buf = bswap32_buf;
+ c->bswap16_buf = bswap16_buf;
+
+#if ARCH_RISCV
+ ff_sws_bswapdsp_init_riscv(c);
+#elif ARCH_X86
+ ff_sws_bswapdsp_init_x86(c);
+#endif
+}
diff --git a/libswscale/bswapdsp.h b/libswscale/bswapdsp.h
new file mode 100644
index 0000000000..f2e12d1b8f
--- /dev/null
+++ b/libswscale/bswapdsp.h
@@ -0,0 +1,66 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef SWSCALE_BSWAPDSP_H
+#define SWSCALE_BSWAPDSP_H
+
+#include <stdint.h>
+
+/**
+ * @file
+ * Optimized buffer byte swapping routines.
+ */
+
+typedef struct BswapDSPContext {
+ /**
+ * Byte swap 32 bit elements in a buffer.
+ *
+ * @param dst Destination buffer.
+ * @param src Source buffer, may be the same as dst.
+ * @param len The number of elements in the buffer.
+ *
+ */
+ /** @{ */
+ void (*bswap32_buf)(uint32_t *dst, const uint32_t *src, int len);
+ /** @} */
+
+ /**
+ * Byte swap 16 bit elements in a buffer.
+ *
+ * @param dst Destination buffer.
+ * @param src Source buffer, may be the same as dst.
+ * @param len The number of elements in the buffer.
+ *
+ */
+ /** @{ */
+ void (*bswap16_buf)(uint16_t *dst, const uint16_t *src, int len);
+ /** @} */
+} BswapDSPContext;
+
+
+/**
+ * Initialize BswapDSPContext function pointers.
+ *
+ * @param c pointer to BswapDSPContext
+ *
+ */
+void ff_sws_bswapdsp_init(BswapDSPContext *c);
+void ff_sws_bswapdsp_init_riscv(BswapDSPContext *c);
+void ff_sws_bswapdsp_init_x86(BswapDSPContext *c);
+
+#endif /* SWSCALE_BSWAPDSP_H */
diff --git a/libswscale/output.c b/libswscale/output.c
index 5c85bff971..cd44081e3d 100644
--- a/libswscale/output.c
+++ b/libswscale/output.c
@@ -2313,13 +2313,11 @@ yuv2gbrp_full_X_c(SwsContext *c, const int16_t *lumFilter,
}
}
if (SH != 22 && (!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) {
- for (i = 0; i < dstW; i++) {
- dest16[0][i] = av_bswap16(dest16[0][i]);
- dest16[1][i] = av_bswap16(dest16[1][i]);
- dest16[2][i] = av_bswap16(dest16[2][i]);
- if (hasAlpha)
- dest16[3][i] = av_bswap16(dest16[3][i]);
- }
+ c->bsdsp.bswap16_buf(dest16[0], dest16[0], dstW);
+ c->bsdsp.bswap16_buf(dest16[1], dest16[1], dstW);
+ c->bsdsp.bswap16_buf(dest16[2], dest16[2], dstW);
+ if (hasAlpha)
+ c->bsdsp.bswap16_buf(dest16[3], dest16[3], dstW);
}
}
@@ -2385,13 +2383,11 @@ yuv2gbrp16_full_X_c(SwsContext *c, const int16_t *lumFilter,
dest16[3][i] = av_clip_uintp2(A, 30) >> 14;
}
if ((!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) {
- for (i = 0; i < dstW; i++) {
- dest16[0][i] = av_bswap16(dest16[0][i]);
- dest16[1][i] = av_bswap16(dest16[1][i]);
- dest16[2][i] = av_bswap16(dest16[2][i]);
- if (hasAlpha)
- dest16[3][i] = av_bswap16(dest16[3][i]);
- }
+ c->bsdsp.bswap16_buf(dest16[0], dest16[0], dstW);
+ c->bsdsp.bswap16_buf(dest16[1], dest16[1], dstW);
+ c->bsdsp.bswap16_buf(dest16[2], dest16[2], dstW);
+ if (hasAlpha)
+ c->bsdsp.bswap16_buf(dest16[3], dest16[3], dstW);
}
}
@@ -2461,13 +2457,11 @@ yuv2gbrpf32_full_X_c(SwsContext *c, const int16_t *lumFilter,
dest32[3][i] = av_float2int(float_mult * (float)(av_clip_uintp2(A, 30) >> 14));
}
if ((!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) {
- for (i = 0; i < dstW; i++) {
- dest32[0][i] = av_bswap32(dest32[0][i]);
- dest32[1][i] = av_bswap32(dest32[1][i]);
- dest32[2][i] = av_bswap32(dest32[2][i]);
- if (hasAlpha)
- dest32[3][i] = av_bswap32(dest32[3][i]);
- }
+ c->bsdsp.bswap32_buf(dest32[0], dest32[0], dstW);
+ c->bsdsp.bswap32_buf(dest32[1], dest32[1], dstW);
+ c->bsdsp.bswap32_buf(dest32[2], dest32[2], dstW);
+ if (hasAlpha)
+ c->bsdsp.bswap32_buf(dest32[3], dest32[3], dstW);
}
}
diff --git a/libswscale/riscv/Makefile b/libswscale/riscv/Makefile
index 214d877b62..bb33f2b707 100644
--- a/libswscale/riscv/Makefile
+++ b/libswscale/riscv/Makefile
@@ -1,2 +1,5 @@
-OBJS += riscv/rgb2rgb.o
-RVV-OBJS += riscv/rgb2rgb_rvv.o
+OBJS += riscv/bswapdsp_init.o \
+ riscv/bswapdsp_rvb.o \
+ riscv/rgb2rgb.o
+RVV-OBJS += riscv/bswapdsp_rvv.o \
+ riscv/rgb2rgb_rvv.o
diff --git a/libswscale/riscv/bswapdsp_init.c b/libswscale/riscv/bswapdsp_init.c
new file mode 100644
index 0000000000..9fedcde3fa
--- /dev/null
+++ b/libswscale/riscv/bswapdsp_init.c
@@ -0,0 +1,46 @@
+/*
+ * Copyright © 2022 Rémi Denis-Courmont.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libswscale/bswapdsp.h"
+
+void ff_sws_bswap32_buf_rvb(uint32_t *dst, const uint32_t *src, int len);
+void ff_sws_bswap32_buf_rvv(uint32_t *dst, const uint32_t *src, int len);
+void ff_sws_bswap16_buf_rvv(uint16_t *dst, const uint16_t *src, int len);
+
+av_cold void ff_sws_bswapdsp_init_riscv(BswapDSPContext *c)
+{
+ int cpu_flags = av_get_cpu_flags();
+
+#if (__riscv_xlen >= 64)
+ if (cpu_flags & AV_CPU_FLAG_RVB_BASIC)
+ c->bswap32_buf = ff_sws_bswap32_buf_rvb;
+#endif
+#if HAVE_RVV
+ if (cpu_flags & AV_CPU_FLAG_RVV_I32) {
+ c->bswap32_buf = ff_sws_bswap32_buf_rvv;
+ c->bswap16_buf = ff_sws_bswap16_buf_rvv;
+ }
+#endif
+}
diff --git a/libswscale/riscv/bswapdsp_rvb.S b/libswscale/riscv/bswapdsp_rvb.S
new file mode 100644
index 0000000000..92edbce7cd
--- /dev/null
+++ b/libswscale/riscv/bswapdsp_rvb.S
@@ -0,0 +1,68 @@
+/*
+ * Copyright © 2022 Rémi Denis-Courmont.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/riscv/asm.S"
+
+#if (__riscv_xlen >= 64)
+func ff_sws_bswap32_buf_rvb, zbb
+ andi t0, a1, 4
+ beqz t0, 1f
+ /* Align a1 (input) to 64-bit */
+ lwu t0, (a1)
+ addi a0, a0, 4
+ rev8 t0, t0
+ addi a2, a2, -1
+ srli t0, t0, __riscv_xlen - 32
+ addi a1, a1, 4
+ sw t0, -4(a0)
+1:
+ andi a3, a2, -2
+ sh2add a2, a2, a0
+ beqz a3, 3f
+ sh2add a3, a3, a0
+2: /* 2 elements (64 bits) at a time on a 64-bit boundary */
+ ld t0, (a1)
+ addi a0, a0, 8
+ rev8 t0, t0
+#if (__riscv_xlen == 64)
+ srli t2, t0, 32
+ sw t0, -4(a0)
+#else
+ srli t1, t0, __riscv_xlen - 64
+ srli t2, t0, __riscv_xlen - 32
+ sw t1, -4(a0)
+#endif
+ addi a1, a1, 8
+ sw t2, -8(a0)
+ bne a0, a3, 2b
+3:
+ beq a0, a2, 5f
+4: /* Process last element */
+ lwu t0, (a1)
+ addi a0, a0, 4
+ rev8 t0, t0
+ addi a1, a1, 4
+ srli t0, t0, __riscv_xlen - 32
+ sw t0, -4(a0)
+5:
+ ret
+endfunc
+#endif
diff --git a/libswscale/riscv/bswapdsp_rvv.S b/libswscale/riscv/bswapdsp_rvv.S
new file mode 100644
index 0000000000..923cb9bc9d
--- /dev/null
+++ b/libswscale/riscv/bswapdsp_rvv.S
@@ -0,0 +1,62 @@
+/*
+ * Copyright © 2022 Rémi Denis-Courmont.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/riscv/asm.S"
+
+func ff_sws_bswap32_buf_rvv, zve32x
+ li t4, 4
+ addi t1, a0, 1
+ addi t2, a0, 2
+ addi t3, a0, 3
+1:
+ vsetvli t0, a2, e8, m1, ta, ma
+ vlseg4e8.v v8, (a1)
+ sub a2, a2, t0
+ sh2add a1, t0, a1
+ vsse8.v v8, (t3), t4
+ sh2add t3, t0, t3
+ vsse8.v v9, (t2), t4
+ sh2add t2, t0, t2
+ vsse8.v v10, (t1), t4
+ sh2add t1, t0, t1
+ vsse8.v v11, (a0), t4
+ sh2add a0, t0, a0
+ bnez a2, 1b
+
+ ret
+endfunc
+
+func ff_sws_bswap16_buf_rvv, zve32x
+ li t2, 2
+ addi t1, a0, 1
+1:
+ vsetvli t0, a2, e8, m1, ta, ma
+ vlseg2e8.v v8, (a1)
+ sub a2, a2, t0
+ sh1add a1, t0, a1
+ vsse8.v v8, (t1), t2
+ sh1add t1, t0, t1
+ vsse8.v v9, (a0), t2
+ sh1add a0, t0, a0
+ bnez a2, 1b
+
+ ret
+endfunc
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index abeebbb002..30ce4907cc 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -36,6 +36,7 @@
#include "libavutil/slicethread.h"
#include "libavutil/ppc/util_altivec.h"
#include "libavutil/half2float.h"
+#include "bswapdsp.h"
#define STR(s) AV_TOSTRING(s) // AV_STRINGIFY is too long
@@ -682,6 +683,8 @@ typedef struct SwsContext {
atomic_int data_unaligned_warned;
Half2FloatTables *h2f_tables;
+
+ BswapDSPContext bsdsp;
} SwsContext;
//FIXME check init (where 0)
diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
index 9af2e7ecc3..0010ab24d1 100644
--- a/libswscale/swscale_unscaled.c
+++ b/libswscale/swscale_unscaled.c
@@ -468,7 +468,7 @@ static int bswap_16bpc(SwsContext *c, const uint8_t *src[],
int srcStride[], int srcSliceY, int srcSliceH,
uint8_t *dst[], int dstStride[])
{
- int i, j, p;
+ int i, p;
for (p = 0; p < 4; p++) {
int srcstr = srcStride[p] / 2;
@@ -480,9 +480,7 @@ static int bswap_16bpc(SwsContext *c, const uint8_t *src[],
continue;
dstPtr += (srcSliceY >> c->chrDstVSubSample) * dststr;
for (i = 0; i < (srcSliceH >> c->chrDstVSubSample); i++) {
- for (j = 0; j < min_stride; j++) {
- dstPtr[j] = av_bswap16(srcPtr[j]);
- }
+ c->bsdsp.bswap16_buf(dstPtr, srcPtr, min_stride);
srcPtr += srcstr;
dstPtr += dststr;
}
@@ -495,7 +493,7 @@ static int bswap_32bpc(SwsContext *c, const uint8_t *src[],
int srcStride[], int srcSliceY, int srcSliceH,
uint8_t *dst[], int dstStride[])
{
- int i, j, p;
+ int i, p;
for (p = 0; p < 4; p++) {
int srcstr = srcStride[p] / 4;
@@ -507,9 +505,7 @@ static int bswap_32bpc(SwsContext *c, const uint8_t *src[],
continue;
dstPtr += (srcSliceY >> c->chrDstVSubSample) * dststr;
for (i = 0; i < (srcSliceH >> c->chrDstVSubSample); i++) {
- for (j = 0; j < min_stride; j++) {
- dstPtr[j] = av_bswap32(srcPtr[j]);
- }
+ c->bsdsp.bswap32_buf(dstPtr, srcPtr, min_stride);
srcPtr += srcstr;
dstPtr += dststr;
}
@@ -1616,19 +1612,17 @@ static int rgbToRgbWrapper(SwsContext *c, const uint8_t *src[], int srcStride[],
conv(srcPtr, dstPtr + dstStride[0] * srcSliceY,
(srcSliceH - 1) * srcStride[0] + c->srcW * srcBpp);
else {
- int i, j;
+ int i;
dstPtr += dstStride[0] * srcSliceY;
for (i = 0; i < srcSliceH; i++) {
if(src_bswap) {
- for(j=0; j<c->srcW; j++)
- ((uint16_t*)c->formatConvBuffer)[j] = av_bswap16(((uint16_t*)srcPtr)[j]);
+ c->bsdsp.bswap16_buf((uint16_t*)c->formatConvBuffer, (uint16_t*)srcPtr, c->srcW);
conv(c->formatConvBuffer, dstPtr, c->srcW * srcBpp);
}else
conv(srcPtr, dstPtr, c->srcW * srcBpp);
if(dst_bswap)
- for(j=0; j<c->srcW; j++)
- ((uint16_t*)dstPtr)[j] = av_bswap16(((uint16_t*)dstPtr)[j]);
+ c->bsdsp.bswap16_buf((uint16_t*)dstPtr, (uint16_t*)dstPtr, c->srcW);
srcPtr += srcStride[0];
dstPtr += dstStride[0];
}
@@ -1932,16 +1926,14 @@ static int planarCopyWrapper(SwsContext *c, const uint8_t *src[],
isBE(c->srcFormat) != isBE(c->dstFormat)) {
for (i = 0; i < height; i++) {
- for (j = 0; j < length; j++)
- ((uint16_t *) dstPtr)[j] = av_bswap16(((const uint16_t *) srcPtr)[j]);
+ c->bsdsp.bswap16_buf((uint16_t *)dstPtr, (const uint16_t *)srcPtr, length);
srcPtr += srcStride[plane];
dstPtr += dstStride[plane];
}
} else if (isFloat(c->srcFormat) && isFloat(c->dstFormat) &&
isBE(c->srcFormat) != isBE(c->dstFormat)) { /* swap float plane */
for (i = 0; i < height; i++) {
- for (j = 0; j < length; j++)
- ((uint32_t *) dstPtr)[j] = av_bswap32(((const uint32_t *) srcPtr)[j]);
+ c->bsdsp.bswap32_buf((uint32_t *)dstPtr, (const uint32_t *)srcPtr, length);
srcPtr += srcStride[plane];
dstPtr += dstStride[plane];
}
diff --git a/libswscale/utils.c b/libswscale/utils.c
index 90734f66ef..3ff8c2f84f 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -1921,6 +1921,8 @@ static av_cold int sws_init_single_context(SwsContext *c, SwsFilter *srcFilter,
return 0;
}
+ ff_sws_bswapdsp_init(&c->bsdsp);
+
/* unscaled special cases */
if (unscaled && !usesHFilter && !usesVFilter &&
(c->srcRange == c->dstRange || isAnyRGB(dstFormat) ||
diff --git a/libswscale/x86/Makefile b/libswscale/x86/Makefile
index 68391494be..f7cdda5a25 100644
--- a/libswscale/x86/Makefile
+++ b/libswscale/x86/Makefile
@@ -1,6 +1,7 @@
$(SUBDIR)x86/swscale_mmx.o: CFLAGS += $(NOREDZONE_FLAGS)
-OBJS += x86/rgb2rgb.o \
+OBJS += x86/bswapdsp_init.o \
+ x86/rgb2rgb.o \
x86/swscale.o \
x86/yuv2rgb.o \
@@ -8,7 +9,8 @@ MMX-OBJS += x86/hscale_fast_bilinear_simd.o \
OBJS-$(CONFIG_XMM_CLOBBER_TEST) += x86/w64xmmtest.o
-X86ASM-OBJS += x86/input.o \
+X86ASM-OBJS += x86/bswapdsp.o \
+ x86/input.o \
x86/output.o \
x86/scale.o \
x86/scale_avx2.o \
diff --git a/libswscale/x86/bswapdsp.asm b/libswscale/x86/bswapdsp.asm
new file mode 100644
index 0000000000..84fc6fb000
--- /dev/null
+++ b/libswscale/x86/bswapdsp.asm
@@ -0,0 +1,157 @@
+;******************************************************************************
+;* optimized bswap buffer functions
+;* Copyright (c) 2008 Loren Merritt
+;* Copyright (c) 2003-2013 Michael Niedermayer
+;* Copyright (c) 2013 Daniel Kang
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+pb_bswap32: db 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
+
+SECTION .text
+
+; %1 = a (aligned) or u (unaligned): picks mova/movu and is appended to the loop labels
+%macro BSWAP_LOOPS 1
+ mov r3d, r2d ; keep the full element count in r3d
+ sar r2d, 3 ; r2d = count / 8
+ jz .left4_%1 ; fewer than 8 elements: skip the main loop
+%if cpuflag(avx2)
+ sar r2d, 1 ; r2d = count / 16
+ jz .left8_%1 ; 8..15 elements: no full main-loop iteration
+%endif
+.loop8_%1:
+ mov%1 m0, [r1 + 0] ; load two vectors from the source pointer r1
+ mov%1 m1, [r1 + mmsize]
+%if cpuflag(ssse3)||cpuflag(avx2)
+ pshufb m0, m2 ; m2 = byte-reversal mask, loaded by the macro user before expanding this macro
+ pshufb m1, m2
+ mov%1 [r0 + 0], m0 ; store to the destination pointer r0
+ mov%1 [r0 + mmsize], m1
+%else
+ pshuflw m0, m0, 10110001b ; swap the two 16-bit halves of each dword, low qword
+ pshuflw m1, m1, 10110001b
+ pshufhw m0, m0, 10110001b ; same for the high qword
+ pshufhw m1, m1, 10110001b
+ mova m2, m0 ; copy, then swap the bytes inside each 16-bit word with shifts and or
+ mova m3, m1
+ psllw m0, 8
+ psllw m1, 8
+ psrlw m2, 8
+ psrlw m3, 8
+ por m2, m0
+ por m3, m1
+ mov%1 [r0 + 0], m2
+ mov%1 [r0 + 16], m3 ; literal 16 instead of mmsize; this branch is assembled only without ssse3/avx2
+%endif
+ add r0, mmsize*2 ; each iteration consumes two full vectors
+ add r1, mmsize*2
+ dec r2d
+ jnz .loop8_%1
+%if cpuflag(avx2)
+.left8_%1:
+ mov r2d, r3d ; restore the full count
+ test r3d, 8 ; bit 3 of the count set: one more full-width vector to process
+ jz .left4_%1
+ mov%1 m0, [r1]
+ pshufb m0, m2
+ mov%1 [r0 + 0], m0
+ add r1, mmsize
+ add r0, mmsize
+%endif
+.left4_%1:
+ mov r2d, r3d ; restore the full count for the tail code at .left
+ test r3d, 4 ; bit 2 of the count set: four more dwords (16 bytes) to process
+ jz .left ; .left is not defined here; the macro user must provide it
+ mov%1 xm0, [r1]
+%if cpuflag(ssse3)
+ pshufb xm0, xm2
+ mov%1 [r0], xm0
+%else
+ pshuflw m0, m0, 10110001b ; same shuffle and shift sequence as in the main loop
+ pshufhw m0, m0, 10110001b
+ mova m2, m0
+ psllw m0, 8
+ psrlw m2, 8
+ por m2, m0
+ mov%1 [r0], m2
+%endif
+ add r1, 16
+ add r0, 16 ; execution falls through into whatever follows the macro expansion
+%endmacro
+
+; void ff_sws_bswap32_buf_<cpu>(uint32_t *dst, const uint32_t *src, int w);
+%macro BSWAP32_BUF 0
+%if cpuflag(ssse3)||cpuflag(avx2)
+cglobal sws_bswap32_buf, 3,4,3
+ mov r3, r1 ; r1 is the pointer read from, r0 the pointer written to, r2d the element count
+ VBROADCASTI128 m2, [pb_bswap32] ; m2 = shuffle mask reversing the bytes of every dword
+%else
+cglobal sws_bswap32_buf, 3,4,5
+ mov r3, r1 ; r1 is the pointer read from, r0 the pointer written to, r2d the element count
+%endif
+ or r3, r0 ; merge both pointers so one test covers the alignment of each
+ test r3, mmsize - 1
+ jz .start_align ; both pointers mmsize-aligned: take the aligned-move variant
+ BSWAP_LOOPS u
+ jmp .left
+.start_align:
+ BSWAP_LOOPS a
+.left: ; tail; r2d holds the full count again, restored inside BSWAP_LOOPS
+%if cpuflag(ssse3)
+ test r2d, 2 ; bit 1 of the count set: two dwords via a 64-bit move
+ jz .left1
+ movq xm0, [r1]
+ pshufb xm0, xm2
+ movq [r0], xm0
+ add r1, 8
+ add r0, 8
+.left1:
+ test r2d, 1 ; bit 0 of the count set: one last dword, swapped in a general register
+ jz .end
+ mov r2d, [r1]
+ bswap r2d
+ mov [r0], r2d
+%else
+ and r2d, 3 ; 0..3 dwords remain
+ jz .end
+.loop2: ; scalar tail, one dword per iteration
+ mov r3d, [r1]
+ bswap r3d
+ mov [r0], r3d
+ add r1, 4
+ add r0, 4
+ dec r2d
+ jnz .loop2
+%endif
+.end:
+ RET
+%endmacro
+
+INIT_XMM sse2
+BSWAP32_BUF
+
+INIT_XMM ssse3
+BSWAP32_BUF
+
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+BSWAP32_BUF
+%endif
diff --git a/libswscale/x86/bswapdsp_init.c b/libswscale/x86/bswapdsp_init.c
new file mode 100644
index 0000000000..57f2944db7
--- /dev/null
+++ b/libswscale/x86/bswapdsp_init.c
@@ -0,0 +1,40 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+#include "libswscale/bswapdsp.h"
+
+void ff_sws_bswap32_buf_sse2(uint32_t *dst, const uint32_t *src, int w);
+void ff_sws_bswap32_buf_ssse3(uint32_t *dst, const uint32_t *src, int w);
+void ff_sws_bswap32_buf_avx2(uint32_t *dst, const uint32_t *src, int w);
+
+av_cold void ff_sws_bswapdsp_init_x86(BswapDSPContext *c) /* installs an x86 SIMD bswap32_buf into *c according to the CPU flags */
+{
+ int cpu_flags = av_get_cpu_flags();
+
+ if (EXTERNAL_SSE2(cpu_flags)) /* checks are not exclusive: a later match overwrites an earlier one */
+ c->bswap32_buf = ff_sws_bswap32_buf_sse2;
+ if (EXTERNAL_SSSE3(cpu_flags))
+ c->bswap32_buf = ff_sws_bswap32_buf_ssse3;
+ if (EXTERNAL_AVX2_FAST(cpu_flags))
+ c->bswap32_buf = ff_sws_bswap32_buf_avx2;
+} /* only bswap32_buf is assigned here; no other member of *c is written */
--
2.31.1.windows.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 9+ messages in thread