From: Ben Avison <bavison@riscosopen.org> To: ffmpeg-devel@ffmpeg.org Cc: Ben Avison <bavison@riscosopen.org> Subject: [FFmpeg-devel] [PATCH v3 04/10] avcodec/vc1: Introduce fast path for unescaping bitstream buffer Date: Thu, 31 Mar 2022 18:23:45 +0100 Message-ID: <20220331172351.550818-5-bavison@riscosopen.org> (raw) In-Reply-To: <20220331172351.550818-1-bavison@riscosopen.org> Includes a checkasm test. Signed-off-by: Ben Avison <bavison@riscosopen.org> --- libavcodec/vc1dec.c | 20 ++++++------ libavcodec/vc1dsp.c | 2 ++ libavcodec/vc1dsp.h | 3 ++ tests/checkasm/vc1dsp.c | 67 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 82 insertions(+), 10 deletions(-) diff --git a/libavcodec/vc1dec.c b/libavcodec/vc1dec.c index e279ffd1c1..0426e8a752 100644 --- a/libavcodec/vc1dec.c +++ b/libavcodec/vc1dec.c @@ -491,7 +491,7 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx) size = next - start - 4; if (size <= 0) continue; - buf2_size = vc1_unescape_buffer(start + 4, size, buf2); + buf2_size = v->vc1dsp.vc1_unescape_buffer(start + 4, size, buf2); init_get_bits(&gb, buf2, buf2_size * 8); switch (AV_RB32(start)) { case VC1_CODE_SEQHDR: @@ -681,7 +681,7 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data, case VC1_CODE_FRAME: if (avctx->hwaccel) buf_start = start; - buf_size2 = vc1_unescape_buffer(start + 4, size, buf2); + buf_size2 = v->vc1dsp.vc1_unescape_buffer(start + 4, size, buf2); break; case VC1_CODE_FIELD: { int buf_size3; @@ -698,8 +698,8 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data, ret = AVERROR(ENOMEM); goto err; } - buf_size3 = vc1_unescape_buffer(start + 4, size, - slices[n_slices].buf); + buf_size3 = v->vc1dsp.vc1_unescape_buffer(start + 4, size, + slices[n_slices].buf); init_get_bits(&slices[n_slices].gb, slices[n_slices].buf, buf_size3 << 3); slices[n_slices].mby_start = avctx->coded_height + 31 >> 5; @@ -710,7 +710,7 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data, break; } case VC1_CODE_ENTRYPOINT: /* it should be before frame data */ - buf_size2 = vc1_unescape_buffer(start + 4, size, buf2); + buf_size2 = v->vc1dsp.vc1_unescape_buffer(start + 4, size, buf2); init_get_bits(&s->gb, buf2, buf_size2 * 8); ff_vc1_decode_entry_point(avctx, v, &s->gb); break; @@ -727,8 +727,8 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data, ret = AVERROR(ENOMEM); goto err; } - buf_size3 = vc1_unescape_buffer(start + 4, size, - slices[n_slices].buf); + buf_size3 = v->vc1dsp.vc1_unescape_buffer(start + 4, size, + slices[n_slices].buf); init_get_bits(&slices[n_slices].gb, slices[n_slices].buf, buf_size3 << 3); slices[n_slices].mby_start = get_bits(&slices[n_slices].gb, 9); @@ -762,7 +762,7 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data, ret = AVERROR(ENOMEM); goto err; } - buf_size3 = vc1_unescape_buffer(divider + 4, buf + buf_size - divider - 4, slices[n_slices].buf); + buf_size3 = v->vc1dsp.vc1_unescape_buffer(divider + 4, buf + buf_size - divider - 4, slices[n_slices].buf); init_get_bits(&slices[n_slices].gb, slices[n_slices].buf, buf_size3 << 3); slices[n_slices].mby_start = s->mb_height + 1 >> 1; @@ -771,9 +771,9 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data, n_slices1 = n_slices - 1; n_slices++; } - buf_size2 = vc1_unescape_buffer(buf, divider - buf, buf2); + buf_size2 = v->vc1dsp.vc1_unescape_buffer(buf, divider - buf, buf2); } else { - buf_size2 = vc1_unescape_buffer(buf, buf_size, buf2); + buf_size2 = v->vc1dsp.vc1_unescape_buffer(buf, buf_size, buf2); } init_get_bits(&s->gb, buf2, buf_size2*8); } else{ diff --git a/libavcodec/vc1dsp.c b/libavcodec/vc1dsp.c index f651d7d461..f1b7bb2397 100644 --- a/libavcodec/vc1dsp.c +++ b/libavcodec/vc1dsp.c @@ -34,6 +34,7 @@ #include "rnd_avg.h" #include "vc1dsp.h" #include "startcode.h" +#include "vc1_common.h" /* Apply overlap transform to horizontal edge */ static void vc1_v_overlap_c(uint8_t *src, ptrdiff_t stride) @@ -1030,6 +1031,7 @@ av_cold void ff_vc1dsp_init(VC1DSPContext *dsp) #endif /* CONFIG_WMV3IMAGE_DECODER || CONFIG_VC1IMAGE_DECODER */ dsp->startcode_find_candidate = ff_startcode_find_candidate_c; + dsp->vc1_unescape_buffer = vc1_unescape_buffer; if (ARCH_AARCH64) ff_vc1dsp_init_aarch64(dsp); diff --git a/libavcodec/vc1dsp.h b/libavcodec/vc1dsp.h index fe60025a2a..7ed1776ca7 100644 --- a/libavcodec/vc1dsp.h +++ b/libavcodec/vc1dsp.h @@ -80,6 +80,9 @@ typedef struct VC1DSPContext { * one or more further zero bytes and a one byte. */ int (*startcode_find_candidate)(const uint8_t *buf, int size); + + /* Copy a buffer, removing startcode emulation escape bytes as we go */ + int (*vc1_unescape_buffer)(const uint8_t *src, int size, uint8_t *dst); } VC1DSPContext; void ff_vc1dsp_init(VC1DSPContext* c); diff --git a/tests/checkasm/vc1dsp.c b/tests/checkasm/vc1dsp.c index 7d4457306f..52628d15e4 100644 --- a/tests/checkasm/vc1dsp.c +++ b/tests/checkasm/vc1dsp.c @@ -374,6 +374,70 @@ static void check_loop_filter(void) } } +#define TEST_UNESCAPE \ + do { \ + for (int count = 100; count > 0; --count) { \ + escaped_offset = rnd() & 7; \ + unescaped_offset = rnd() & 7; \ + escaped_len = (1u << (rnd() % 8) + 3) - (rnd() & 7); \ + RANDOMIZE_BUFFER8(unescaped, UNESCAPE_BUF_SIZE); \ + len0 = call_ref(escaped0 + escaped_offset, escaped_len, unescaped0 + unescaped_offset); \ + len1 = call_new(escaped1 + escaped_offset, escaped_len, unescaped1 + unescaped_offset); \ + if (len0 != len1 || memcmp(unescaped0, unescaped1, UNESCAPE_BUF_SIZE)) \ + fail(); \ + } \ + } while (0) + +static void check_unescape(void) +{ + /* This appears to be a typical length of buffer in use */ +#define LOG2_UNESCAPE_BUF_SIZE 17 +#define UNESCAPE_BUF_SIZE (1u<<LOG2_UNESCAPE_BUF_SIZE) + LOCAL_ALIGNED_8(uint8_t, escaped0, [UNESCAPE_BUF_SIZE]); + LOCAL_ALIGNED_8(uint8_t, escaped1, [UNESCAPE_BUF_SIZE]); + LOCAL_ALIGNED_8(uint8_t, unescaped0, [UNESCAPE_BUF_SIZE]); + LOCAL_ALIGNED_8(uint8_t, unescaped1, [UNESCAPE_BUF_SIZE]); + + VC1DSPContext h; + + ff_vc1dsp_init(&h); + + if (check_func(h.vc1_unescape_buffer, "vc1dsp.vc1_unescape_buffer")) { + int len0, len1, escaped_offset, unescaped_offset, escaped_len; + declare_func_emms(AV_CPU_FLAG_MMX, int, const uint8_t *, int, uint8_t *); + + /* Test data which consists of escapes sequences packed as tightly as possible */ + for (int x = 0; x < UNESCAPE_BUF_SIZE; ++x) + escaped1[x] = escaped0[x] = 3 * (x % 3 == 0); + TEST_UNESCAPE; + + /* Test random data */ + RANDOMIZE_BUFFER8(escaped, UNESCAPE_BUF_SIZE); + TEST_UNESCAPE; + + /* Test data with escape sequences at random intervals */ + for (int x = 0; x <= UNESCAPE_BUF_SIZE - 4;) { + int gap, gap_msb; + escaped1[x+0] = escaped0[x+0] = 0; + escaped1[x+1] = escaped0[x+1] = 0; + escaped1[x+2] = escaped0[x+2] = 3; + escaped1[x+3] = escaped0[x+3] = rnd() & 3; + gap_msb = 2u << (rnd() % 8); + gap = (rnd() &~ -gap_msb) | gap_msb; + x += gap; + } + TEST_UNESCAPE; + + /* Test data which is known to contain no escape sequences */ + memset(escaped0, 0xFF, UNESCAPE_BUF_SIZE); + memset(escaped1, 0xFF, UNESCAPE_BUF_SIZE); + TEST_UNESCAPE; + + /* Benchmark the no-escape-sequences case */ + bench_new(escaped1, UNESCAPE_BUF_SIZE, unescaped1); + } +} + void checkasm_check_vc1dsp(void) { check_inv_trans_inplace(); @@ -382,4 +446,7 @@ void checkasm_check_vc1dsp(void) check_loop_filter(); report("loop_filter"); + + check_unescape(); + report("unescape_buffer"); } -- 2.25.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next prev parent reply other threads:[~2022-03-31 17:25 UTC|newest] Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top 2022-03-31 17:23 [FFmpeg-devel] [PATCH v3 00/10] avcodec/vc1: Arm optimisations Ben Avison 2022-03-31 17:23 ` [FFmpeg-devel] [PATCH v3 01/10] checkasm: Add vc1dsp in-loop deblocking filter tests Ben Avison 2022-03-31 17:23 ` [FFmpeg-devel] [PATCH v3 02/10] checkasm: Add vc1dsp inverse transform tests Ben Avison 2022-03-31 17:23 ` [FFmpeg-devel] [PATCH v3 03/10] checkasm: Add idctdsp add/put-pixels-clamped tests Ben Avison 2022-03-31 17:23 ` Ben Avison [this message] 2022-03-31 17:23 ` [FFmpeg-devel] [PATCH v3 05/10] avcodec/vc1: Arm 64-bit NEON deblocking filter fast paths Ben Avison 2022-03-31 17:23 ` [FFmpeg-devel] [PATCH v3 06/10] avcodec/vc1: Arm 32-bit " Ben Avison 2022-03-31 17:23 ` [FFmpeg-devel] [PATCH v3 07/10] avcodec/vc1: Arm 64-bit NEON inverse transform " Ben Avison 2022-03-31 17:23 ` [FFmpeg-devel] [PATCH v3 08/10] avcodec/idctdsp: Arm 64-bit NEON block add and clamp " Ben Avison 2022-03-31 17:23 ` [FFmpeg-devel] [PATCH v3 09/10] avcodec/vc1: Arm 64-bit NEON unescape fast path Ben Avison 2022-03-31 17:23 ` [FFmpeg-devel] [PATCH v3 10/10] avcodec/vc1: Arm 32-bit " Ben Avison 2022-03-31 21:50 ` [FFmpeg-devel] [PATCH v3 00/10] avcodec/vc1: Arm optimisations Martin Storsjö 2022-04-01 7:08 ` Martin Storsjö
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20220331172351.550818-5-bavison@riscosopen.org \ --to=bavison@riscosopen.org \ --cc=ffmpeg-devel@ffmpeg.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel This inbox may be cloned and mirrored by anyone: git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git # If you have public-inbox 1.1+ installed, you may # initialize and index your mirror using the following commands: public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \ ffmpegdev@gitmailbox.com public-inbox-index ffmpegdev Example config snippet for mirrors. AGPL code for this site: git clone https://public-inbox.org/public-inbox.git