* [FFmpeg-devel] [PATCH v4 1/2] checkasm: add hevc_deblock chroma test @ 2023-03-29 14:13 J. Dekker 2023-03-29 14:13 ` [FFmpeg-devel] [PATCH v4 2/2] lavc/aarch64: add hevc deblock chroma 8-12bit J. Dekker 2023-03-29 20:13 ` [FFmpeg-devel] [PATCH v4 1/2] checkasm: add hevc_deblock chroma test Martin Storsjö 0 siblings, 2 replies; 6+ messages in thread From: J. Dekker @ 2023-03-29 14:13 UTC (permalink / raw) To: ffmpeg-devel Signed-off-by: J. Dekker <jdek@itanimul.li> --- Will support other variants in the second version of these tests. tests/checkasm/Makefile | 2 +- tests/checkasm/checkasm.c | 1 + tests/checkasm/checkasm.h | 1 + tests/checkasm/hevc_deblock.c | 100 ++++++++++++++++++++++++++++++++++ tests/fate/checkasm.mak | 1 + 5 files changed, 104 insertions(+), 1 deletion(-) create mode 100644 tests/checkasm/hevc_deblock.c diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile index a6f06c7007..3e62a22bd6 100644 --- a/tests/checkasm/Makefile +++ b/tests/checkasm/Makefile @@ -28,7 +28,7 @@ AVCODECOBJS-$(CONFIG_HUFFYUV_DECODER) += huffyuvdsp.o AVCODECOBJS-$(CONFIG_JPEG2000_DECODER) += jpeg2000dsp.o AVCODECOBJS-$(CONFIG_OPUS_DECODER) += opusdsp.o AVCODECOBJS-$(CONFIG_PIXBLOCKDSP) += pixblockdsp.o -AVCODECOBJS-$(CONFIG_HEVC_DECODER) += hevc_add_res.o hevc_idct.o hevc_sao.o hevc_pel.o +AVCODECOBJS-$(CONFIG_HEVC_DECODER) += hevc_add_res.o hevc_deblock.o hevc_idct.o hevc_sao.o hevc_pel.o AVCODECOBJS-$(CONFIG_UTVIDEO_DECODER) += utvideodsp.o AVCODECOBJS-$(CONFIG_V210_DECODER) += v210dec.o AVCODECOBJS-$(CONFIG_V210_ENCODER) += v210enc.o diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c index e96d84a7da..c2184d260d 100644 --- a/tests/checkasm/checkasm.c +++ b/tests/checkasm/checkasm.c @@ -116,6 +116,7 @@ static const struct { #endif #if CONFIG_HEVC_DECODER { "hevc_add_res", checkasm_check_hevc_add_res }, + { "hevc_deblock", checkasm_check_hevc_deblock }, { "hevc_idct", checkasm_check_hevc_idct }, { "hevc_pel", checkasm_check_hevc_pel }, { 
"hevc_sao", checkasm_check_hevc_sao }, diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h index 8744a81218..89c643e6a0 100644 --- a/tests/checkasm/checkasm.h +++ b/tests/checkasm/checkasm.h @@ -60,6 +60,7 @@ void checkasm_check_h264dsp(void); void checkasm_check_h264pred(void); void checkasm_check_h264qpel(void); void checkasm_check_hevc_add_res(void); +void checkasm_check_hevc_deblock(void); void checkasm_check_hevc_idct(void); void checkasm_check_hevc_pel(void); void checkasm_check_hevc_sao(void); diff --git a/tests/checkasm/hevc_deblock.c b/tests/checkasm/hevc_deblock.c new file mode 100644 index 0000000000..f73e68e8a6 --- /dev/null +++ b/tests/checkasm/hevc_deblock.c @@ -0,0 +1,100 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with FFmpeg; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include <string.h> + +#include "libavutil/intreadwrite.h" +#include "libavutil/mem_internal.h" + +#include "libavcodec/avcodec.h" +#include "libavcodec/hevcdsp.h" + +#include "checkasm.h" + +static const uint32_t pixel_mask[3] = { 0xffffffff, 0x03ff03ff, 0x0fff0fff }; + +#define SIZEOF_PIXEL ((bit_depth + 7) / 8) +#define BUF_STRIDE (8 * 2) +#define BUF_LINES (8) +#define BUF_OFFSET (BUF_STRIDE * BUF_LINES) +#define BUF_SIZE (BUF_STRIDE * BUF_LINES + BUF_OFFSET * 2) + +#define randomize_buffers(buf0, buf1, size) \ + do { \ + uint32_t mask = pixel_mask[(bit_depth - 8) >> 1]; \ + int k; \ + for (k = 0; k < size; k += 4) { \ + uint32_t r = rnd() & mask; \ + AV_WN32A(buf0 + k, r); \ + AV_WN32A(buf1 + k, r); \ + } \ + } while (0) + +static void check_deblock_chroma(HEVCDSPContext h, int bit_depth) +{ + int32_t tc[2] = { 0, 0 }; + // no_p, no_q can only be { 0,0 } for assembly functions, see deblocking_filter_CTB() in hevc_filter.c + uint8_t no_p[2] = { 0, 0 }; + uint8_t no_q[2] = { 0, 0 }; + LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]); + LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]); + + declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *pix, ptrdiff_t stride, int32_t *tc, uint8_t *no_p, uint8_t *no_q); + + if (check_func(h.hevc_h_loop_filter_chroma, "hevc_h_loop_filter_chroma%d", bit_depth)) { + randomize_buffers(buf0, buf1, BUF_SIZE); + for (int i = 0; i < 4; i++) { + // see betatable[] in hevc_filter.c + tc[0] = (rnd() & 63) + (rnd() & 1); + tc[1] = (rnd() & 63) + (rnd() & 1); + + call_ref(buf0 + BUF_OFFSET, BUF_STRIDE, tc, no_p, no_q); + call_new(buf1 + BUF_OFFSET, BUF_STRIDE, tc, no_p, no_q); + if (memcmp(buf0, buf1, BUF_SIZE)) + fail(); + } + bench_new(buf1 + BUF_OFFSET, BUF_STRIDE, tc, no_p, no_q); + } + + if (check_func(h.hevc_v_loop_filter_chroma, "hevc_v_loop_filter_chroma%d", bit_depth)) { + randomize_buffers(buf0, buf1, BUF_SIZE); + for (int i = 0; i < 4; i++) { + // see betatable[] in hevc_filter.c + tc[0] = (rnd() & 63) + (rnd() & 1); + tc[1] = 
(rnd() & 63) + (rnd() & 1); + + call_ref(buf0 + BUF_OFFSET, BUF_STRIDE, tc, no_p, no_q); + call_new(buf1 + BUF_OFFSET, BUF_STRIDE, tc, no_p, no_q); + if (memcmp(buf0, buf1, BUF_SIZE)) + fail(); + } + bench_new(buf1 + BUF_OFFSET, BUF_STRIDE, tc, no_p, no_q); + } +} + +void checkasm_check_hevc_deblock(void) +{ + int bit_depth; + + for (bit_depth = 8; bit_depth <= 12; bit_depth += 2) { + HEVCDSPContext h; + ff_hevc_dsp_init(&h, bit_depth); + check_deblock_chroma(h, bit_depth); + } + report("chroma"); +} diff --git a/tests/fate/checkasm.mak b/tests/fate/checkasm.mak index a4e95541f5..faac764388 100644 --- a/tests/fate/checkasm.mak +++ b/tests/fate/checkasm.mak @@ -15,6 +15,7 @@ FATE_CHECKASM = fate-checkasm-aacpsdsp \ fate-checkasm-h264pred \ fate-checkasm-h264qpel \ fate-checkasm-hevc_add_res \ + fate-checkasm-hevc_deblock \ fate-checkasm-hevc_idct \ fate-checkasm-hevc_pel \ fate-checkasm-hevc_sao \ -- 2.37.1 (Apple Git-137.1) _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 6+ messages in thread
* [FFmpeg-devel] [PATCH v4 2/2] lavc/aarch64: add hevc deblock chroma 8-12bit 2023-03-29 14:13 [FFmpeg-devel] [PATCH v4 1/2] checkasm: add hevc_deblock chroma test J. Dekker @ 2023-03-29 14:13 ` J. Dekker 2023-03-29 20:29 ` Martin Storsjö 2023-03-29 20:13 ` [FFmpeg-devel] [PATCH v4 1/2] checkasm: add hevc_deblock chroma test Martin Storsjö 1 sibling, 1 reply; 6+ messages in thread From: J. Dekker @ 2023-03-29 14:13 UTC (permalink / raw) To: ffmpeg-devel Benched on Ampere Altra: hevc_h_loop_filter_chroma8_c: 367.7 hevc_h_loop_filter_chroma8_neon: 31.0 hevc_h_loop_filter_chroma10_c: 396.7 hevc_h_loop_filter_chroma10_neon: 27.5 hevc_h_loop_filter_chroma12_c: 377.0 hevc_h_loop_filter_chroma12_neon: 31.7 hevc_v_loop_filter_chroma8_c: 369.0 hevc_v_loop_filter_chroma8_neon: 55.0 hevc_v_loop_filter_chroma10_c: 389.0 hevc_v_loop_filter_chroma10_neon: 54.0 hevc_v_loop_filter_chroma12_c: 389.5 hevc_v_loop_filter_chroma12_neon: 53.0 Signed-off-by: J. Dekker <jdek@itanimul.li> --- Included Martin's comments, decent speedup on vertical filter (~50%). 
libavcodec/aarch64/Makefile | 3 +- libavcodec/aarch64/hevcdsp_deblock_neon.S | 180 ++++++++++++++++++++++ libavcodec/aarch64/hevcdsp_init_aarch64.c | 18 +++ 3 files changed, 200 insertions(+), 1 deletion(-) create mode 100644 libavcodec/aarch64/hevcdsp_deblock_neon.S diff --git a/libavcodec/aarch64/Makefile b/libavcodec/aarch64/Makefile index 02fb51c3ab..216191640c 100644 --- a/libavcodec/aarch64/Makefile +++ b/libavcodec/aarch64/Makefile @@ -65,7 +65,8 @@ NEON-OBJS-$(CONFIG_VP9_DECODER) += aarch64/vp9itxfm_16bpp_neon.o \ aarch64/vp9lpf_neon.o \ aarch64/vp9mc_16bpp_neon.o \ aarch64/vp9mc_neon.o -NEON-OBJS-$(CONFIG_HEVC_DECODER) += aarch64/hevcdsp_idct_neon.o \ +NEON-OBJS-$(CONFIG_HEVC_DECODER) += aarch64/hevcdsp_deblock_neon.o \ + aarch64/hevcdsp_idct_neon.o \ aarch64/hevcdsp_init_aarch64.o \ aarch64/hevcdsp_qpel_neon.o \ aarch64/hevcdsp_sao_neon.o diff --git a/libavcodec/aarch64/hevcdsp_deblock_neon.S b/libavcodec/aarch64/hevcdsp_deblock_neon.S new file mode 100644 index 0000000000..ed342e5ded --- /dev/null +++ b/libavcodec/aarch64/hevcdsp_deblock_neon.S @@ -0,0 +1,180 @@ +/* -*-arm64-*- + * vim: syntax=arm64asm + * + * Copyright (c) 2014 Seppo Tomperi <seppo.tomperi@vtt.fi> + * Copyright (c) 2023 J. Dekker <jdek@itanimul.li> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + + +#include "libavutil/aarch64/asm.S" +#include "neon.S" + +.macro hevc_loop_filter_chroma_start bitdepth + mov x4, x30 + ldr w14, [x2] + ldr w15, [x2, #4] +.if \bitdepth > 8 + lsl w14, w14, #(\bitdepth - 8) + lsl w15, w15, #(\bitdepth - 8) +.endif + adds w2, w14, w15 + b.eq 1f + dup v16.4h, w14 + dup v17.4h, w15 + trn1 v16.2d, v16.2d, v17.2d +.if \bitdepth > 8 + mvni v19.8h, #((0xff << (\bitdepth - 8)) & 0xff), lsl #8 + movi v18.8h, #0 +.endif + neg v17.8h, v16.8h +.endm + +.macro hevc_loop_filter_chroma_body bitdepth +.if \bitdepth <= 8 + uxtl v20.8h, v0.8b // p1 + uxtl v1.8h, v1.8b // p0 + uxtl v2.8h, v2.8b // q0 + uxtl v23.8h, v3.8b // q1 + va .req v20 + vb .req v23 +.else // required to specify both cases as we are unable to do: v0 .req v20 + va .req v0 + vb .req v3 +.endif + sub v5.8h, v2.8h, v1.8h // q0 - p0 + sub v6.8h, va.8h, vb.8h // p1 - q1 + shl v5.8h, v5.8h, #2 + add v5.8h, v6.8h, v5.8h + srshr v5.8h, v5.8h, #3 + clip v17.8h, v16.8h, v5.8h + sqadd v1.8h, v1.8h, v5.8h // p0 + delta + sqsub v2.8h, v2.8h, v5.8h // q0 - delta +.if \bitdepth <= 8 + sqxtun v1.8b, v1.8h + sqxtun v2.8b, v2.8h +.unreq va +.unreq vb +.else + clip v18.8h, v19.8h, v1.8h, v2.8h +.endif +.endm + +function hevc_loop_filter_chroma_body_8_neon, export=0 + hevc_loop_filter_chroma_body 8 + ret +endfunc + +function hevc_loop_filter_chroma_body_10_neon, export=0 +hevc_loop_filter_chroma_body_12_neon: + hevc_loop_filter_chroma_body 10 + ret +endfunc + +// void ff_hevc_h_loop_filter_chroma_8_neon(uint8_t *_pix, ptrdiff_t _stride, int *_tc, uint8_t *_no_p, uint8_t *_no_q); + +.macro hevc_h_loop_filter_chroma bitdepth +function ff_hevc_h_loop_filter_chroma_\bitdepth\()_neon, export=1 + hevc_loop_filter_chroma_start \bitdepth + sub x0, x0, x1, lsl #1 +.if \bitdepth > 8 + 
ld1 {v0.8h}, [x0], x1 + ld1 {v1.8h}, [x0], x1 + ld1 {v2.8h}, [x0], x1 + ld1 {v3.8h}, [x0] +.else + ld1 {v0.8b}, [x0], x1 + ld1 {v1.8b}, [x0], x1 + ld1 {v2.8b}, [x0], x1 + ld1 {v3.8b}, [x0] +.endif + sub x0, x0, x1, lsl #1 + bl hevc_loop_filter_chroma_body_\bitdepth\()_neon +.if \bitdepth > 8 + st1 {v1.8h}, [x0], x1 + st1 {v2.8h}, [x0] +.else + st1 {v1.8b}, [x0], x1 + st1 {v2.8b}, [x0] +.endif +1: ret x4 +endfunc +.endm + +.macro hevc_v_loop_filter_chroma bitdepth +function ff_hevc_v_loop_filter_chroma_\bitdepth\()_neon, export=1 + hevc_loop_filter_chroma_start \bitdepth + sub x0, x0, #((0x5200 >> \bitdepth) & 0x6) // high -> 4, low -> 2 + add x3, x0, x1 + lsl x1, x1, #1 +.if \bitdepth > 8 + ld1 {v0.d}[0], [x0], x1 + ld1 {v1.d}[0], [x3], x1 + ld1 {v2.d}[0], [x0], x1 + ld1 {v3.d}[0], [x3], x1 + ld1 {v0.d}[1], [x0], x1 + ld1 {v1.d}[1], [x3], x1 + ld1 {v2.d}[1], [x0], x1 + ld1 {v3.d}[1], [x3], x1 + transpose_4x8H v0, v1, v2, v3, v28, v29, v30, v31 +.else + ld1 {v0.s}[0], [x0], x1 + ld1 {v1.s}[0], [x3], x1 + ld1 {v2.s}[0], [x0], x1 + ld1 {v3.s}[0], [x3], x1 + ld1 {v0.s}[1], [x0], x1 + ld1 {v1.s}[1], [x3], x1 + ld1 {v2.s}[1], [x0], x1 + ld1 {v3.s}[1], [x3], x1 + transpose_4x8B v0, v1, v2, v3, v28, v29, v30, v31 +.endif + sub x0, x0, x1, lsl #2 + sub x3, x3, x1, lsl #2 + bl hevc_loop_filter_chroma_body_\bitdepth\()_neon +.if \bitdepth > 8 + transpose_4x8H v0, v1, v2, v3, v28, v29, v30, v31 + st1 {v0.d}[0], [x0], x1 + st1 {v1.d}[0], [x3], x1 + st1 {v2.d}[0], [x0], x1 + st1 {v3.d}[0], [x3], x1 + st1 {v0.d}[1], [x0], x1 + st1 {v1.d}[1], [x3], x1 + st1 {v2.d}[1], [x0], x1 + st1 {v3.d}[1], [x3] +.else + transpose_4x8B v0, v1, v2, v3, v28, v29, v30, v31 + st1 {v0.s}[0], [x0], x1 + st1 {v1.s}[0], [x3], x1 + st1 {v2.s}[0], [x0], x1 + st1 {v3.s}[0], [x3], x1 + st1 {v0.s}[1], [x0], x1 + st1 {v1.s}[1], [x3], x1 + st1 {v2.s}[1], [x0], x1 + st1 {v3.s}[1], [x3] +.endif +1: ret x4 +endfunc +.endm + +hevc_h_loop_filter_chroma 8 +hevc_h_loop_filter_chroma 10 +hevc_h_loop_filter_chroma 12 
+ +hevc_v_loop_filter_chroma 8 +hevc_v_loop_filter_chroma 10 +hevc_v_loop_filter_chroma 12 diff --git a/libavcodec/aarch64/hevcdsp_init_aarch64.c b/libavcodec/aarch64/hevcdsp_init_aarch64.c index 1deefca0a2..a923bae35c 100644 --- a/libavcodec/aarch64/hevcdsp_init_aarch64.c +++ b/libavcodec/aarch64/hevcdsp_init_aarch64.c @@ -25,6 +25,18 @@ #include "libavutil/aarch64/cpu.h" #include "libavcodec/hevcdsp.h" +void ff_hevc_v_loop_filter_chroma_8_neon(uint8_t *_pix, ptrdiff_t _stride, + const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q); +void ff_hevc_v_loop_filter_chroma_10_neon(uint8_t *_pix, ptrdiff_t _stride, + const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q); +void ff_hevc_v_loop_filter_chroma_12_neon(uint8_t *_pix, ptrdiff_t _stride, + const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q); +void ff_hevc_h_loop_filter_chroma_8_neon(uint8_t *_pix, ptrdiff_t _stride, + const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q); +void ff_hevc_h_loop_filter_chroma_10_neon(uint8_t *_pix, ptrdiff_t _stride, + const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q); +void ff_hevc_h_loop_filter_chroma_12_neon(uint8_t *_pix, ptrdiff_t _stride, + const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q); void ff_hevc_add_residual_4x4_8_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride); void ff_hevc_add_residual_4x4_10_neon(uint8_t *_dst, const int16_t *coeffs, @@ -117,6 +129,8 @@ av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth) if (!have_neon(av_get_cpu_flags())) return; if (bit_depth == 8) { + c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_neon; + c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_neon; c->add_residual[0] = ff_hevc_add_residual_4x4_8_neon; c->add_residual[1] = ff_hevc_add_residual_8x8_8_neon; c->add_residual[2] = ff_hevc_add_residual_16x16_8_neon; @@ -167,6 +181,8 @@ av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth) 
c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_hevc_qpel_bi_h16_8_neon; } if (bit_depth == 10) { + c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_neon; + c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_neon; c->add_residual[0] = ff_hevc_add_residual_4x4_10_neon; c->add_residual[1] = ff_hevc_add_residual_8x8_10_neon; c->add_residual[2] = ff_hevc_add_residual_16x16_10_neon; @@ -180,6 +196,8 @@ av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth) c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_neon; } if (bit_depth == 12) { + c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_neon; + c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_neon; c->add_residual[0] = ff_hevc_add_residual_4x4_12_neon; c->add_residual[1] = ff_hevc_add_residual_8x8_12_neon; c->add_residual[2] = ff_hevc_add_residual_16x16_12_neon; -- 2.37.1 (Apple Git-137.1) _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [FFmpeg-devel] [PATCH v4 2/2] lavc/aarch64: add hevc deblock chroma 8-12bit 2023-03-29 14:13 ` [FFmpeg-devel] [PATCH v4 2/2] lavc/aarch64: add hevc deblock chroma 8-12bit J. Dekker @ 2023-03-29 20:29 ` Martin Storsjö 2023-04-06 5:00 ` J. Dekker 0 siblings, 1 reply; 6+ messages in thread From: Martin Storsjö @ 2023-03-29 20:29 UTC (permalink / raw) To: FFmpeg development discussions and patches On Wed, 29 Mar 2023, J. Dekker wrote: > Benched on Ampere Altra: > > hevc_h_loop_filter_chroma8_c: 367.7 > hevc_h_loop_filter_chroma8_neon: 31.0 > hevc_h_loop_filter_chroma10_c: 396.7 > hevc_h_loop_filter_chroma10_neon: 27.5 > hevc_h_loop_filter_chroma12_c: 377.0 > hevc_h_loop_filter_chroma12_neon: 31.7 > hevc_v_loop_filter_chroma8_c: 369.0 > hevc_v_loop_filter_chroma8_neon: 55.0 > hevc_v_loop_filter_chroma10_c: 389.0 > hevc_v_loop_filter_chroma10_neon: 54.0 > hevc_v_loop_filter_chroma12_c: 389.5 > hevc_v_loop_filter_chroma12_neon: 53.0 > > Signed-off-by: J. Dekker <jdek@itanimul.li> > --- > > Included Martin's comments, decent speedup on vertical filter (~50%). 
> > libavcodec/aarch64/Makefile | 3 +- > libavcodec/aarch64/hevcdsp_deblock_neon.S | 180 ++++++++++++++++++++++ > libavcodec/aarch64/hevcdsp_init_aarch64.c | 18 +++ > 3 files changed, 200 insertions(+), 1 deletion(-) > create mode 100644 libavcodec/aarch64/hevcdsp_deblock_neon.S > > diff --git a/libavcodec/aarch64/Makefile b/libavcodec/aarch64/Makefile > index 02fb51c3ab..216191640c 100644 > --- a/libavcodec/aarch64/Makefile > +++ b/libavcodec/aarch64/Makefile > @@ -65,7 +65,8 @@ NEON-OBJS-$(CONFIG_VP9_DECODER) += aarch64/vp9itxfm_16bpp_neon.o \ > aarch64/vp9lpf_neon.o \ > aarch64/vp9mc_16bpp_neon.o \ > aarch64/vp9mc_neon.o > -NEON-OBJS-$(CONFIG_HEVC_DECODER) += aarch64/hevcdsp_idct_neon.o \ > +NEON-OBJS-$(CONFIG_HEVC_DECODER) += aarch64/hevcdsp_deblock_neon.o \ > + aarch64/hevcdsp_idct_neon.o \ > aarch64/hevcdsp_init_aarch64.o \ > aarch64/hevcdsp_qpel_neon.o \ > aarch64/hevcdsp_sao_neon.o > diff --git a/libavcodec/aarch64/hevcdsp_deblock_neon.S b/libavcodec/aarch64/hevcdsp_deblock_neon.S > new file mode 100644 > index 0000000000..ed342e5ded > --- /dev/null > +++ b/libavcodec/aarch64/hevcdsp_deblock_neon.S > @@ -0,0 +1,180 @@ > +/* -*-arm64-*- > + * vim: syntax=arm64asm > + * > + * Copyright (c) 2014 Seppo Tomperi <seppo.tomperi@vtt.fi> > + * Copyright (c) 2023 J. Dekker <jdek@itanimul.li> > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. 
> + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA > + */ > + > + > +#include "libavutil/aarch64/asm.S" > +#include "neon.S" > + > +.macro hevc_loop_filter_chroma_start bitdepth > + mov x4, x30 > + ldr w14, [x2] > + ldr w15, [x2, #4] > +.if \bitdepth > 8 > + lsl w14, w14, #(\bitdepth - 8) > + lsl w15, w15, #(\bitdepth - 8) > +.endif > + adds w2, w14, w15 > + b.eq 1f > + dup v16.4h, w14 > + dup v17.4h, w15 > + trn1 v16.2d, v16.2d, v17.2d > +.if \bitdepth > 8 > + mvni v19.8h, #((0xff << (\bitdepth - 8)) & 0xff), lsl #8 > + movi v18.8h, #0 > +.endif > + neg v17.8h, v16.8h > +.endm > + > +.macro hevc_loop_filter_chroma_body bitdepth > +.if \bitdepth <= 8 > + uxtl v20.8h, v0.8b // p1 > + uxtl v1.8h, v1.8b // p0 > + uxtl v2.8h, v2.8b // q0 > + uxtl v23.8h, v3.8b // q1 > + va .req v20 > + vb .req v23 > +.else // required to specify both cases as we are unable to do: v0 .req v20 > + va .req v0 > + vb .req v3 > +.endif > + sub v5.8h, v2.8h, v1.8h // q0 - p0 > + sub v6.8h, va.8h, vb.8h // p1 - q1 > + shl v5.8h, v5.8h, #2 > + add v5.8h, v6.8h, v5.8h > + srshr v5.8h, v5.8h, #3 > + clip v17.8h, v16.8h, v5.8h > + sqadd v1.8h, v1.8h, v5.8h // p0 + delta > + sqsub v2.8h, v2.8h, v5.8h // q0 - delta > +.if \bitdepth <= 8 > + sqxtun v1.8b, v1.8h > + sqxtun v2.8b, v2.8h > +.unreq va > +.unreq vb Shouldn't the .unreq be outside of the .if/.else? 
> +.else > + clip v18.8h, v19.8h, v1.8h, v2.8h > +.endif > +.endm > + > +function hevc_loop_filter_chroma_body_8_neon, export=0 > + hevc_loop_filter_chroma_body 8 > + ret > +endfunc > + > +function hevc_loop_filter_chroma_body_10_neon, export=0 > +hevc_loop_filter_chroma_body_12_neon: > + hevc_loop_filter_chroma_body 10 > + ret > +endfunc > + > +// void ff_hevc_h_loop_filter_chroma_8_neon(uint8_t *_pix, ptrdiff_t _stride, int *_tc, uint8_t *_no_p, uint8_t *_no_q); > + > +.macro hevc_h_loop_filter_chroma bitdepth > +function ff_hevc_h_loop_filter_chroma_\bitdepth\()_neon, export=1 > + hevc_loop_filter_chroma_start \bitdepth > + sub x0, x0, x1, lsl #1 > +.if \bitdepth > 8 > + ld1 {v0.8h}, [x0], x1 > + ld1 {v1.8h}, [x0], x1 > + ld1 {v2.8h}, [x0], x1 > + ld1 {v3.8h}, [x0] > +.else > + ld1 {v0.8b}, [x0], x1 > + ld1 {v1.8b}, [x0], x1 > + ld1 {v2.8b}, [x0], x1 > + ld1 {v3.8b}, [x0] > +.endif > + sub x0, x0, x1, lsl #1 > + bl hevc_loop_filter_chroma_body_\bitdepth\()_neon > +.if \bitdepth > 8 > + st1 {v1.8h}, [x0], x1 > + st1 {v2.8h}, [x0] > +.else > + st1 {v1.8b}, [x0], x1 > + st1 {v2.8b}, [x0] > +.endif > +1: ret x4 > +endfunc > +.endm > + > +.macro hevc_v_loop_filter_chroma bitdepth > +function ff_hevc_v_loop_filter_chroma_\bitdepth\()_neon, export=1 > + hevc_loop_filter_chroma_start \bitdepth > + sub x0, x0, #((0x5200 >> \bitdepth) & 0x6) // high -> 4, low -> 2 TBH, I think this is rather obfuscated - I'd prefer to just move the sub (and the two instructions in between) back inside of the .if/.else, to have the sub instruction say more explicitly exactly what it does. Other than that, this patch LGTM now. // Martin _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [FFmpeg-devel] [PATCH v4 2/2] lavc/aarch64: add hevc deblock chroma 8-12bit 2023-03-29 20:29 ` Martin Storsjö @ 2023-04-06 5:00 ` J. Dekker 0 siblings, 0 replies; 6+ messages in thread From: J. Dekker @ 2023-04-06 5:00 UTC (permalink / raw) To: FFmpeg development discussions and patches On Wed, Mar 29, 2023 at 11:29:09PM +0300, Martin Storsjö wrote: > On Wed, 29 Mar 2023, J. Dekker wrote: > > > Benched on Ampere Altra: > > > > hevc_h_loop_filter_chroma8_c: 367.7 > > hevc_h_loop_filter_chroma8_neon: 31.0 > > hevc_h_loop_filter_chroma10_c: 396.7 > > hevc_h_loop_filter_chroma10_neon: 27.5 > > hevc_h_loop_filter_chroma12_c: 377.0 > > hevc_h_loop_filter_chroma12_neon: 31.7 > > hevc_v_loop_filter_chroma8_c: 369.0 > > hevc_v_loop_filter_chroma8_neon: 55.0 > > hevc_v_loop_filter_chroma10_c: 389.0 > > hevc_v_loop_filter_chroma10_neon: 54.0 > > hevc_v_loop_filter_chroma12_c: 389.5 > > hevc_v_loop_filter_chroma12_neon: 53.0 > > > > Signed-off-by: J. Dekker <jdek@itanimul.li> > > --- > > > > Included Martin's comments, decent speedup on vertical filter (~50%). 
> > > > libavcodec/aarch64/Makefile | 3 +- > > libavcodec/aarch64/hevcdsp_deblock_neon.S | 180 ++++++++++++++++++++++ > > libavcodec/aarch64/hevcdsp_init_aarch64.c | 18 +++ > > 3 files changed, 200 insertions(+), 1 deletion(-) > > create mode 100644 libavcodec/aarch64/hevcdsp_deblock_neon.S > > > > diff --git a/libavcodec/aarch64/Makefile b/libavcodec/aarch64/Makefile > > index 02fb51c3ab..216191640c 100644 > > --- a/libavcodec/aarch64/Makefile > > +++ b/libavcodec/aarch64/Makefile > > @@ -65,7 +65,8 @@ NEON-OBJS-$(CONFIG_VP9_DECODER) += aarch64/vp9itxfm_16bpp_neon.o \ > > aarch64/vp9lpf_neon.o \ > > aarch64/vp9mc_16bpp_neon.o \ > > aarch64/vp9mc_neon.o > > -NEON-OBJS-$(CONFIG_HEVC_DECODER) += aarch64/hevcdsp_idct_neon.o \ > > +NEON-OBJS-$(CONFIG_HEVC_DECODER) += aarch64/hevcdsp_deblock_neon.o \ > > + aarch64/hevcdsp_idct_neon.o \ > > aarch64/hevcdsp_init_aarch64.o \ > > aarch64/hevcdsp_qpel_neon.o \ > > aarch64/hevcdsp_sao_neon.o > > diff --git a/libavcodec/aarch64/hevcdsp_deblock_neon.S b/libavcodec/aarch64/hevcdsp_deblock_neon.S > > new file mode 100644 > > index 0000000000..ed342e5ded > > --- /dev/null > > +++ b/libavcodec/aarch64/hevcdsp_deblock_neon.S > > @@ -0,0 +1,180 @@ > > +/* -*-arm64-*- > > + * vim: syntax=arm64asm > > + * > > + * Copyright (c) 2014 Seppo Tomperi <seppo.tomperi@vtt.fi> > > + * Copyright (c) 2023 J. Dekker <jdek@itanimul.li> > > + * > > + * This file is part of FFmpeg. > > + * > > + * FFmpeg is free software; you can redistribute it and/or > > + * modify it under the terms of the GNU Lesser General Public > > + * License as published by the Free Software Foundation; either > > + * version 2.1 of the License, or (at your option) any later version. > > + * > > + * FFmpeg is distributed in the hope that it will be useful, > > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > + * Lesser General Public License for more details. 
> > + * > > + * You should have received a copy of the GNU Lesser General Public > > + * License along with FFmpeg; if not, write to the Free Software > > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA > > + */ > > + > > + > > +#include "libavutil/aarch64/asm.S" > > +#include "neon.S" > > + > > +.macro hevc_loop_filter_chroma_start bitdepth > > + mov x4, x30 > > + ldr w14, [x2] > > + ldr w15, [x2, #4] > > +.if \bitdepth > 8 > > + lsl w14, w14, #(\bitdepth - 8) > > + lsl w15, w15, #(\bitdepth - 8) > > +.endif > > + adds w2, w14, w15 > > + b.eq 1f > > + dup v16.4h, w14 > > + dup v17.4h, w15 > > + trn1 v16.2d, v16.2d, v17.2d > > +.if \bitdepth > 8 > > + mvni v19.8h, #((0xff << (\bitdepth - 8)) & 0xff), lsl #8 > > + movi v18.8h, #0 > > +.endif > > + neg v17.8h, v16.8h > > +.endm > > + > > +.macro hevc_loop_filter_chroma_body bitdepth > > +.if \bitdepth <= 8 > > + uxtl v20.8h, v0.8b // p1 > > + uxtl v1.8h, v1.8b // p0 > > + uxtl v2.8h, v2.8b // q0 > > + uxtl v23.8h, v3.8b // q1 > > + va .req v20 > > + vb .req v23 > > +.else // required to specify both cases as we are unable to do: v0 .req v20 > > + va .req v0 > > + vb .req v3 > > +.endif > > + sub v5.8h, v2.8h, v1.8h // q0 - p0 > > + sub v6.8h, va.8h, vb.8h // p1 - q1 > > + shl v5.8h, v5.8h, #2 > > + add v5.8h, v6.8h, v5.8h > > + srshr v5.8h, v5.8h, #3 > > + clip v17.8h, v16.8h, v5.8h > > + sqadd v1.8h, v1.8h, v5.8h // p0 + delta > > + sqsub v2.8h, v2.8h, v5.8h // q0 - delta > > +.if \bitdepth <= 8 > > + sqxtun v1.8b, v1.8h > > + sqxtun v2.8b, v2.8h > > +.unreq va > > +.unreq vb > > Shouldn't the .unreq be outside of the .if/.else? 
> > > +.else > > + clip v18.8h, v19.8h, v1.8h, v2.8h > > +.endif > > +.endm > > + > > +function hevc_loop_filter_chroma_body_8_neon, export=0 > > + hevc_loop_filter_chroma_body 8 > > + ret > > +endfunc > > + > > +function hevc_loop_filter_chroma_body_10_neon, export=0 > > +hevc_loop_filter_chroma_body_12_neon: > > + hevc_loop_filter_chroma_body 10 > > + ret > > +endfunc > > + > > +// void ff_hevc_h_loop_filter_chroma_8_neon(uint8_t *_pix, ptrdiff_t _stride, int *_tc, uint8_t *_no_p, uint8_t *_no_q); > > + > > +.macro hevc_h_loop_filter_chroma bitdepth > > +function ff_hevc_h_loop_filter_chroma_\bitdepth\()_neon, export=1 > > + hevc_loop_filter_chroma_start \bitdepth > > + sub x0, x0, x1, lsl #1 > > +.if \bitdepth > 8 > > + ld1 {v0.8h}, [x0], x1 > > + ld1 {v1.8h}, [x0], x1 > > + ld1 {v2.8h}, [x0], x1 > > + ld1 {v3.8h}, [x0] > > +.else > > + ld1 {v0.8b}, [x0], x1 > > + ld1 {v1.8b}, [x0], x1 > > + ld1 {v2.8b}, [x0], x1 > > + ld1 {v3.8b}, [x0] > > +.endif > > + sub x0, x0, x1, lsl #1 > > + bl hevc_loop_filter_chroma_body_\bitdepth\()_neon > > +.if \bitdepth > 8 > > + st1 {v1.8h}, [x0], x1 > > + st1 {v2.8h}, [x0] > > +.else > > + st1 {v1.8b}, [x0], x1 > > + st1 {v2.8b}, [x0] > > +.endif > > +1: ret x4 > > +endfunc > > +.endm > > + > > +.macro hevc_v_loop_filter_chroma bitdepth > > +function ff_hevc_v_loop_filter_chroma_\bitdepth\()_neon, export=1 > > + hevc_loop_filter_chroma_start \bitdepth > > + sub x0, x0, #((0x5200 >> \bitdepth) & 0x6) // high -> 4, low -> 2 > > TBH, I think this is rather obfuscated - I'd prefer to just move the sub > (and the two instructions inbetween) back inside of the .if/.else, to have > the sub instruction say more explicitly exactly what it does. > > Other than that, this patch LGTM now. > Thanks, pushed with changes. 
-- jd _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [FFmpeg-devel] [PATCH v4 1/2] checkasm: add hevc_deblock chroma test 2023-03-29 14:13 [FFmpeg-devel] [PATCH v4 1/2] checkasm: add hevc_deblock chroma test J. Dekker 2023-03-29 14:13 ` [FFmpeg-devel] [PATCH v4 2/2] lavc/aarch64: add hevc deblock chroma 8-12bit J. Dekker @ 2023-03-29 20:13 ` Martin Storsjö 2023-04-06 5:02 ` J. Dekker 1 sibling, 1 reply; 6+ messages in thread From: Martin Storsjö @ 2023-03-29 20:13 UTC (permalink / raw) To: FFmpeg development discussions and patches On Wed, 29 Mar 2023, J. Dekker wrote: > Signed-off-by: J. Dekker <jdek@itanimul.li> > --- > > Will support other variants in the second version of these tests. > > tests/checkasm/Makefile | 2 +- > tests/checkasm/checkasm.c | 1 + > tests/checkasm/checkasm.h | 1 + > tests/checkasm/hevc_deblock.c | 100 ++++++++++++++++++++++++++++++++++ > tests/fate/checkasm.mak | 1 + > 5 files changed, 104 insertions(+), 1 deletion(-) > create mode 100644 tests/checkasm/hevc_deblock.c > > diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile > index a6f06c7007..3e62a22bd6 100644 > --- a/tests/checkasm/Makefile > +++ b/tests/checkasm/Makefile > @@ -28,7 +28,7 @@ AVCODECOBJS-$(CONFIG_HUFFYUV_DECODER) += huffyuvdsp.o > AVCODECOBJS-$(CONFIG_JPEG2000_DECODER) += jpeg2000dsp.o > AVCODECOBJS-$(CONFIG_OPUS_DECODER) += opusdsp.o > AVCODECOBJS-$(CONFIG_PIXBLOCKDSP) += pixblockdsp.o > -AVCODECOBJS-$(CONFIG_HEVC_DECODER) += hevc_add_res.o hevc_idct.o hevc_sao.o hevc_pel.o > +AVCODECOBJS-$(CONFIG_HEVC_DECODER) += hevc_add_res.o hevc_deblock.o hevc_idct.o hevc_sao.o hevc_pel.o > AVCODECOBJS-$(CONFIG_UTVIDEO_DECODER) += utvideodsp.o > AVCODECOBJS-$(CONFIG_V210_DECODER) += v210dec.o > AVCODECOBJS-$(CONFIG_V210_ENCODER) += v210enc.o > diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c > index e96d84a7da..c2184d260d 100644 > --- a/tests/checkasm/checkasm.c > +++ b/tests/checkasm/checkasm.c > @@ -116,6 +116,7 @@ static const struct { > #endif > #if CONFIG_HEVC_DECODER > { "hevc_add_res", 
checkasm_check_hevc_add_res }, > + { "hevc_deblock", checkasm_check_hevc_deblock }, > { "hevc_idct", checkasm_check_hevc_idct }, > { "hevc_pel", checkasm_check_hevc_pel }, > { "hevc_sao", checkasm_check_hevc_sao }, > diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h > index 8744a81218..89c643e6a0 100644 > --- a/tests/checkasm/checkasm.h > +++ b/tests/checkasm/checkasm.h > @@ -60,6 +60,7 @@ void checkasm_check_h264dsp(void); > void checkasm_check_h264pred(void); > void checkasm_check_h264qpel(void); > void checkasm_check_hevc_add_res(void); > +void checkasm_check_hevc_deblock(void); > void checkasm_check_hevc_idct(void); > void checkasm_check_hevc_pel(void); > void checkasm_check_hevc_sao(void); > diff --git a/tests/checkasm/hevc_deblock.c b/tests/checkasm/hevc_deblock.c > new file mode 100644 > index 0000000000..f73e68e8a6 > --- /dev/null > +++ b/tests/checkasm/hevc_deblock.c > @@ -0,0 +1,100 @@ > +/* > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License as published by > + * the Free Software Foundation; either version 2 of the License, or > + * (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License along > + * with FFmpeg; if not, write to the Free Software Foundation, Inc., > + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
> + */ > + > +#include <string.h> > + > +#include "libavutil/intreadwrite.h" > +#include "libavutil/mem_internal.h" > + > +#include "libavcodec/avcodec.h" > +#include "libavcodec/hevcdsp.h" > + > +#include "checkasm.h" > + > +static const uint32_t pixel_mask[3] = { 0xffffffff, 0x03ff03ff, 0x0fff0fff }; > + > +#define SIZEOF_PIXEL ((bit_depth + 7) / 8) > +#define BUF_STRIDE (8 * 2) > +#define BUF_LINES (8) > +#define BUF_OFFSET (BUF_STRIDE * BUF_LINES) > +#define BUF_SIZE (BUF_STRIDE * BUF_LINES + BUF_OFFSET * 2) > + > +#define randomize_buffers(buf0, buf1, size) \ > + do { \ > + uint32_t mask = pixel_mask[(bit_depth - 8) >> 1]; \ > + int k; \ > + for (k = 0; k < size; k += 4) { \ > + uint32_t r = rnd() & mask; \ > + AV_WN32A(buf0 + k, r); \ > + AV_WN32A(buf1 + k, r); \ > + } \ > + } while (0) > + > +static void check_deblock_chroma(HEVCDSPContext h, int bit_depth) > +{ > + int32_t tc[2] = { 0, 0 }; > + // no_p, no_q can only be { 0,0 } for assembly functions, see deblocking_filter_CTB() in hevc_filter.c It's not strictly about assembly functions; there are just two separate function pointers, hevc_v_loop_filter_chroma and hevc_v_loop_filter_chroma_c - you can implement both in assembly. I guess the intent of the _c variant originally was that it wasn't the assembly version for whatever assembly implementation there was at the time, but we can support both in assembly too. (As a later step, we can probably template this test somehow to test both of them.) 
> + uint8_t no_p[2] = { 0, 0 }; > + uint8_t no_q[2] = { 0, 0 }; > + LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]); > + LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]); > + > + declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *pix, ptrdiff_t stride, int32_t *tc, uint8_t *no_p, uint8_t *no_q); > + > + if (check_func(h.hevc_h_loop_filter_chroma, "hevc_h_loop_filter_chroma%d", bit_depth)) { > + randomize_buffers(buf0, buf1, BUF_SIZE); > + for (int i = 0; i < 4; i++) { > + // see betatable[] in hevc_filter.c > + tc[0] = (rnd() & 63) + (rnd() & 1); > + tc[1] = (rnd() & 63) + (rnd() & 1); > + > + call_ref(buf0 + BUF_OFFSET, BUF_STRIDE, tc, no_p, no_q); > + call_new(buf1 + BUF_OFFSET, BUF_STRIDE, tc, no_p, no_q); > + if (memcmp(buf0, buf1, BUF_SIZE)) > + fail(); > + } > + bench_new(buf1 + BUF_OFFSET, BUF_STRIDE, tc, no_p, no_q); > + } > + > + if (check_func(h.hevc_v_loop_filter_chroma, "hevc_v_loop_filter_chroma%d", bit_depth)) { > + randomize_buffers(buf0, buf1, BUF_SIZE); > + for (int i = 0; i < 4; i++) { > + // see betatable[] in hevc_filter.c > + tc[0] = (rnd() & 63) + (rnd() & 1); > + tc[1] = (rnd() & 63) + (rnd() & 1); I wonder if it'd be better to test with a freshly randomized buffer instead of iterating over the same data multiple times? Worst case, the deblocked area converges to so few differences that the last few iterations don't make any difference. I.e., I'd suggest moving the randomize_buffers call into the loop here. Other than that, this patch LGTM. // Martin _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [FFmpeg-devel] [PATCH v4 1/2] checkasm: add hevc_deblock chroma test 2023-03-29 20:13 ` [FFmpeg-devel] [PATCH v4 1/2] checkasm: add hevc_deblock chroma test Martin Storsjö @ 2023-04-06 5:02 ` J. Dekker 0 siblings, 0 replies; 6+ messages in thread From: J. Dekker @ 2023-04-06 5:02 UTC (permalink / raw) To: FFmpeg development discussions and patches On Wed, Mar 29, 2023 at 11:13:15PM +0300, Martin Storsjö wrote: > On Wed, 29 Mar 2023, J. Dekker wrote: > > > Signed-off-by: J. Dekker <jdek@itanimul.li> > > --- > > > > Will support other variants in the second version of these tests. > > > > tests/checkasm/Makefile | 2 +- > > tests/checkasm/checkasm.c | 1 + > > tests/checkasm/checkasm.h | 1 + > > tests/checkasm/hevc_deblock.c | 100 ++++++++++++++++++++++++++++++++++ > > tests/fate/checkasm.mak | 1 + > > 5 files changed, 104 insertions(+), 1 deletion(-) > > create mode 100644 tests/checkasm/hevc_deblock.c > > > [...] > > Other than that, this patch LGTM. > > // Martin Pushed with changes, thanks for the reviews -- jd _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". ^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2023-04-06 5:02 UTC | newest] Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2023-03-29 14:13 [FFmpeg-devel] [PATCH v4 1/2] checkasm: add hevc_deblock chroma test J. Dekker 2023-03-29 14:13 ` [FFmpeg-devel] [PATCH v4 2/2] lavc/aarch64: add hevc deblock chroma 8-12bit J. Dekker 2023-03-29 20:29 ` Martin Storsjö 2023-04-06 5:00 ` J. Dekker 2023-03-29 20:13 ` [FFmpeg-devel] [PATCH v4 1/2] checkasm: add hevc_deblock chroma test Martin Storsjö 2023-04-06 5:02 ` J. Dekker
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel This inbox may be cloned and mirrored by anyone: git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git # If you have public-inbox 1.1+ installed, you may # initialize and index your mirror using the following commands: public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \ ffmpegdev@gitmailbox.com public-inbox-index ffmpegdev Example config snippet for mirrors. AGPL code for this site: git clone https://public-inbox.org/public-inbox.git