* [FFmpeg-devel] [PATCH 1/4] Revert "lavc/aarch64: add hevc sao band 8x8 tiling"
@ 2022-01-05 8:31 Martin Storsjö
2022-01-05 8:31 ` [FFmpeg-devel] [PATCH 2/4] Revert "lavc/aarch64: add hevc sao edge 8x8" Martin Storsjö
` (2 more replies)
0 siblings, 3 replies; 9+ messages in thread
From: Martin Storsjö @ 2022-01-05 8:31 UTC (permalink / raw)
To: ffmpeg-devel
This reverts commit f63f9be37c799ddc835af358034630d31fb7db02, as
it breaks fate-hevc.
---
libavcodec/aarch64/hevcdsp_init_aarch64.c | 6 +-----
libavcodec/aarch64/hevcdsp_sao_neon.S | 11 ++++-------
2 files changed, 5 insertions(+), 12 deletions(-)
diff --git a/libavcodec/aarch64/hevcdsp_init_aarch64.c b/libavcodec/aarch64/hevcdsp_init_aarch64.c
index 2002530266..b93cec9e44 100644
--- a/libavcodec/aarch64/hevcdsp_init_aarch64.c
+++ b/libavcodec/aarch64/hevcdsp_init_aarch64.c
@@ -77,11 +77,7 @@ av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth)
c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_neon;
c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_neon;
c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_neon;
- c->sao_band_filter[0] =
- c->sao_band_filter[1] =
- c->sao_band_filter[2] =
- c->sao_band_filter[3] =
- c->sao_band_filter[4] = ff_hevc_sao_band_filter_8x8_8_neon;
+ c->sao_band_filter[0] = ff_hevc_sao_band_filter_8x8_8_neon;
c->sao_edge_filter[0] = ff_hevc_sao_edge_filter_8x8_8_neon;
c->sao_edge_filter[1] =
c->sao_edge_filter[2] =
diff --git a/libavcodec/aarch64/hevcdsp_sao_neon.S b/libavcodec/aarch64/hevcdsp_sao_neon.S
index d524323fe8..73b0b3b056 100644
--- a/libavcodec/aarch64/hevcdsp_sao_neon.S
+++ b/libavcodec/aarch64/hevcdsp_sao_neon.S
@@ -3,7 +3,7 @@
*
* AArch64 NEON optimised SAO functions for HEVC decoding
*
- * Copyright (c) 2020-2021 J. Dekker <jdek@itanimul.li>
+ * Copyright (c) 2020 Josh Dekker <josh@itanimul.li>
*
* This file is part of FFmpeg.
*
@@ -35,7 +35,6 @@ function ff_hevc_sao_band_filter_8x8_8_neon, export=1
stp xzr, xzr, [sp, #32]
stp xzr, xzr, [sp, #48]
mov w8, #4
- sxtw x6, w6
0: ldrsh x9, [x4, x8, lsl #1] // sao_offset_val[k+1]
subs w8, w8, #1
add w10, w8, w5 // k + sao_left_class
@@ -44,9 +43,7 @@ function ff_hevc_sao_band_filter_8x8_8_neon, export=1
bne 0b
ld1 {v16.16b-v19.16b}, [sp], #64
movi v20.8h, #1
- sub x2, x2, x6 // stride_dst - width
- sub x3, x3, x6 // stride_src - width
-1: mov x8, x6 // beginning of line
+1: mov w8, w6 // beginning of line
2: // Simple layout for accessing 16bit values
// with 8bit LUT.
//
@@ -55,7 +52,7 @@ function ff_hevc_sao_band_filter_8x8_8_neon, export=1
// |xDE#xAD|xCA#xFE|xBE#xEF|xFE#xED|....
// +----------------------------------->
// i-0 i-1 i-2 i-3
- ld1 {v2.8b}, [x1], #8 // dst[x] = av_clip_pixel(src[x] + offset_table[src[x] >> shift]);
+ ld1 {v2.8b}, [x1] // dst[x] = av_clip_pixel(src[x] + offset_table[src[x] >> shift]);
uxtl v0.8h, v2.8b // load src[x]
ushr v2.8h, v0.8h, #3 // >> BIT_DEPTH - 3
shl v1.8h, v2.8h, #1 // low (x2, accessing short)
@@ -64,7 +61,7 @@ function ff_hevc_sao_band_filter_8x8_8_neon, export=1
tbx v2.16b, {v16.16b-v19.16b}, v1.16b // table
add v1.8h, v0.8h, v2.8h // src[x] + table
sqxtun v4.8b, v1.8h // clip + narrow
- st1 {v4.8b}, [x0], #8 // store
+ st1 {v4.8b}, [x0] // store
subs w8, w8, #8 // done 8 pixels
bne 2b
subs w7, w7, #1 // finished line, prep. new
--
2.25.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 9+ messages in thread
* [FFmpeg-devel] [PATCH 2/4] Revert "lavc/aarch64: add hevc sao edge 8x8"
2022-01-05 8:31 [FFmpeg-devel] [PATCH 1/4] Revert "lavc/aarch64: add hevc sao band 8x8 tiling" Martin Storsjö
@ 2022-01-05 8:31 ` Martin Storsjö
2022-01-05 8:31 ` [FFmpeg-devel] [PATCH 3/4] Revert "lavc/aarch64: add hevc sao edge 16x16" Martin Storsjö
2022-01-05 8:31 ` [FFmpeg-devel] [PATCH 4/4] aarch64: Disable ff_hevc_sao_band_filter_8x8_8_neon out of precaution Martin Storsjö
2 siblings, 0 replies; 9+ messages in thread
From: Martin Storsjö @ 2022-01-05 8:31 UTC (permalink / raw)
To: ffmpeg-devel
This reverts commit c97ffc1a77ccaf901e642bd21ed26aaf75557745, as
it breaks fate-hevc.
---
libavcodec/aarch64/hevcdsp_init_aarch64.c | 3 --
libavcodec/aarch64/hevcdsp_sao_neon.S | 51 -----------------------
2 files changed, 54 deletions(-)
diff --git a/libavcodec/aarch64/hevcdsp_init_aarch64.c b/libavcodec/aarch64/hevcdsp_init_aarch64.c
index b93cec9e44..747ff0412d 100644
--- a/libavcodec/aarch64/hevcdsp_init_aarch64.c
+++ b/libavcodec/aarch64/hevcdsp_init_aarch64.c
@@ -59,8 +59,6 @@ void ff_hevc_sao_band_filter_8x8_8_neon(uint8_t *_dst, uint8_t *_src,
int width, int height);
void ff_hevc_sao_edge_filter_16x16_8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride_dst,
int16_t *sao_offset_val, int eo, int width, int height);
-void ff_hevc_sao_edge_filter_8x8_8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride_dst,
- int16_t *sao_offset_val, int eo, int width, int height);
av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth)
{
@@ -78,7 +76,6 @@ av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth)
c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_neon;
c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_neon;
c->sao_band_filter[0] = ff_hevc_sao_band_filter_8x8_8_neon;
- c->sao_edge_filter[0] = ff_hevc_sao_edge_filter_8x8_8_neon;
c->sao_edge_filter[1] =
c->sao_edge_filter[2] =
c->sao_edge_filter[3] =
diff --git a/libavcodec/aarch64/hevcdsp_sao_neon.S b/libavcodec/aarch64/hevcdsp_sao_neon.S
index 73b0b3b056..b8f0c65acc 100644
--- a/libavcodec/aarch64/hevcdsp_sao_neon.S
+++ b/libavcodec/aarch64/hevcdsp_sao_neon.S
@@ -135,54 +135,3 @@ function ff_hevc_sao_edge_filter_16x16_8_neon, export=1
// no lines to filter
ret
endfunc
-
-// ff_hevc_sao_edge_filter_8x8_8_neon(char *dst, char *src, ptrdiff stride_dst,
-// int16 *sao_offset_val, int eo, int width, int height)
-function ff_hevc_sao_edge_filter_8x8_8_neon, export=1
- adr x7, .Lsao_edge_pos
- ldr w4, [x7, w4, uxtw #2]
- ld1 {v3.8h}, [x3]
- mov v3.h[7], v3.h[0]
- mov v3.h[0], v3.h[1]
- mov v3.h[1], v3.h[2]
- mov v3.h[2], v3.h[7]
- uzp2 v1.16b, v3.16b, v3.16b
- uzp1 v0.16b, v3.16b, v3.16b
- movi v2.16b, #2
- add x16, x0, x2
- lsl x2, x2, #1
- mov x15, #192
- mov x8, x1
- sub x9, x1, x4
- add x10, x1, x4
- mov x17, #4
-1: ld1 {v3.d}[0], [ x8], x15
- ld1 {v4.d}[0], [ x9], x15
- ld1 {v5.d}[0], [x10], x15
- ld1 {v3.d}[1], [ x8], x15
- ld1 {v4.d}[1], [ x9], x15
- ld1 {v5.d}[1], [x10], x15
- cmhi v16.16b, v4.16b, v3.16b
- cmhi v17.16b, v3.16b, v4.16b
- cmhi v18.16b, v5.16b, v3.16b
- cmhi v19.16b, v3.16b, v5.16b
- sub v20.16b, v16.16b, v17.16b
- sub v21.16b, v18.16b, v19.16b
- add v20.16b, v20.16b, v21.16b
- add v20.16b, v20.16b, v2.16b
- tbl v16.16b, {v0.16b}, v20.16b
- tbl v17.16b, {v1.16b}, v20.16b
- uxtl v20.8h, v3.8b
- uxtl2 v21.8h, v3.16b
- zip1 v18.16b, v16.16b, v17.16b
- zip2 v19.16b, v16.16b, v17.16b
- sqadd v20.8h, v18.8h, v20.8h
- sqadd v21.8h, v19.8h, v21.8h
- sqxtun v6.8b, v20.8h
- sqxtun v7.8b, v21.8h
- st1 {v6.8b}, [ x0], x2
- st1 {v7.8b}, [x16], x2
- subs x17, x17, #1
- b.ne 1b
- ret
-endfunc
--
2.25.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 9+ messages in thread
* [FFmpeg-devel] [PATCH 3/4] Revert "lavc/aarch64: add hevc sao edge 16x16"
2022-01-05 8:31 [FFmpeg-devel] [PATCH 1/4] Revert "lavc/aarch64: add hevc sao band 8x8 tiling" Martin Storsjö
2022-01-05 8:31 ` [FFmpeg-devel] [PATCH 2/4] Revert "lavc/aarch64: add hevc sao edge 8x8" Martin Storsjö
@ 2022-01-05 8:31 ` Martin Storsjö
2022-01-05 8:31 ` [FFmpeg-devel] [PATCH 4/4] aarch64: Disable ff_hevc_sao_band_filter_8x8_8_neon out of precaution Martin Storsjö
2 siblings, 0 replies; 9+ messages in thread
From: Martin Storsjö @ 2022-01-05 8:31 UTC (permalink / raw)
To: ffmpeg-devel
This reverts commit a9214a2ca31c9d54f893c5ac4004a5ff30a08d10, as
it breaks fate-hevc.
---
libavcodec/aarch64/hevcdsp_init_aarch64.c | 8 +--
libavcodec/aarch64/hevcdsp_sao_neon.S | 65 -----------------------
2 files changed, 2 insertions(+), 71 deletions(-)
diff --git a/libavcodec/aarch64/hevcdsp_init_aarch64.c b/libavcodec/aarch64/hevcdsp_init_aarch64.c
index 747ff0412d..c785e46f79 100644
--- a/libavcodec/aarch64/hevcdsp_init_aarch64.c
+++ b/libavcodec/aarch64/hevcdsp_init_aarch64.c
@@ -57,8 +57,8 @@ void ff_hevc_sao_band_filter_8x8_8_neon(uint8_t *_dst, uint8_t *_src,
ptrdiff_t stride_dst, ptrdiff_t stride_src,
int16_t *sao_offset_val, int sao_left_class,
int width, int height);
-void ff_hevc_sao_edge_filter_16x16_8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride_dst,
- int16_t *sao_offset_val, int eo, int width, int height);
+
+
av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth)
{
@@ -76,10 +76,6 @@ av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth)
c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_neon;
c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_neon;
c->sao_band_filter[0] = ff_hevc_sao_band_filter_8x8_8_neon;
- c->sao_edge_filter[1] =
- c->sao_edge_filter[2] =
- c->sao_edge_filter[3] =
- c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_16x16_8_neon;
}
if (bit_depth == 10) {
c->add_residual[0] = ff_hevc_add_residual_4x4_10_neon;
diff --git a/libavcodec/aarch64/hevcdsp_sao_neon.S b/libavcodec/aarch64/hevcdsp_sao_neon.S
index b8f0c65acc..d523bf584d 100644
--- a/libavcodec/aarch64/hevcdsp_sao_neon.S
+++ b/libavcodec/aarch64/hevcdsp_sao_neon.S
@@ -70,68 +70,3 @@ function ff_hevc_sao_band_filter_8x8_8_neon, export=1
bne 1b
ret
endfunc
-
-// ASSUMES STRIDE_SRC = 192
-.Lsao_edge_pos:
-.word 1 // horizontal
-.word 192 // vertical
-.word 192 + 1 // 45 degree
-.word 192 - 1 // 135 degree
-
-// ff_hevc_sao_edge_filter_16x16_8_neon(char *dst, char *src, ptrdiff stride_dst,
-// int16 *sao_offset_val, int eo, int width, int height)
-function ff_hevc_sao_edge_filter_16x16_8_neon, export=1
- adr x7, .Lsao_edge_pos
- ld1 {v3.8h}, [x3] // load sao_offset_val
- sxtw x5, w5
- ldr w4, [x7, w4, uxtw #2] // stride_src
- mov v3.h[7], v3.h[0] // reorder to [1,2,0,3,4]
- mov v3.h[0], v3.h[1]
- mov v3.h[1], v3.h[2]
- mov v3.h[2], v3.h[7]
- // split 16bit values into two tables
- uzp2 v1.16b, v3.16b, v3.16b // sao_offset_val -> upper
- uzp1 v0.16b, v3.16b, v3.16b // sao_offset_val -> lower
- movi v2.16b, #2
- mov x15, #192
- // strides between end of line and next src/dst
- sub x15, x15, x5 // stride_src - width
- sub x16, x2, x5 // stride_dst - width
- mov x11, x1 // copy base src
-1: // new line
- mov x14, x5 // copy width
- sub x12, x11, x4 // src_a (prev) = src - sao_edge_pos
- add x13, x11, x4 // src_b (next) = src + sao_edge_pos
-2: // process 16 bytes
- ld1 {v3.16b}, [x11], #16 // load src
- ld1 {v4.16b}, [x12], #16 // load src_a (prev)
- ld1 {v5.16b}, [x13], #16 // load src_b (next)
- cmhi v16.16b, v4.16b, v3.16b // (prev > cur)
- cmhi v17.16b, v3.16b, v4.16b // (cur > prev)
- cmhi v18.16b, v5.16b, v3.16b // (next > cur)
- cmhi v19.16b, v3.16b, v5.16b // (cur > next)
- sub v20.16b, v16.16b, v17.16b // diff0 = CMP(cur, prev) = (cur > prev) - (cur < prev)
- sub v21.16b, v18.16b, v19.16b // diff1 = CMP(cur, next) = (cur > next) - (cur < next)
- add v20.16b, v20.16b, v21.16b // diff = diff0 + diff1
- add v20.16b, v20.16b, v2.16b // offset_val = diff + 2
- tbl v16.16b, {v0.16b}, v20.16b
- tbl v17.16b, {v1.16b}, v20.16b
- uxtl v20.8h, v3.8b // src[0:7]
- uxtl2 v21.8h, v3.16b // src[7:15]
- zip1 v18.16b, v16.16b, v17.16b // sao_offset_val lower ->
- zip2 v19.16b, v16.16b, v17.16b // sao_offset_val upper ->
- sqadd v20.8h, v18.8h, v20.8h // + sao_offset_val
- sqadd v21.8h, v19.8h, v21.8h
- sqxtun v3.8b, v20.8h
- sqxtun2 v3.16b, v21.8h
- st1 {v3.16b}, [x0], #16
- subs x14, x14, #16 // filtered 16 bytes
- b.ne 2b // do we have width to filter?
- // no width to filter, setup next line
- add x11, x11, x15 // stride src to next line
- add x0, x0, x16 // stride dst to next line
- subs w6, w6, #1 // filtered line
- b.ne 1b // do we have lines to process?
- // no lines to filter
- ret
-endfunc
--
2.25.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 9+ messages in thread
* [FFmpeg-devel] [PATCH 4/4] aarch64: Disable ff_hevc_sao_band_filter_8x8_8_neon out of precaution
2022-01-05 8:31 [FFmpeg-devel] [PATCH 1/4] Revert "lavc/aarch64: add hevc sao band 8x8 tiling" Martin Storsjö
2022-01-05 8:31 ` [FFmpeg-devel] [PATCH 2/4] Revert "lavc/aarch64: add hevc sao edge 8x8" Martin Storsjö
2022-01-05 8:31 ` [FFmpeg-devel] [PATCH 3/4] Revert "lavc/aarch64: add hevc sao edge 16x16" Martin Storsjö
@ 2022-01-05 8:31 ` Martin Storsjö
2022-01-05 12:08 ` James Almer
2022-01-06 23:30 ` Martin Storsjö
2 siblings, 2 replies; 9+ messages in thread
From: Martin Storsjö @ 2022-01-05 8:31 UTC (permalink / raw)
To: ffmpeg-devel
While this function on its own passes all of fate-hevc, there are
indications that the function might need to handle widths that
aren't a multiple of 8 (noted in commit
f63f9be37c799ddc835af358034630d31fb7db02, which was later
reverted).
---
libavcodec/aarch64/hevcdsp_init_aarch64.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/libavcodec/aarch64/hevcdsp_init_aarch64.c b/libavcodec/aarch64/hevcdsp_init_aarch64.c
index c785e46f79..1e40be740c 100644
--- a/libavcodec/aarch64/hevcdsp_init_aarch64.c
+++ b/libavcodec/aarch64/hevcdsp_init_aarch64.c
@@ -75,7 +75,11 @@ av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth)
c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_neon;
c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_neon;
c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_neon;
- c->sao_band_filter[0] = ff_hevc_sao_band_filter_8x8_8_neon;
+ // This function is disabled, as it doesn't handle widths that aren't
+ // an even multiple of 8 correctly. fate-hevc doesn't exercise that
+ // for the current size, but if enabled for bigger sizes, the cases
+ // of non-multiple of 8 seem to arise.
+// c->sao_band_filter[0] = ff_hevc_sao_band_filter_8x8_8_neon;
}
if (bit_depth == 10) {
c->add_residual[0] = ff_hevc_add_residual_4x4_10_neon;
--
2.25.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [FFmpeg-devel] [PATCH 4/4] aarch64: Disable ff_hevc_sao_band_filter_8x8_8_neon out of precaution
2022-01-05 8:31 ` [FFmpeg-devel] [PATCH 4/4] aarch64: Disable ff_hevc_sao_band_filter_8x8_8_neon out of precaution Martin Storsjö
@ 2022-01-05 12:08 ` James Almer
2022-01-05 12:15 ` Martin Storsjö
2022-01-06 23:30 ` Martin Storsjö
1 sibling, 1 reply; 9+ messages in thread
From: James Almer @ 2022-01-05 12:08 UTC (permalink / raw)
To: ffmpeg-devel
On 1/5/2022 5:31 AM, Martin Storsjö wrote:
> While this function on its own passes all of fate-hevc, there's
> indications that the function might need to handle widths that
> aren't a multiple of 8 (noted in commit
> f63f9be37c799ddc835af358034630d31fb7db02, which later was
> reverted).
> ---
> libavcodec/aarch64/hevcdsp_init_aarch64.c | 6 +++++-
> 1 file changed, 5 insertions(+), 1 deletion(-)
>
> diff --git a/libavcodec/aarch64/hevcdsp_init_aarch64.c b/libavcodec/aarch64/hevcdsp_init_aarch64.c
> index c785e46f79..1e40be740c 100644
> --- a/libavcodec/aarch64/hevcdsp_init_aarch64.c
> +++ b/libavcodec/aarch64/hevcdsp_init_aarch64.c
> @@ -75,7 +75,11 @@ av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth)
> c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_neon;
> c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_neon;
> c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_neon;
> - c->sao_band_filter[0] = ff_hevc_sao_band_filter_8x8_8_neon;
> + // This function is disabled, as it doesn't handle widths that aren't
> + // an even multiple of 8 correctly. fate-hevc doesn't exercise that
> + // for the current size, but if enabled for bigger sizes, the cases
> + // of non-multiple of 8 seem to arise.
Can the checkasm test be extended to cover these cases?
> +// c->sao_band_filter[0] = ff_hevc_sao_band_filter_8x8_8_neon;
> }
> if (bit_depth == 10) {
> c->add_residual[0] = ff_hevc_add_residual_4x4_10_neon;
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [FFmpeg-devel] [PATCH 4/4] aarch64: Disable ff_hevc_sao_band_filter_8x8_8_neon out of precaution
2022-01-05 12:08 ` James Almer
@ 2022-01-05 12:15 ` Martin Storsjö
0 siblings, 0 replies; 9+ messages in thread
From: Martin Storsjö @ 2022-01-05 12:15 UTC (permalink / raw)
To: FFmpeg development discussions and patches
On Wed, 5 Jan 2022, James Almer wrote:
>
>
> On 1/5/2022 5:31 AM, Martin Storsjö wrote:
>> While this function on its own passes all of fate-hevc, there's
>> indications that the function might need to handle widths that
>> aren't a multiple of 8 (noted in commit
>> f63f9be37c799ddc835af358034630d31fb7db02, which later was
>> reverted).
>> ---
>> libavcodec/aarch64/hevcdsp_init_aarch64.c | 6 +++++-
>> 1 file changed, 5 insertions(+), 1 deletion(-)
>>
>> diff --git a/libavcodec/aarch64/hevcdsp_init_aarch64.c
> b/libavcodec/aarch64/hevcdsp_init_aarch64.c
>> index c785e46f79..1e40be740c 100644
>> --- a/libavcodec/aarch64/hevcdsp_init_aarch64.c
>> +++ b/libavcodec/aarch64/hevcdsp_init_aarch64.c
>> @@ -75,7 +75,11 @@ av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c,
> const int bit_depth)
>> c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_neon;
>> c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_neon;
>> c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_neon;
>> - c->sao_band_filter[0] =
> ff_hevc_sao_band_filter_8x8_8_neon;
>> + // This function is disabled, as it doesn't handle widths that
> aren't
>> + // an even multiple of 8 correctly. fate-hevc doesn't exercise
> that
>> + // for the current size, but if enabled for bigger sizes, the
> cases
>> + // of non-multiple of 8 seem to arise.
>
> Can the checkasm test be extended to cover these cases?
Yes, ideally - but I'm not that familiar with it (from the decoder point
of view, what argument combinations are valid and expected) and I don't
quite have the bandwidth to take it on right now, so yes - patches welcome.
// Martin
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [FFmpeg-devel] [PATCH 4/4] aarch64: Disable ff_hevc_sao_band_filter_8x8_8_neon out of precaution
2022-01-05 8:31 ` [FFmpeg-devel] [PATCH 4/4] aarch64: Disable ff_hevc_sao_band_filter_8x8_8_neon out of precaution Martin Storsjö
2022-01-05 12:08 ` James Almer
@ 2022-01-06 23:30 ` Martin Storsjö
2022-01-07 21:01 ` Martin Storsjö
2022-01-09 17:15 ` Andriy Gelman
1 sibling, 2 replies; 9+ messages in thread
From: Martin Storsjö @ 2022-01-06 23:30 UTC (permalink / raw)
To: ffmpeg-devel
On Wed, 5 Jan 2022, Martin Storsjö wrote:
> While this function on its own passes all of fate-hevc, there's
> indications that the function might need to handle widths that
> aren't a multiple of 8 (noted in commit
> f63f9be37c799ddc835af358034630d31fb7db02, which later was
> reverted).
> ---
> libavcodec/aarch64/hevcdsp_init_aarch64.c | 6 +++++-
> 1 file changed, 5 insertions(+), 1 deletion(-)
As fate-hevc is still broken on aarch64, is anyone opposed to pushing
these reverts until things are sorted out?
I'm all for fixing things, but we don't need to leave things broken in git
master while doing so.
// Martin
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [FFmpeg-devel] [PATCH 4/4] aarch64: Disable ff_hevc_sao_band_filter_8x8_8_neon out of precaution
2022-01-06 23:30 ` Martin Storsjö
@ 2022-01-07 21:01 ` Martin Storsjö
2022-01-09 17:15 ` Andriy Gelman
1 sibling, 0 replies; 9+ messages in thread
From: Martin Storsjö @ 2022-01-07 21:01 UTC (permalink / raw)
To: ffmpeg-devel
On Fri, 7 Jan 2022, Martin Storsjö wrote:
> On Wed, 5 Jan 2022, Martin Storsjö wrote:
>
>> While this function on its own passes all of fate-hevc, there's
>> indications that the function might need to handle widths that
>> aren't a multiple of 8 (noted in commit
>> f63f9be37c799ddc835af358034630d31fb7db02, which later was
>> reverted).
>> ---
>> libavcodec/aarch64/hevcdsp_init_aarch64.c | 6 +++++-
>> 1 file changed, 5 insertions(+), 1 deletion(-)
>
> As fate-hevc still is broken on aarch64, is anyone opposed to pushing these
> reverts until the things is sorted out?
Pushed these reverts now, plus I also pushed patch 4/4 to the 5.0 release
branch.
// Martin
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [FFmpeg-devel] [PATCH 4/4] aarch64: Disable ff_hevc_sao_band_filter_8x8_8_neon out of precaution
2022-01-06 23:30 ` Martin Storsjö
2022-01-07 21:01 ` Martin Storsjö
@ 2022-01-09 17:15 ` Andriy Gelman
1 sibling, 0 replies; 9+ messages in thread
From: Andriy Gelman @ 2022-01-09 17:15 UTC (permalink / raw)
To: FFmpeg development discussions and patches; +Cc: Kieran Kunhya
On Fri, 07. Jan 01:30, Martin Storsjö wrote:
> On Wed, 5 Jan 2022, Martin Storsjö wrote:
>
> > While this function on its own passes all of fate-hevc, there's
> > indications that the function might need to handle widths that
> > aren't a multiple of 8 (noted in commit
> > f63f9be37c799ddc835af358034630d31fb7db02, which later was
> > reverted).
> > ---
> > libavcodec/aarch64/hevcdsp_init_aarch64.c | 6 +++++-
> > 1 file changed, 5 insertions(+), 1 deletion(-)
>
> As fate-hevc still is broken on aarch64, is anyone opposed to pushing these
> reverts until the things is sorted out?
>
> I'm all for fixing things, but we don't need to leave things broken in git
> master while doing so.
>
I can set up a patchwork CI runner on the FFmpeg M1 machine to catch these in
the future.
Kieran, can you set up a user for me?
Thanks,
--
Andriy
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2022-01-09 17:15 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-01-05 8:31 [FFmpeg-devel] [PATCH 1/4] Revert "lavc/aarch64: add hevc sao band 8x8 tiling" Martin Storsjö
2022-01-05 8:31 ` [FFmpeg-devel] [PATCH 2/4] Revert "lavc/aarch64: add hevc sao edge 8x8" Martin Storsjö
2022-01-05 8:31 ` [FFmpeg-devel] [PATCH 3/4] Revert "lavc/aarch64: add hevc sao edge 16x16" Martin Storsjö
2022-01-05 8:31 ` [FFmpeg-devel] [PATCH 4/4] aarch64: Disable ff_hevc_sao_band_filter_8x8_8_neon out of precaution Martin Storsjö
2022-01-05 12:08 ` James Almer
2022-01-05 12:15 ` Martin Storsjö
2022-01-06 23:30 ` Martin Storsjö
2022-01-07 21:01 ` Martin Storsjö
2022-01-09 17:15 ` Andriy Gelman
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git