Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
* [FFmpeg-devel] [PATCH 1/4] Revert "lavc/aarch64: add hevc sao band 8x8 tiling"
@ 2022-01-05  8:31 Martin Storsjö
  2022-01-05  8:31 ` [FFmpeg-devel] [PATCH 2/4] Revert "lavc/aarch64: add hevc sao edge 8x8" Martin Storsjö
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: Martin Storsjö @ 2022-01-05  8:31 UTC (permalink / raw)
  To: ffmpeg-devel

This reverts commit f63f9be37c799ddc835af358034630d31fb7db02, as
it breaks fate-hevc.
---
 libavcodec/aarch64/hevcdsp_init_aarch64.c |  6 +-----
 libavcodec/aarch64/hevcdsp_sao_neon.S     | 11 ++++-------
 2 files changed, 5 insertions(+), 12 deletions(-)

diff --git a/libavcodec/aarch64/hevcdsp_init_aarch64.c b/libavcodec/aarch64/hevcdsp_init_aarch64.c
index 2002530266..b93cec9e44 100644
--- a/libavcodec/aarch64/hevcdsp_init_aarch64.c
+++ b/libavcodec/aarch64/hevcdsp_init_aarch64.c
@@ -77,11 +77,7 @@ av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth)
         c->idct_dc[1]                  = ff_hevc_idct_8x8_dc_8_neon;
         c->idct_dc[2]                  = ff_hevc_idct_16x16_dc_8_neon;
         c->idct_dc[3]                  = ff_hevc_idct_32x32_dc_8_neon;
-        c->sao_band_filter[0]          =
-        c->sao_band_filter[1]          =
-        c->sao_band_filter[2]          =
-        c->sao_band_filter[3]          =
-        c->sao_band_filter[4]          = ff_hevc_sao_band_filter_8x8_8_neon;
+        c->sao_band_filter[0]          = ff_hevc_sao_band_filter_8x8_8_neon;
         c->sao_edge_filter[0]          = ff_hevc_sao_edge_filter_8x8_8_neon;
         c->sao_edge_filter[1]          =
         c->sao_edge_filter[2]          =
diff --git a/libavcodec/aarch64/hevcdsp_sao_neon.S b/libavcodec/aarch64/hevcdsp_sao_neon.S
index d524323fe8..73b0b3b056 100644
--- a/libavcodec/aarch64/hevcdsp_sao_neon.S
+++ b/libavcodec/aarch64/hevcdsp_sao_neon.S
@@ -3,7 +3,7 @@
  *
  * AArch64 NEON optimised SAO functions for HEVC decoding
  *
- * Copyright (c) 2020-2021  J. Dekker <jdek@itanimul.li>
+ * Copyright (c) 2020 Josh Dekker <josh@itanimul.li>
  *
  * This file is part of FFmpeg.
  *
@@ -35,7 +35,6 @@ function ff_hevc_sao_band_filter_8x8_8_neon, export=1
         stp             xzr, xzr, [sp, #32]
         stp             xzr, xzr, [sp, #48]
         mov             w8,  #4
-        sxtw            x6,  w6
 0:      ldrsh           x9, [x4,  x8, lsl #1]      // sao_offset_val[k+1]
         subs            w8,  w8,  #1
         add             w10, w8,  w5               // k + sao_left_class
@@ -44,9 +43,7 @@ function ff_hevc_sao_band_filter_8x8_8_neon, export=1
         bne             0b
         ld1             {v16.16b-v19.16b}, [sp], #64
         movi            v20.8h,   #1
-        sub             x2,  x2, x6                // stride_dst - width
-        sub             x3,  x3, x6                // stride_src - width
-1:      mov             x8,  x6                    // beginning of line
+1:      mov             w8,  w6                    // beginning of line
 2:      // Simple layout for accessing 16bit values
         // with 8bit LUT.
         //
@@ -55,7 +52,7 @@ function ff_hevc_sao_band_filter_8x8_8_neon, export=1
         // |xDE#xAD|xCA#xFE|xBE#xEF|xFE#xED|....
         // +----------------------------------->
         //    i-0     i-1     i-2     i-3
-        ld1             {v2.8b}, [x1], #8          // dst[x] = av_clip_pixel(src[x] + offset_table[src[x] >> shift]);
+        ld1             {v2.8b}, [x1]              // dst[x] = av_clip_pixel(src[x] + offset_table[src[x] >> shift]);
         uxtl            v0.8h,  v2.8b              // load src[x]
         ushr            v2.8h,  v0.8h, #3          // >> BIT_DEPTH - 3
         shl             v1.8h,  v2.8h, #1          // low (x2, accessing short)
@@ -64,7 +61,7 @@ function ff_hevc_sao_band_filter_8x8_8_neon, export=1
         tbx             v2.16b, {v16.16b-v19.16b}, v1.16b // table
         add             v1.8h,  v0.8h, v2.8h       // src[x] + table
         sqxtun          v4.8b,  v1.8h              // clip + narrow
-        st1             {v4.8b}, [x0], #8          // store
+        st1             {v4.8b}, [x0]              // store
         subs            w8, w8,  #8                // done 8 pixels
         bne             2b
         subs            w7, w7,  #1                // finished line, prep. new
-- 
2.25.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [FFmpeg-devel] [PATCH 2/4] Revert "lavc/aarch64: add hevc sao edge 8x8"
  2022-01-05  8:31 [FFmpeg-devel] [PATCH 1/4] Revert "lavc/aarch64: add hevc sao band 8x8 tiling" Martin Storsjö
@ 2022-01-05  8:31 ` Martin Storsjö
  2022-01-05  8:31 ` [FFmpeg-devel] [PATCH 3/4] Revert "lavc/aarch64: add hevc sao edge 16x16" Martin Storsjö
  2022-01-05  8:31 ` [FFmpeg-devel] [PATCH 4/4] aarch64: Disable ff_hevc_sao_band_filter_8x8_8_neon out of precaution Martin Storsjö
  2 siblings, 0 replies; 9+ messages in thread
From: Martin Storsjö @ 2022-01-05  8:31 UTC (permalink / raw)
  To: ffmpeg-devel

This reverts commit c97ffc1a77ccaf901e642bd21ed26aaf75557745, as
it breaks fate-hevc.
---
 libavcodec/aarch64/hevcdsp_init_aarch64.c |  3 --
 libavcodec/aarch64/hevcdsp_sao_neon.S     | 51 -----------------------
 2 files changed, 54 deletions(-)

diff --git a/libavcodec/aarch64/hevcdsp_init_aarch64.c b/libavcodec/aarch64/hevcdsp_init_aarch64.c
index b93cec9e44..747ff0412d 100644
--- a/libavcodec/aarch64/hevcdsp_init_aarch64.c
+++ b/libavcodec/aarch64/hevcdsp_init_aarch64.c
@@ -59,8 +59,6 @@ void ff_hevc_sao_band_filter_8x8_8_neon(uint8_t *_dst, uint8_t *_src,
                                   int width, int height);
 void ff_hevc_sao_edge_filter_16x16_8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride_dst,
                                           int16_t *sao_offset_val, int eo, int width, int height);
-void ff_hevc_sao_edge_filter_8x8_8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride_dst,
-                                          int16_t *sao_offset_val, int eo, int width, int height);
 
 av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth)
 {
@@ -78,7 +76,6 @@ av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth)
         c->idct_dc[2]                  = ff_hevc_idct_16x16_dc_8_neon;
         c->idct_dc[3]                  = ff_hevc_idct_32x32_dc_8_neon;
         c->sao_band_filter[0]          = ff_hevc_sao_band_filter_8x8_8_neon;
-        c->sao_edge_filter[0]          = ff_hevc_sao_edge_filter_8x8_8_neon;
         c->sao_edge_filter[1]          =
         c->sao_edge_filter[2]          =
         c->sao_edge_filter[3]          =
diff --git a/libavcodec/aarch64/hevcdsp_sao_neon.S b/libavcodec/aarch64/hevcdsp_sao_neon.S
index 73b0b3b056..b8f0c65acc 100644
--- a/libavcodec/aarch64/hevcdsp_sao_neon.S
+++ b/libavcodec/aarch64/hevcdsp_sao_neon.S
@@ -135,54 +135,3 @@ function ff_hevc_sao_edge_filter_16x16_8_neon, export=1
         // no lines to filter
         ret
 endfunc
-
-// ff_hevc_sao_edge_filter_8x8_8_neon(char *dst, char *src, ptrdiff stride_dst,
-//                                    int16 *sao_offset_val, int eo, int width, int height)
-function ff_hevc_sao_edge_filter_8x8_8_neon, export=1
-        adr             x7, .Lsao_edge_pos
-        ldr             w4, [x7, w4, uxtw #2]
-        ld1             {v3.8h}, [x3]
-        mov             v3.h[7], v3.h[0]
-        mov             v3.h[0], v3.h[1]
-        mov             v3.h[1], v3.h[2]
-        mov             v3.h[2], v3.h[7]
-        uzp2            v1.16b, v3.16b, v3.16b
-        uzp1            v0.16b, v3.16b, v3.16b
-        movi            v2.16b, #2
-        add             x16, x0, x2
-        lsl             x2,  x2, #1
-        mov             x15, #192
-        mov             x8,  x1
-        sub             x9,  x1, x4
-        add             x10, x1, x4
-        mov             x17, #4
-1:      ld1             {v3.d}[0], [ x8], x15
-        ld1             {v4.d}[0], [ x9], x15
-        ld1             {v5.d}[0], [x10], x15
-        ld1             {v3.d}[1], [ x8], x15
-        ld1             {v4.d}[1], [ x9], x15
-        ld1             {v5.d}[1], [x10], x15
-        cmhi            v16.16b, v4.16b, v3.16b
-        cmhi            v17.16b, v3.16b, v4.16b
-        cmhi            v18.16b, v5.16b, v3.16b
-        cmhi            v19.16b, v3.16b, v5.16b
-        sub             v20.16b, v16.16b, v17.16b
-        sub             v21.16b, v18.16b, v19.16b
-        add             v20.16b, v20.16b, v21.16b
-        add             v20.16b, v20.16b, v2.16b
-        tbl             v16.16b, {v0.16b}, v20.16b
-        tbl             v17.16b, {v1.16b}, v20.16b
-        uxtl            v20.8h, v3.8b
-        uxtl2           v21.8h, v3.16b
-        zip1            v18.16b, v16.16b, v17.16b
-        zip2            v19.16b, v16.16b, v17.16b
-        sqadd           v20.8h, v18.8h, v20.8h
-        sqadd           v21.8h, v19.8h, v21.8h
-        sqxtun          v6.8b, v20.8h
-        sqxtun          v7.8b, v21.8h
-        st1             {v6.8b}, [ x0], x2
-        st1             {v7.8b}, [x16], x2
-        subs            x17, x17, #1
-        b.ne            1b
-        ret
-endfunc
-- 
2.25.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [FFmpeg-devel] [PATCH 3/4] Revert "lavc/aarch64: add hevc sao edge 16x16"
  2022-01-05  8:31 [FFmpeg-devel] [PATCH 1/4] Revert "lavc/aarch64: add hevc sao band 8x8 tiling" Martin Storsjö
  2022-01-05  8:31 ` [FFmpeg-devel] [PATCH 2/4] Revert "lavc/aarch64: add hevc sao edge 8x8" Martin Storsjö
@ 2022-01-05  8:31 ` Martin Storsjö
  2022-01-05  8:31 ` [FFmpeg-devel] [PATCH 4/4] aarch64: Disable ff_hevc_sao_band_filter_8x8_8_neon out of precaution Martin Storsjö
  2 siblings, 0 replies; 9+ messages in thread
From: Martin Storsjö @ 2022-01-05  8:31 UTC (permalink / raw)
  To: ffmpeg-devel

This reverts commit a9214a2ca31c9d54f893c5ac4004a5ff30a08d10, as
it breaks fate-hevc.
---
 libavcodec/aarch64/hevcdsp_init_aarch64.c |  8 +--
 libavcodec/aarch64/hevcdsp_sao_neon.S     | 65 -----------------------
 2 files changed, 2 insertions(+), 71 deletions(-)

diff --git a/libavcodec/aarch64/hevcdsp_init_aarch64.c b/libavcodec/aarch64/hevcdsp_init_aarch64.c
index 747ff0412d..c785e46f79 100644
--- a/libavcodec/aarch64/hevcdsp_init_aarch64.c
+++ b/libavcodec/aarch64/hevcdsp_init_aarch64.c
@@ -57,8 +57,8 @@ void ff_hevc_sao_band_filter_8x8_8_neon(uint8_t *_dst, uint8_t *_src,
                                   ptrdiff_t stride_dst, ptrdiff_t stride_src,
                                   int16_t *sao_offset_val, int sao_left_class,
                                   int width, int height);
-void ff_hevc_sao_edge_filter_16x16_8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride_dst,
-                                          int16_t *sao_offset_val, int eo, int width, int height);
+
+
 
 av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth)
 {
@@ -76,10 +76,6 @@ av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth)
         c->idct_dc[2]                  = ff_hevc_idct_16x16_dc_8_neon;
         c->idct_dc[3]                  = ff_hevc_idct_32x32_dc_8_neon;
         c->sao_band_filter[0]          = ff_hevc_sao_band_filter_8x8_8_neon;
-        c->sao_edge_filter[1]          =
-        c->sao_edge_filter[2]          =
-        c->sao_edge_filter[3]          =
-        c->sao_edge_filter[4]          = ff_hevc_sao_edge_filter_16x16_8_neon;
     }
     if (bit_depth == 10) {
         c->add_residual[0]             = ff_hevc_add_residual_4x4_10_neon;
diff --git a/libavcodec/aarch64/hevcdsp_sao_neon.S b/libavcodec/aarch64/hevcdsp_sao_neon.S
index b8f0c65acc..d523bf584d 100644
--- a/libavcodec/aarch64/hevcdsp_sao_neon.S
+++ b/libavcodec/aarch64/hevcdsp_sao_neon.S
@@ -70,68 +70,3 @@ function ff_hevc_sao_band_filter_8x8_8_neon, export=1
         bne             1b
         ret
 endfunc
-
-// ASSUMES STRIDE_SRC = 192
-.Lsao_edge_pos:
-.word 1 // horizontal
-.word 192 // vertical
-.word 192 + 1 // 45 degree
-.word 192 - 1 // 135 degree
-
-// ff_hevc_sao_edge_filter_16x16_8_neon(char *dst, char *src, ptrdiff stride_dst,
-//                                      int16 *sao_offset_val, int eo, int width, int height)
-function ff_hevc_sao_edge_filter_16x16_8_neon, export=1
-        adr             x7, .Lsao_edge_pos
-        ld1             {v3.8h}, [x3]              // load sao_offset_val
-        sxtw            x5, w5
-        ldr             w4, [x7, w4, uxtw #2]      // stride_src
-        mov             v3.h[7], v3.h[0]           // reorder to [1,2,0,3,4]
-        mov             v3.h[0], v3.h[1]
-        mov             v3.h[1], v3.h[2]
-        mov             v3.h[2], v3.h[7]
-        // split 16bit values into two tables
-        uzp2            v1.16b, v3.16b, v3.16b     // sao_offset_val -> upper
-        uzp1            v0.16b, v3.16b, v3.16b     // sao_offset_val -> lower
-        movi            v2.16b, #2
-        mov             x15, #192
-        // strides between end of line and next src/dst
-        sub             x15, x15, x5               // stride_src - width
-        sub             x16, x2, x5                // stride_dst - width
-        mov             x11, x1                    // copy base src
-1:      // new line
-        mov             x14, x5                    // copy width
-        sub             x12, x11, x4               // src_a (prev) = src - sao_edge_pos
-        add             x13, x11, x4               // src_b (next) = src + sao_edge_pos
-2:      // process 16 bytes
-        ld1             {v3.16b}, [x11], #16       // load src
-        ld1             {v4.16b}, [x12], #16       // load src_a (prev)
-        ld1             {v5.16b}, [x13], #16       // load src_b (next)
-        cmhi            v16.16b, v4.16b, v3.16b    // (prev > cur)
-        cmhi            v17.16b, v3.16b, v4.16b    // (cur > prev)
-        cmhi            v18.16b, v5.16b, v3.16b    // (next > cur)
-        cmhi            v19.16b, v3.16b, v5.16b    // (cur > next)
-        sub             v20.16b, v16.16b, v17.16b  // diff0 = CMP(cur, prev) = (cur > prev) - (cur < prev)
-        sub             v21.16b, v18.16b, v19.16b  // diff1 = CMP(cur, next) = (cur > next) - (cur < next)
-        add             v20.16b, v20.16b, v21.16b  // diff = diff0 + diff1
-        add             v20.16b, v20.16b, v2.16b   // offset_val = diff + 2
-        tbl             v16.16b, {v0.16b}, v20.16b
-        tbl             v17.16b, {v1.16b}, v20.16b
-        uxtl            v20.8h, v3.8b              // src[0:7]
-        uxtl2           v21.8h, v3.16b             // src[7:15]
-        zip1            v18.16b, v16.16b, v17.16b  // sao_offset_val lower ->
-        zip2            v19.16b, v16.16b, v17.16b  // sao_offset_val upper ->
-        sqadd           v20.8h, v18.8h, v20.8h     // + sao_offset_val
-        sqadd           v21.8h, v19.8h, v21.8h
-        sqxtun          v3.8b, v20.8h
-        sqxtun2         v3.16b, v21.8h
-        st1             {v3.16b}, [x0], #16
-        subs            x14, x14, #16              // filtered 16 bytes
-        b.ne            2b                         // do we have width to filter?
-        // no width to filter, setup next line
-        add             x11, x11, x15              // stride src to next line
-        add             x0, x0, x16                // stride dst to next line
-        subs            w6, w6, #1                 // filtered line
-        b.ne            1b                         // do we have lines to process?
-        // no lines to filter
-        ret
-endfunc
-- 
2.25.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [FFmpeg-devel] [PATCH 4/4] aarch64: Disable ff_hevc_sao_band_filter_8x8_8_neon out of precaution
  2022-01-05  8:31 [FFmpeg-devel] [PATCH 1/4] Revert "lavc/aarch64: add hevc sao band 8x8 tiling" Martin Storsjö
  2022-01-05  8:31 ` [FFmpeg-devel] [PATCH 2/4] Revert "lavc/aarch64: add hevc sao edge 8x8" Martin Storsjö
  2022-01-05  8:31 ` [FFmpeg-devel] [PATCH 3/4] Revert "lavc/aarch64: add hevc sao edge 16x16" Martin Storsjö
@ 2022-01-05  8:31 ` Martin Storsjö
  2022-01-05 12:08   ` James Almer
  2022-01-06 23:30   ` Martin Storsjö
  2 siblings, 2 replies; 9+ messages in thread
From: Martin Storsjö @ 2022-01-05  8:31 UTC (permalink / raw)
  To: ffmpeg-devel

While this function on its own passes all of fate-hevc, there are
indications that the function might need to handle widths that
aren't a multiple of 8 (noted in commit
f63f9be37c799ddc835af358034630d31fb7db02, which was later
reverted).
---
 libavcodec/aarch64/hevcdsp_init_aarch64.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/libavcodec/aarch64/hevcdsp_init_aarch64.c b/libavcodec/aarch64/hevcdsp_init_aarch64.c
index c785e46f79..1e40be740c 100644
--- a/libavcodec/aarch64/hevcdsp_init_aarch64.c
+++ b/libavcodec/aarch64/hevcdsp_init_aarch64.c
@@ -75,7 +75,11 @@ av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth)
         c->idct_dc[1]                  = ff_hevc_idct_8x8_dc_8_neon;
         c->idct_dc[2]                  = ff_hevc_idct_16x16_dc_8_neon;
         c->idct_dc[3]                  = ff_hevc_idct_32x32_dc_8_neon;
-        c->sao_band_filter[0]          = ff_hevc_sao_band_filter_8x8_8_neon;
+        // This function is disabled, as it doesn't handle widths that aren't
+        // an even multiple of 8 correctly. fate-hevc doesn't exercise that
+        // for the current size, but if enabled for bigger sizes, the cases
+        // of non-multiple of 8 seem to arise.
+//        c->sao_band_filter[0]          = ff_hevc_sao_band_filter_8x8_8_neon;
     }
     if (bit_depth == 10) {
         c->add_residual[0]             = ff_hevc_add_residual_4x4_10_neon;
-- 
2.25.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [FFmpeg-devel] [PATCH 4/4] aarch64: Disable ff_hevc_sao_band_filter_8x8_8_neon out of precaution
  2022-01-05  8:31 ` [FFmpeg-devel] [PATCH 4/4] aarch64: Disable ff_hevc_sao_band_filter_8x8_8_neon out of precaution Martin Storsjö
@ 2022-01-05 12:08   ` James Almer
  2022-01-05 12:15     ` Martin Storsjö
  2022-01-06 23:30   ` Martin Storsjö
  1 sibling, 1 reply; 9+ messages in thread
From: James Almer @ 2022-01-05 12:08 UTC (permalink / raw)
  To: ffmpeg-devel



On 1/5/2022 5:31 AM, Martin Storsjö wrote:
> While this function on its own passes all of fate-hevc, there's
> indications that the function might need to handle widths that
> aren't a multiple of 8 (noted in commit
> f63f9be37c799ddc835af358034630d31fb7db02, which later was
> reverted).
> ---
>   libavcodec/aarch64/hevcdsp_init_aarch64.c | 6 +++++-
>   1 file changed, 5 insertions(+), 1 deletion(-)
> 
> diff --git a/libavcodec/aarch64/hevcdsp_init_aarch64.c b/libavcodec/aarch64/hevcdsp_init_aarch64.c
> index c785e46f79..1e40be740c 100644
> --- a/libavcodec/aarch64/hevcdsp_init_aarch64.c
> +++ b/libavcodec/aarch64/hevcdsp_init_aarch64.c
> @@ -75,7 +75,11 @@ av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth)
>           c->idct_dc[1]                  = ff_hevc_idct_8x8_dc_8_neon;
>           c->idct_dc[2]                  = ff_hevc_idct_16x16_dc_8_neon;
>           c->idct_dc[3]                  = ff_hevc_idct_32x32_dc_8_neon;
> -        c->sao_band_filter[0]          = ff_hevc_sao_band_filter_8x8_8_neon;
> +        // This function is disabled, as it doesn't handle widths that aren't
> +        // an even multiple of 8 correctly. fate-hevc doesn't exercise that
> +        // for the current size, but if enabled for bigger sizes, the cases
> +        // of non-multiple of 8 seem to arise.

Can the checkasm test be extended to cover these cases?

> +//        c->sao_band_filter[0]          = ff_hevc_sao_band_filter_8x8_8_neon;
>       }
>       if (bit_depth == 10) {
>           c->add_residual[0]             = ff_hevc_add_residual_4x4_10_neon;
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [FFmpeg-devel] [PATCH 4/4] aarch64: Disable ff_hevc_sao_band_filter_8x8_8_neon out of precaution
  2022-01-05 12:08   ` James Almer
@ 2022-01-05 12:15     ` Martin Storsjö
  0 siblings, 0 replies; 9+ messages in thread
From: Martin Storsjö @ 2022-01-05 12:15 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

On Wed, 5 Jan 2022, James Almer wrote:

>
>
> On 1/5/2022 5:31 AM, Martin Storsjö wrote:
>> While this function on its own passes all of fate-hevc, there's
>> indications that the function might need to handle widths that
>> aren't a multiple of 8 (noted in commit
>> f63f9be37c799ddc835af358034630d31fb7db02, which later was
>> reverted).
>> ---
>>   libavcodec/aarch64/hevcdsp_init_aarch64.c | 6 +++++-
>>   1 file changed, 5 insertions(+), 1 deletion(-)
>> 
>> diff --git a/libavcodec/aarch64/hevcdsp_init_aarch64.c 
> b/libavcodec/aarch64/hevcdsp_init_aarch64.c
>> index c785e46f79..1e40be740c 100644
>> --- a/libavcodec/aarch64/hevcdsp_init_aarch64.c
>> +++ b/libavcodec/aarch64/hevcdsp_init_aarch64.c
>> @@ -75,7 +75,11 @@ av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, 
> const int bit_depth)
>>           c->idct_dc[1]                  = ff_hevc_idct_8x8_dc_8_neon;
>>           c->idct_dc[2]                  = ff_hevc_idct_16x16_dc_8_neon;
>>           c->idct_dc[3]                  = ff_hevc_idct_32x32_dc_8_neon;
>> -        c->sao_band_filter[0]          = 
> ff_hevc_sao_band_filter_8x8_8_neon;
>> +        // This function is disabled, as it doesn't handle widths that 
> aren't
>> +        // an even multiple of 8 correctly. fate-hevc doesn't exercise 
> that
>> +        // for the current size, but if enabled for bigger sizes, the 
> cases
>> +        // of non-multiple of 8 seem to arise.
>
> Can the checkasm test be extended to cover these cases?

Yes, ideally - but I'm not that familiar with it (from the decoder point 
of view, what argument combinations are valid and expected) and I don't 
quite have the bandwidth to take it on right now, so yes - patches welcome.

// Martin
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [FFmpeg-devel] [PATCH 4/4] aarch64: Disable ff_hevc_sao_band_filter_8x8_8_neon out of precaution
  2022-01-05  8:31 ` [FFmpeg-devel] [PATCH 4/4] aarch64: Disable ff_hevc_sao_band_filter_8x8_8_neon out of precaution Martin Storsjö
  2022-01-05 12:08   ` James Almer
@ 2022-01-06 23:30   ` Martin Storsjö
  2022-01-07 21:01     ` Martin Storsjö
  2022-01-09 17:15     ` Andriy Gelman
  1 sibling, 2 replies; 9+ messages in thread
From: Martin Storsjö @ 2022-01-06 23:30 UTC (permalink / raw)
  To: ffmpeg-devel

On Wed, 5 Jan 2022, Martin Storsjö wrote:

> While this function on its own passes all of fate-hevc, there's
> indications that the function might need to handle widths that
> aren't a multiple of 8 (noted in commit
> f63f9be37c799ddc835af358034630d31fb7db02, which later was
> reverted).
> ---
> libavcodec/aarch64/hevcdsp_init_aarch64.c | 6 +++++-
> 1 file changed, 5 insertions(+), 1 deletion(-)

As fate-hevc is still broken on aarch64, is anyone opposed to pushing 
these reverts until things are sorted out?

I'm all for fixing things, but we don't need to leave things broken in git 
master while doing so.

// Martin
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [FFmpeg-devel] [PATCH 4/4] aarch64: Disable ff_hevc_sao_band_filter_8x8_8_neon out of precaution
  2022-01-06 23:30   ` Martin Storsjö
@ 2022-01-07 21:01     ` Martin Storsjö
  2022-01-09 17:15     ` Andriy Gelman
  1 sibling, 0 replies; 9+ messages in thread
From: Martin Storsjö @ 2022-01-07 21:01 UTC (permalink / raw)
  To: ffmpeg-devel

On Fri, 7 Jan 2022, Martin Storsjö wrote:

> On Wed, 5 Jan 2022, Martin Storsjö wrote:
>
>> While this function on its own passes all of fate-hevc, there's
>> indications that the function might need to handle widths that
>> aren't a multiple of 8 (noted in commit
>> f63f9be37c799ddc835af358034630d31fb7db02, which later was
>> reverted).
>> ---
>> libavcodec/aarch64/hevcdsp_init_aarch64.c | 6 +++++-
>> 1 file changed, 5 insertions(+), 1 deletion(-)
>
> As fate-hevc still is broken on aarch64, is anyone opposed to pushing these 
> reverts until the things is sorted out?

Pushed these reverts now, plus I also pushed patch 4/4 to the 5.0 release 
branch.

// Martin
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [FFmpeg-devel] [PATCH 4/4] aarch64: Disable ff_hevc_sao_band_filter_8x8_8_neon out of precaution
  2022-01-06 23:30   ` Martin Storsjö
  2022-01-07 21:01     ` Martin Storsjö
@ 2022-01-09 17:15     ` Andriy Gelman
  1 sibling, 0 replies; 9+ messages in thread
From: Andriy Gelman @ 2022-01-09 17:15 UTC (permalink / raw)
  To: FFmpeg development discussions and patches; +Cc: Kieran Kunhya

On Fri, 07. Jan 01:30, Martin Storsjö wrote:
> On Wed, 5 Jan 2022, Martin Storsjö wrote:
> 
> > While this function on its own passes all of fate-hevc, there's
> > indications that the function might need to handle widths that
> > aren't a multiple of 8 (noted in commit
> > f63f9be37c799ddc835af358034630d31fb7db02, which later was
> > reverted).
> > ---
> > libavcodec/aarch64/hevcdsp_init_aarch64.c | 6 +++++-
> > 1 file changed, 5 insertions(+), 1 deletion(-)

> 
> As fate-hevc still is broken on aarch64, is anyone opposed to pushing these
> reverts until the things is sorted out?
> 
> I'm all for fixing things, but we don't need to leave things broken in git
> master while doing so.
> 

I can set up a patchwork CI runner on the FFmpeg M1 machine to catch these in
the future.
Kieran, can you set up a user for me?

Thanks,
-- 
Andriy
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2022-01-09 17:15 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-01-05  8:31 [FFmpeg-devel] [PATCH 1/4] Revert "lavc/aarch64: add hevc sao band 8x8 tiling" Martin Storsjö
2022-01-05  8:31 ` [FFmpeg-devel] [PATCH 2/4] Revert "lavc/aarch64: add hevc sao edge 8x8" Martin Storsjö
2022-01-05  8:31 ` [FFmpeg-devel] [PATCH 3/4] Revert "lavc/aarch64: add hevc sao edge 16x16" Martin Storsjö
2022-01-05  8:31 ` [FFmpeg-devel] [PATCH 4/4] aarch64: Disable ff_hevc_sao_band_filter_8x8_8_neon out of precaution Martin Storsjö
2022-01-05 12:08   ` James Almer
2022-01-05 12:15     ` Martin Storsjö
2022-01-06 23:30   ` Martin Storsjö
2022-01-07 21:01     ` Martin Storsjö
2022-01-09 17:15     ` Andriy Gelman

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git