Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
* [FFmpeg-devel] [PATCH] libavcodec/mpegvideo_enc: Fix a chroma mb size error in sse_mb()
@ 2022-07-01  5:34 Wenbin Chen
  2022-07-01  7:59 ` Xiang, Haihao
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: Wenbin Chen @ 2022-07-01  5:34 UTC (permalink / raw)
  To: ffmpeg-devel

For 422 frames we should not use hard coded 8 to calculate mb size for
uv plane. Chroma shift should be taken into consideration to be
compatiple with different sampling format.

The error is reported by fate test when av_cpu_max_align() return 64
on the platform supporting AVX512. This is a hidden error and it is
exposed after commit 17a59a634c39b00a680c6ebbaea58db95594d13d.

mpeg2enc has a mechanism to reuse frames. When it computes SSE (sum of
squared error) on current mb, reconstructed mb will be wrote to the
previous mb space, so that the memory can be saved. However if the align
is 64, the frame is shared in somewhere else, so the frame cannot be
reused and a new frame to store reconstrued data is created. Because the
height of mb is wrong when compute sse on 422 frame, starting from the
second line of macro block, changed data is read when frame is reused
(we need to read row 16 rather than row 8 if frame is 422), and unchanged
data is read when frame is not reused (a new frame is created so the
original frame will not be changed).

That is why commit 17a59a634c39b00a680c6ebbaea58db95594d13d exposes this
issue, because it add av_cpu_max_align() and this function return 64 on
platform supporting AVX512 which lead to creating a frame in mpeg2enc,
and this lead to the different outputs.

Signed-off-by: Wenbin Chen <wenbin.chen@intel.com>
---
 libavcodec/mpegvideo_enc.c             | 29 +++++++++++++------
 tests/ref/seek/vsynth_lena-mpeg2-422   | 40 +++++++++++++-------------
 tests/ref/vsynth/vsynth1-mpeg2-422     |  8 +++---
 tests/ref/vsynth/vsynth2-mpeg2-422     |  8 +++---
 tests/ref/vsynth/vsynth3-mpeg2-422     |  8 +++---
 tests/ref/vsynth/vsynth_lena-mpeg2-422 |  8 +++---
 6 files changed, 56 insertions(+), 45 deletions(-)

diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c
index d6a85a037a..c9d9e2a764 100644
--- a/libavcodec/mpegvideo_enc.c
+++ b/libavcodec/mpegvideo_enc.c
@@ -2558,24 +2558,35 @@ static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, in
 static int sse_mb(MpegEncContext *s){
     int w= 16;
     int h= 16;
+    int chroma_mb_w = w >> s->chroma_x_shift;
+    int chroma_mb_h = h >> s->chroma_y_shift;
 
     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
 
     if(w==16 && h==16)
       if(s->avctx->mb_cmp == FF_CMP_NSSE){
-        return s->mecc.nsse[0](s, s->new_picture->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
-               s->mecc.nsse[1](s, s->new_picture->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
-               s->mecc.nsse[1](s, s->new_picture->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
+        return s->mecc.nsse[0](s, s->new_picture->data[0] + s->mb_x * 16 + s->mb_y * s->linesize * 16,
+                               s->dest[0], s->linesize, 16) +
+               s->mecc.nsse[1](s, s->new_picture->data[1] + s->mb_x * chroma_mb_w + s->mb_y * s->uvlinesize * chroma_mb_h,
+                               s->dest[1], s->uvlinesize, chroma_mb_h) +
+               s->mecc.nsse[1](s, s->new_picture->data[2] + s->mb_x * chroma_mb_w + s->mb_y * s->uvlinesize * chroma_mb_h,
+                               s->dest[2], s->uvlinesize, chroma_mb_h);
       }else{
-        return s->mecc.sse[0](NULL, s->new_picture->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
-               s->mecc.sse[1](NULL, s->new_picture->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
-               s->mecc.sse[1](NULL, s->new_picture->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
+        return s->mecc.sse[0](NULL, s->new_picture->data[0] + s->mb_x * 16 + s->mb_y * s->linesize * 16,
+                              s->dest[0], s->linesize, 16) +
+               s->mecc.sse[1](NULL, s->new_picture->data[1] + s->mb_x * chroma_mb_w + s->mb_y * s->uvlinesize * chroma_mb_h,
+                              s->dest[1], s->uvlinesize, chroma_mb_h) +
+               s->mecc.sse[1](NULL, s->new_picture->data[2] + s->mb_x * chroma_mb_w + s->mb_y * s->uvlinesize * chroma_mb_h,
+                              s->dest[2], s->uvlinesize, chroma_mb_h);
       }
     else
-        return  sse(s, s->new_picture->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
-               +sse(s, s->new_picture->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
-               +sse(s, s->new_picture->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
+        return  sse(s, s->new_picture->data[0] + s->mb_x * 16 + s->mb_y * s->linesize * 16,
+                    s->dest[0], w, h, s->linesize) +
+                sse(s, s->new_picture->data[1] + s->mb_x * chroma_mb_w + s->mb_y * s->uvlinesize * chroma_mb_h,
+                    s->dest[1], w >> s->chroma_x_shift, h >> s->chroma_y_shift, s->uvlinesize) +
+                sse(s, s->new_picture->data[2] + s->mb_x * chroma_mb_w + s->mb_y * s->uvlinesize * chroma_mb_h,
+                    s->dest[2], w >> s->chroma_x_shift, h >> s->chroma_y_shift, s->uvlinesize);
 }
 
 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
diff --git a/tests/ref/seek/vsynth_lena-mpeg2-422 b/tests/ref/seek/vsynth_lena-mpeg2-422
index 06d8f7ac3a..1fc2bf93a0 100644
--- a/tests/ref/seek/vsynth_lena-mpeg2-422
+++ b/tests/ref/seek/vsynth_lena-mpeg2-422
@@ -1,46 +1,46 @@
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0 size: 17497
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0 size: 18339
 ret: 0         st:-1 flags:0  ts:-1.000000
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0 size: 17497
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0 size: 18339
 ret: 0         st:-1 flags:1  ts: 1.894167
-ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 325397 size: 19967
+ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 329242 size: 21479
 ret: 0         st: 0 flags:0  ts: 0.788334
-ret: 0         st: 0 flags:1 dts: 0.880000 pts: NOPTS    pos: 200747 size: 22575
+ret: 0         st: 0 flags:1 dts: 0.880000 pts: NOPTS    pos: 203086 size: 24774
 ret:-1         st: 0 flags:1  ts:-0.317499
 ret:-1         st:-1 flags:0  ts: 2.576668
 ret: 0         st:-1 flags:1  ts: 1.470835
-ret: 0         st: 0 flags:1 dts: 1.360000 pts: NOPTS    pos: 265466 size: 21329
+ret: 0         st: 0 flags:1 dts: 1.360000 pts: NOPTS    pos: 268154 size: 22845
 ret: 0         st: 0 flags:0  ts: 0.365002
-ret: 0         st: 0 flags:1 dts: 0.400000 pts: NOPTS    pos: 104454 size: 28984
+ret: 0         st: 0 flags:1 dts: 0.400000 pts: NOPTS    pos: 108763 size: 32204
 ret:-1         st: 0 flags:1  ts:-0.740831
 ret:-1         st:-1 flags:0  ts: 2.153336
 ret: 0         st:-1 flags:1  ts: 1.047503
-ret: 0         st: 0 flags:1 dts: 0.880000 pts: NOPTS    pos: 200747 size: 22575
+ret: 0         st: 0 flags:1 dts: 0.880000 pts: NOPTS    pos: 203086 size: 24774
 ret: 0         st: 0 flags:0  ts:-0.058330
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0 size: 17497
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0 size: 18339
 ret: 0         st: 0 flags:1  ts: 2.835837
-ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 325397 size: 19967
+ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 329242 size: 21479
 ret: 0         st:-1 flags:0  ts: 1.730004
-ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 325397 size: 19967
+ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 329242 size: 21479
 ret: 0         st:-1 flags:1  ts: 0.624171
-ret: 0         st: 0 flags:1 dts: 0.400000 pts: NOPTS    pos: 104454 size: 28984
+ret: 0         st: 0 flags:1 dts: 0.400000 pts: NOPTS    pos: 108763 size: 32204
 ret: 0         st: 0 flags:0  ts:-0.481662
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0 size: 17497
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0 size: 18339
 ret: 0         st: 0 flags:1  ts: 2.412505
-ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 325397 size: 19967
+ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 329242 size: 21479
 ret: 0         st:-1 flags:0  ts: 1.306672
-ret: 0         st: 0 flags:1 dts: 1.360000 pts: NOPTS    pos: 265466 size: 21329
+ret: 0         st: 0 flags:1 dts: 1.360000 pts: NOPTS    pos: 268154 size: 22845
 ret: 0         st:-1 flags:1  ts: 0.200839
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0 size: 17497
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0 size: 18339
 ret: 0         st: 0 flags:0  ts:-0.904994
-ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0 size: 17497
+ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0 size: 18339
 ret: 0         st: 0 flags:1  ts: 1.989173
-ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 325397 size: 19967
+ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 329242 size: 21479
 ret: 0         st:-1 flags:0  ts: 0.883340
-ret: 0         st: 0 flags:1 dts: 1.360000 pts: NOPTS    pos: 265466 size: 21329
+ret: 0         st: 0 flags:1 dts: 1.360000 pts: NOPTS    pos: 268154 size: 22845
 ret:-1         st:-1 flags:1  ts:-0.222493
 ret:-1         st: 0 flags:0  ts: 2.671674
 ret: 0         st: 0 flags:1  ts: 1.565841
-ret: 0         st: 0 flags:1 dts: 1.360000 pts: NOPTS    pos: 265466 size: 21329
+ret: 0         st: 0 flags:1 dts: 1.360000 pts: NOPTS    pos: 268154 size: 22845
 ret: 0         st:-1 flags:0  ts: 0.460008
-ret: 0         st: 0 flags:1 dts: 0.880000 pts: NOPTS    pos: 200747 size: 22575
+ret: 0         st: 0 flags:1 dts: 0.880000 pts: NOPTS    pos: 203086 size: 24774
 ret:-1         st:-1 flags:1  ts:-0.645825
diff --git a/tests/ref/vsynth/vsynth1-mpeg2-422 b/tests/ref/vsynth/vsynth1-mpeg2-422
index e936ba463e..a39b2b4dce 100644
--- a/tests/ref/vsynth/vsynth1-mpeg2-422
+++ b/tests/ref/vsynth/vsynth1-mpeg2-422
@@ -1,4 +1,4 @@
-6e135a1a27235a320311a932147846b4 *tests/data/fate/vsynth1-mpeg2-422.mpeg2video
-730780 tests/data/fate/vsynth1-mpeg2-422.mpeg2video
-0273cd8463d1fc115378748239951560 *tests/data/fate/vsynth1-mpeg2-422.out.rawvideo
-stddev:   10.27 PSNR: 27.90 MAXDIFF:  162 bytes:  7603200/  7603200
+651f054730be04257498eeae08b3bdc5 *tests/data/fate/vsynth1-mpeg2-422.mpeg2video
+855859 tests/data/fate/vsynth1-mpeg2-422.mpeg2video
+10fd55dbd10b08f9271eb89617766be8 *tests/data/fate/vsynth1-mpeg2-422.out.rawvideo
+stddev:    9.38 PSNR: 28.68 MAXDIFF:  178 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth2-mpeg2-422 b/tests/ref/vsynth/vsynth2-mpeg2-422
index ec7244f9f9..5da29b4eac 100644
--- a/tests/ref/vsynth/vsynth2-mpeg2-422
+++ b/tests/ref/vsynth/vsynth2-mpeg2-422
@@ -1,4 +1,4 @@
-b2fa9b73c3547191ecc01b8163abd4e5 *tests/data/fate/vsynth2-mpeg2-422.mpeg2video
-379164 tests/data/fate/vsynth2-mpeg2-422.mpeg2video
-704f6a96f93c2409219bd48b74169041 *tests/data/fate/vsynth2-mpeg2-422.out.rawvideo
-stddev:    4.17 PSNR: 35.73 MAXDIFF:   70 bytes:  7603200/  7603200
+fcaf6242ca4b706d392e40d9631e3465 *tests/data/fate/vsynth2-mpeg2-422.mpeg2video
+383946 tests/data/fate/vsynth2-mpeg2-422.mpeg2video
+474bb259f041bfb971691551f678fd09 *tests/data/fate/vsynth2-mpeg2-422.out.rawvideo
+stddev:    3.93 PSNR: 36.23 MAXDIFF:   69 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth3-mpeg2-422 b/tests/ref/vsynth/vsynth3-mpeg2-422
index 2247f286e6..e83c23635c 100644
--- a/tests/ref/vsynth/vsynth3-mpeg2-422
+++ b/tests/ref/vsynth/vsynth3-mpeg2-422
@@ -1,4 +1,4 @@
-4d108b861715f1fa010fd70baea91793 *tests/data/fate/vsynth3-mpeg2-422.mpeg2video
-68612 tests/data/fate/vsynth3-mpeg2-422.mpeg2video
-73b16e906d07b6bbccf4b00d4a25302c *tests/data/fate/vsynth3-mpeg2-422.out.rawvideo
-stddev:    4.02 PSNR: 36.05 MAXDIFF:   46 bytes:    86700/    86700
+96dc854fc40a6410d4edc387b63ded1a *tests/data/fate/vsynth3-mpeg2-422.mpeg2video
+75445 tests/data/fate/vsynth3-mpeg2-422.mpeg2video
+5337fc0bfb789919cb3325a02d48eb82 *tests/data/fate/vsynth3-mpeg2-422.out.rawvideo
+stddev:    3.24 PSNR: 37.90 MAXDIFF:   27 bytes:    86700/    86700
diff --git a/tests/ref/vsynth/vsynth_lena-mpeg2-422 b/tests/ref/vsynth/vsynth_lena-mpeg2-422
index 5f11d4e7cd..85d598782c 100644
--- a/tests/ref/vsynth/vsynth_lena-mpeg2-422
+++ b/tests/ref/vsynth/vsynth_lena-mpeg2-422
@@ -1,4 +1,4 @@
-521ec92c0b8672011a43dd13db98c400 *tests/data/fate/vsynth_lena-mpeg2-422.mpeg2video
-356431 tests/data/fate/vsynth_lena-mpeg2-422.mpeg2video
-51ca353620f85db8b5b1c56f1a275add *tests/data/fate/vsynth_lena-mpeg2-422.out.rawvideo
-stddev:    3.15 PSNR: 38.14 MAXDIFF:   49 bytes:  7603200/  7603200
+87e5c69775b363db649c253050536045 *tests/data/fate/vsynth_lena-mpeg2-422.mpeg2video
+361341 tests/data/fate/vsynth_lena-mpeg2-422.mpeg2video
+93cefcf101e6d0814acfaa626a02be90 *tests/data/fate/vsynth_lena-mpeg2-422.out.rawvideo
+stddev:    2.97 PSNR: 38.67 MAXDIFF:   57 bytes:  7603200/  7603200
-- 
2.32.0

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [FFmpeg-devel] [PATCH] libavcodec/mpegvideo_enc: Fix a chroma mb size error in sse_mb()
  2022-07-01  5:34 [FFmpeg-devel] [PATCH] libavcodec/mpegvideo_enc: Fix a chroma mb size error in sse_mb() Wenbin Chen
@ 2022-07-01  7:59 ` Xiang, Haihao
  2022-07-01  8:12   ` Paul B Mahol
  2022-07-01 20:15 ` Michael Niedermayer
  2022-07-01 21:19 ` Soft Works
  2 siblings, 1 reply; 8+ messages in thread
From: Xiang, Haihao @ 2022-07-01  7:59 UTC (permalink / raw)
  To: ffmpeg-devel

On Fri, 2022-07-01 at 13:34 +0800, Wenbin Chen wrote:
> For 422 frames we should not use hard coded 8 to calculate mb size for
> uv plane. Chroma shift should be taken into consideration to be
> compatiple with different sampling format.
> 
> The error is reported by fate test when av_cpu_max_align() return 64
> on the platform supporting AVX512. This is a hidden error and it is
> exposed after commit 17a59a634c39b00a680c6ebbaea58db95594d13d.
> 
> mpeg2enc has a mechanism to reuse frames. When it computes SSE (sum of
> squared error) on current mb, reconstructed mb will be wrote to the
> previous mb space, so that the memory can be saved. However if the align
> is 64, the frame is shared in somewhere else, so the frame cannot be
> reused and a new frame to store reconstrued data is created. Because the
> height of mb is wrong when compute sse on 422 frame, starting from the
> second line of macro block, changed data is read when frame is reused
> (we need to read row 16 rather than row 8 if frame is 422), and unchanged
> data is read when frame is not reused (a new frame is created so the
> original frame will not be changed).
> 
> That is why commit 17a59a634c39b00a680c6ebbaea58db95594d13d exposes this
> issue, because it add av_cpu_max_align() and this function return 64 on
> platform supporting AVX512 which lead to creating a frame in mpeg2enc,
> and this lead to the different outputs.
> 
> Signed-off-by: Wenbin Chen <wenbin.chen@intel.com>
> ---
>  libavcodec/mpegvideo_enc.c             | 29 +++++++++++++------
>  tests/ref/seek/vsynth_lena-mpeg2-422   | 40 +++++++++++++-------------
>  tests/ref/vsynth/vsynth1-mpeg2-422     |  8 +++---
>  tests/ref/vsynth/vsynth2-mpeg2-422     |  8 +++---
>  tests/ref/vsynth/vsynth3-mpeg2-422     |  8 +++---
>  tests/ref/vsynth/vsynth_lena-mpeg2-422 |  8 +++---
>  6 files changed, 56 insertions(+), 45 deletions(-)
> 
> diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c
> index d6a85a037a..c9d9e2a764 100644
> --- a/libavcodec/mpegvideo_enc.c
> +++ b/libavcodec/mpegvideo_enc.c
> @@ -2558,24 +2558,35 @@ static int sse(MpegEncContext *s, uint8_t *src1,
> uint8_t *src2, int w, int h, in
>  static int sse_mb(MpegEncContext *s){
>      int w= 16;
>      int h= 16;
> +    int chroma_mb_w = w >> s->chroma_x_shift;
> +    int chroma_mb_h = h >> s->chroma_y_shift;
>  
>      if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
>      if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
>  
>      if(w==16 && h==16)
>        if(s->avctx->mb_cmp == FF_CMP_NSSE){
> -        return s->mecc.nsse[0](s, s->new_picture->data[0] + s->mb_x * 16 + s-
> >mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
> -               s->mecc.nsse[1](s, s->new_picture->data[1] + s->mb_x *  8 + s-
> >mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
> -               s->mecc.nsse[1](s, s->new_picture->data[2] + s->mb_x *  8 + s-
> >mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
> +        return s->mecc.nsse[0](s, s->new_picture->data[0] + s->mb_x * 16 + s-
> >mb_y * s->linesize * 16,
> +                               s->dest[0], s->linesize, 16) +
> +               s->mecc.nsse[1](s, s->new_picture->data[1] + s->mb_x *
> chroma_mb_w + s->mb_y * s->uvlinesize * chroma_mb_h,
> +                               s->dest[1], s->uvlinesize, chroma_mb_h) +
> +               s->mecc.nsse[1](s, s->new_picture->data[2] + s->mb_x *
> chroma_mb_w + s->mb_y * s->uvlinesize * chroma_mb_h,
> +                               s->dest[2], s->uvlinesize, chroma_mb_h);
>        }else{
> -        return s->mecc.sse[0](NULL, s->new_picture->data[0] + s->mb_x * 16 +
> s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
> -               s->mecc.sse[1](NULL, s->new_picture->data[1] + s->mb_x *  8 +
> s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
> -               s->mecc.sse[1](NULL, s->new_picture->data[2] + s->mb_x *  8 +
> s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
> +        return s->mecc.sse[0](NULL, s->new_picture->data[0] + s->mb_x * 16 +
> s->mb_y * s->linesize * 16,
> +                              s->dest[0], s->linesize, 16) +
> +               s->mecc.sse[1](NULL, s->new_picture->data[1] + s->mb_x *
> chroma_mb_w + s->mb_y * s->uvlinesize * chroma_mb_h,
> +                              s->dest[1], s->uvlinesize, chroma_mb_h) +
> +               s->mecc.sse[1](NULL, s->new_picture->data[2] + s->mb_x *
> chroma_mb_w + s->mb_y * s->uvlinesize * chroma_mb_h,
> +                              s->dest[2], s->uvlinesize, chroma_mb_h);
>        }
>      else
> -        return  sse(s, s->new_picture->data[0] + s->mb_x*16 + s->mb_y*s-
> >linesize*16, s->dest[0], w, h, s->linesize)
> -               +sse(s, s->new_picture->data[1] + s->mb_x*8  + s->mb_y*s-
> >uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
> -               +sse(s, s->new_picture->data[2] + s->mb_x*8  + s->mb_y*s-
> >uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
> +        return  sse(s, s->new_picture->data[0] + s->mb_x * 16 + s->mb_y * s-
> >linesize * 16,
> +                    s->dest[0], w, h, s->linesize) +
> +                sse(s, s->new_picture->data[1] + s->mb_x * chroma_mb_w + s-
> >mb_y * s->uvlinesize * chroma_mb_h,
> +                    s->dest[1], w >> s->chroma_x_shift, h >> s-
> >chroma_y_shift, s->uvlinesize) +
> +                sse(s, s->new_picture->data[2] + s->mb_x * chroma_mb_w + s-
> >mb_y * s->uvlinesize * chroma_mb_h,
> +                    s->dest[2], w >> s->chroma_x_shift, h >> s-
> >chroma_y_shift, s->uvlinesize);
>  }
>  
>  static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
> diff --git a/tests/ref/seek/vsynth_lena-mpeg2-422
> b/tests/ref/seek/vsynth_lena-mpeg2-422
> index 06d8f7ac3a..1fc2bf93a0 100644
> --- a/tests/ref/seek/vsynth_lena-mpeg2-422
> +++ b/tests/ref/seek/vsynth_lena-mpeg2-422
> @@ -1,46 +1,46 @@
> -ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0 size:
> 17497
> +ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0 size:
> 18339
>  ret: 0         st:-1 flags:0  ts:-1.000000
> -ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0 size:
> 17497
> +ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0 size:
> 18339
>  ret: 0         st:-1 flags:1  ts: 1.894167
> -ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 325397 size:
> 19967
> +ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 329242 size:
> 21479
>  ret: 0         st: 0 flags:0  ts: 0.788334
> -ret: 0         st: 0 flags:1 dts: 0.880000 pts: NOPTS    pos: 200747 size:
> 22575
> +ret: 0         st: 0 flags:1 dts: 0.880000 pts: NOPTS    pos: 203086 size:
> 24774
>  ret:-1         st: 0 flags:1  ts:-0.317499
>  ret:-1         st:-1 flags:0  ts: 2.576668
>  ret: 0         st:-1 flags:1  ts: 1.470835
> -ret: 0         st: 0 flags:1 dts: 1.360000 pts: NOPTS    pos: 265466 size:
> 21329
> +ret: 0         st: 0 flags:1 dts: 1.360000 pts: NOPTS    pos: 268154 size:
> 22845
>  ret: 0         st: 0 flags:0  ts: 0.365002
> -ret: 0         st: 0 flags:1 dts: 0.400000 pts: NOPTS    pos: 104454 size:
> 28984
> +ret: 0         st: 0 flags:1 dts: 0.400000 pts: NOPTS    pos: 108763 size:
> 32204
>  ret:-1         st: 0 flags:1  ts:-0.740831
>  ret:-1         st:-1 flags:0  ts: 2.153336
>  ret: 0         st:-1 flags:1  ts: 1.047503
> -ret: 0         st: 0 flags:1 dts: 0.880000 pts: NOPTS    pos: 200747 size:
> 22575
> +ret: 0         st: 0 flags:1 dts: 0.880000 pts: NOPTS    pos: 203086 size:
> 24774
>  ret: 0         st: 0 flags:0  ts:-0.058330
> -ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0 size:
> 17497
> +ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0 size:
> 18339
>  ret: 0         st: 0 flags:1  ts: 2.835837
> -ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 325397 size:
> 19967
> +ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 329242 size:
> 21479
>  ret: 0         st:-1 flags:0  ts: 1.730004
> -ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 325397 size:
> 19967
> +ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 329242 size:
> 21479
>  ret: 0         st:-1 flags:1  ts: 0.624171
> -ret: 0         st: 0 flags:1 dts: 0.400000 pts: NOPTS    pos: 104454 size:
> 28984
> +ret: 0         st: 0 flags:1 dts: 0.400000 pts: NOPTS    pos: 108763 size:
> 32204
>  ret: 0         st: 0 flags:0  ts:-0.481662
> -ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0 size:
> 17497
> +ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0 size:
> 18339
>  ret: 0         st: 0 flags:1  ts: 2.412505
> -ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 325397 size:
> 19967
> +ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 329242 size:
> 21479
>  ret: 0         st:-1 flags:0  ts: 1.306672
> -ret: 0         st: 0 flags:1 dts: 1.360000 pts: NOPTS    pos: 265466 size:
> 21329
> +ret: 0         st: 0 flags:1 dts: 1.360000 pts: NOPTS    pos: 268154 size:
> 22845
>  ret: 0         st:-1 flags:1  ts: 0.200839
> -ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0 size:
> 17497
> +ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0 size:
> 18339
>  ret: 0         st: 0 flags:0  ts:-0.904994
> -ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0 size:
> 17497
> +ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0 size:
> 18339
>  ret: 0         st: 0 flags:1  ts: 1.989173
> -ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 325397 size:
> 19967
> +ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 329242 size:
> 21479
>  ret: 0         st:-1 flags:0  ts: 0.883340
> -ret: 0         st: 0 flags:1 dts: 1.360000 pts: NOPTS    pos: 265466 size:
> 21329
> +ret: 0         st: 0 flags:1 dts: 1.360000 pts: NOPTS    pos: 268154 size:
> 22845
>  ret:-1         st:-1 flags:1  ts:-0.222493
>  ret:-1         st: 0 flags:0  ts: 2.671674
>  ret: 0         st: 0 flags:1  ts: 1.565841
> -ret: 0         st: 0 flags:1 dts: 1.360000 pts: NOPTS    pos: 265466 size:
> 21329
> +ret: 0         st: 0 flags:1 dts: 1.360000 pts: NOPTS    pos: 268154 size:
> 22845
>  ret: 0         st:-1 flags:0  ts: 0.460008
> -ret: 0         st: 0 flags:1 dts: 0.880000 pts: NOPTS    pos: 200747 size:
> 22575
> +ret: 0         st: 0 flags:1 dts: 0.880000 pts: NOPTS    pos: 203086 size:
> 24774
>  ret:-1         st:-1 flags:1  ts:-0.645825
> diff --git a/tests/ref/vsynth/vsynth1-mpeg2-422 b/tests/ref/vsynth/vsynth1-
> mpeg2-422
> index e936ba463e..a39b2b4dce 100644
> --- a/tests/ref/vsynth/vsynth1-mpeg2-422
> +++ b/tests/ref/vsynth/vsynth1-mpeg2-422
> @@ -1,4 +1,4 @@
> -6e135a1a27235a320311a932147846b4 *tests/data/fate/vsynth1-mpeg2-
> 422.mpeg2video
> -730780 tests/data/fate/vsynth1-mpeg2-422.mpeg2video
> -0273cd8463d1fc115378748239951560 *tests/data/fate/vsynth1-mpeg2-
> 422.out.rawvideo
> -stddev:   10.27 PSNR: 27.90 MAXDIFF:  162 bytes:  7603200/  7603200
> +651f054730be04257498eeae08b3bdc5 *tests/data/fate/vsynth1-mpeg2-
> 422.mpeg2video
> +855859 tests/data/fate/vsynth1-mpeg2-422.mpeg2video
> +10fd55dbd10b08f9271eb89617766be8 *tests/data/fate/vsynth1-mpeg2-
> 422.out.rawvideo
> +stddev:    9.38 PSNR: 28.68 MAXDIFF:  178 bytes:  7603200/  7603200
> diff --git a/tests/ref/vsynth/vsynth2-mpeg2-422 b/tests/ref/vsynth/vsynth2-
> mpeg2-422
> index ec7244f9f9..5da29b4eac 100644
> --- a/tests/ref/vsynth/vsynth2-mpeg2-422
> +++ b/tests/ref/vsynth/vsynth2-mpeg2-422
> @@ -1,4 +1,4 @@
> -b2fa9b73c3547191ecc01b8163abd4e5 *tests/data/fate/vsynth2-mpeg2-
> 422.mpeg2video
> -379164 tests/data/fate/vsynth2-mpeg2-422.mpeg2video
> -704f6a96f93c2409219bd48b74169041 *tests/data/fate/vsynth2-mpeg2-
> 422.out.rawvideo
> -stddev:    4.17 PSNR: 35.73 MAXDIFF:   70 bytes:  7603200/  7603200
> +fcaf6242ca4b706d392e40d9631e3465 *tests/data/fate/vsynth2-mpeg2-
> 422.mpeg2video
> +383946 tests/data/fate/vsynth2-mpeg2-422.mpeg2video
> +474bb259f041bfb971691551f678fd09 *tests/data/fate/vsynth2-mpeg2-
> 422.out.rawvideo
> +stddev:    3.93 PSNR: 36.23 MAXDIFF:   69 bytes:  7603200/  7603200
> diff --git a/tests/ref/vsynth/vsynth3-mpeg2-422 b/tests/ref/vsynth/vsynth3-
> mpeg2-422
> index 2247f286e6..e83c23635c 100644
> --- a/tests/ref/vsynth/vsynth3-mpeg2-422
> +++ b/tests/ref/vsynth/vsynth3-mpeg2-422
> @@ -1,4 +1,4 @@
> -4d108b861715f1fa010fd70baea91793 *tests/data/fate/vsynth3-mpeg2-
> 422.mpeg2video
> -68612 tests/data/fate/vsynth3-mpeg2-422.mpeg2video
> -73b16e906d07b6bbccf4b00d4a25302c *tests/data/fate/vsynth3-mpeg2-
> 422.out.rawvideo
> -stddev:    4.02 PSNR: 36.05 MAXDIFF:   46 bytes:    86700/    86700
> +96dc854fc40a6410d4edc387b63ded1a *tests/data/fate/vsynth3-mpeg2-
> 422.mpeg2video
> +75445 tests/data/fate/vsynth3-mpeg2-422.mpeg2video
> +5337fc0bfb789919cb3325a02d48eb82 *tests/data/fate/vsynth3-mpeg2-
> 422.out.rawvideo
> +stddev:    3.24 PSNR: 37.90 MAXDIFF:   27 bytes:    86700/    86700
> diff --git a/tests/ref/vsynth/vsynth_lena-mpeg2-422
> b/tests/ref/vsynth/vsynth_lena-mpeg2-422
> index 5f11d4e7cd..85d598782c 100644
> --- a/tests/ref/vsynth/vsynth_lena-mpeg2-422
> +++ b/tests/ref/vsynth/vsynth_lena-mpeg2-422
> @@ -1,4 +1,4 @@
> -521ec92c0b8672011a43dd13db98c400 *tests/data/fate/vsynth_lena-mpeg2-
> 422.mpeg2video
> -356431 tests/data/fate/vsynth_lena-mpeg2-422.mpeg2video
> -51ca353620f85db8b5b1c56f1a275add *tests/data/fate/vsynth_lena-mpeg2-
> 422.out.rawvideo
> -stddev:    3.15 PSNR: 38.14 MAXDIFF:   49 bytes:  7603200/  7603200
> +87e5c69775b363db649c253050536045 *tests/data/fate/vsynth_lena-mpeg2-
> 422.mpeg2video
> +361341 tests/data/fate/vsynth_lena-mpeg2-422.mpeg2video
> +93cefcf101e6d0814acfaa626a02be90 *tests/data/fate/vsynth_lena-mpeg2-
> 422.out.rawvideo
> +stddev:    2.97 PSNR: 38.67 MAXDIFF:   57 bytes:  7603200/  7603200


Patch LGTM. `make fate-vsynth1-mpeg2-422` works again on my Ice Lake after
applying this patch. It was broken since commit 17a59a6, see 
https://patchwork.ffmpeg.org/project/ffmpeg/patch/20220221122715.424475-1-onemda@gmail.com/

Thanks
Haihao

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [FFmpeg-devel] [PATCH] libavcodec/mpegvideo_enc: Fix a chroma mb size error in sse_mb()
  2022-07-01  7:59 ` Xiang, Haihao
@ 2022-07-01  8:12   ` Paul B Mahol
  2022-07-01  8:47     ` Chen, Wenbin
  0 siblings, 1 reply; 8+ messages in thread
From: Paul B Mahol @ 2022-07-01  8:12 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

On Fri, Jul 1, 2022 at 10:00 AM Xiang, Haihao <
haihao.xiang-at-intel.com@ffmpeg.org> wrote:

> On Fri, 2022-07-01 at 13:34 +0800, Wenbin Chen wrote:
> > For 422 frames we should not use hard coded 8 to calculate mb size for
> > uv plane. Chroma shift should be taken into consideration to be
> > compatiple with different sampling format.
> >
> > The error is reported by fate test when av_cpu_max_align() return 64
> > on the platform supporting AVX512. This is a hidden error and it is
> > exposed after commit 17a59a634c39b00a680c6ebbaea58db95594d13d.
> >
> > mpeg2enc has a mechanism to reuse frames. When it computes SSE (sum of
> > squared error) on current mb, reconstructed mb will be wrote to the
> > previous mb space, so that the memory can be saved. However if the align
> > is 64, the frame is shared in somewhere else, so the frame cannot be
> > reused and a new frame to store reconstrued data is created. Because the
> > height of mb is wrong when compute sse on 422 frame, starting from the
> > second line of macro block, changed data is read when frame is reused
> > (we need to read row 16 rather than row 8 if frame is 422), and unchanged
> > data is read when frame is not reused (a new frame is created so the
> > original frame will not be changed).
> >
> > That is why commit 17a59a634c39b00a680c6ebbaea58db95594d13d exposes this
> > issue, because it add av_cpu_max_align() and this function return 64 on
> > platform supporting AVX512 which lead to creating a frame in mpeg2enc,
> > and this lead to the different outputs.
> >
> > Signed-off-by: Wenbin Chen <wenbin.chen@intel.com>
> > ---
> >  libavcodec/mpegvideo_enc.c             | 29 +++++++++++++------
> >  tests/ref/seek/vsynth_lena-mpeg2-422   | 40 +++++++++++++-------------
> >  tests/ref/vsynth/vsynth1-mpeg2-422     |  8 +++---
> >  tests/ref/vsynth/vsynth2-mpeg2-422     |  8 +++---
> >  tests/ref/vsynth/vsynth3-mpeg2-422     |  8 +++---
> >  tests/ref/vsynth/vsynth_lena-mpeg2-422 |  8 +++---
> >  6 files changed, 56 insertions(+), 45 deletions(-)
> >
> > diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c
> > index d6a85a037a..c9d9e2a764 100644
> > --- a/libavcodec/mpegvideo_enc.c
> > +++ b/libavcodec/mpegvideo_enc.c
> > @@ -2558,24 +2558,35 @@ static int sse(MpegEncContext *s, uint8_t *src1,
> > uint8_t *src2, int w, int h, in
> >  static int sse_mb(MpegEncContext *s){
> >      int w= 16;
> >      int h= 16;
> > +    int chroma_mb_w = w >> s->chroma_x_shift;
> > +    int chroma_mb_h = h >> s->chroma_y_shift;
> >
> >      if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
> >      if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
> >
> >      if(w==16 && h==16)
> >        if(s->avctx->mb_cmp == FF_CMP_NSSE){
> > -        return s->mecc.nsse[0](s, s->new_picture->data[0] + s->mb_x *
> 16 + s-
> > >mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
> > -               s->mecc.nsse[1](s, s->new_picture->data[1] + s->mb_x *
> 8 + s-
> > >mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
> > -               s->mecc.nsse[1](s, s->new_picture->data[2] + s->mb_x *
> 8 + s-
> > >mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
> > +        return s->mecc.nsse[0](s, s->new_picture->data[0] + s->mb_x *
> 16 + s-
> > >mb_y * s->linesize * 16,
> > +                               s->dest[0], s->linesize, 16) +
> > +               s->mecc.nsse[1](s, s->new_picture->data[1] + s->mb_x *
> > chroma_mb_w + s->mb_y * s->uvlinesize * chroma_mb_h,
> > +                               s->dest[1], s->uvlinesize, chroma_mb_h) +
> > +               s->mecc.nsse[1](s, s->new_picture->data[2] + s->mb_x *
> > chroma_mb_w + s->mb_y * s->uvlinesize * chroma_mb_h,
> > +                               s->dest[2], s->uvlinesize, chroma_mb_h);
> >        }else{
> > -        return s->mecc.sse[0](NULL, s->new_picture->data[0] + s->mb_x *
> 16 +
> > s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
> > -               s->mecc.sse[1](NULL, s->new_picture->data[1] + s->mb_x
> *  8 +
> > s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
> > -               s->mecc.sse[1](NULL, s->new_picture->data[2] + s->mb_x
> *  8 +
> > s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
> > +        return s->mecc.sse[0](NULL, s->new_picture->data[0] + s->mb_x *
> 16 +
> > s->mb_y * s->linesize * 16,
> > +                              s->dest[0], s->linesize, 16) +
> > +               s->mecc.sse[1](NULL, s->new_picture->data[1] + s->mb_x *
> > chroma_mb_w + s->mb_y * s->uvlinesize * chroma_mb_h,
> > +                              s->dest[1], s->uvlinesize, chroma_mb_h) +
> > +               s->mecc.sse[1](NULL, s->new_picture->data[2] + s->mb_x *
> > chroma_mb_w + s->mb_y * s->uvlinesize * chroma_mb_h,
> > +                              s->dest[2], s->uvlinesize, chroma_mb_h);
> >        }
> >      else
> > -        return  sse(s, s->new_picture->data[0] + s->mb_x*16 + s->mb_y*s-
> > >linesize*16, s->dest[0], w, h, s->linesize)
> > -               +sse(s, s->new_picture->data[1] + s->mb_x*8  + s->mb_y*s-
> > >uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
> > -               +sse(s, s->new_picture->data[2] + s->mb_x*8  + s->mb_y*s-
> > >uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
> > +        return  sse(s, s->new_picture->data[0] + s->mb_x * 16 + s->mb_y
> * s-
> > >linesize * 16,
> > +                    s->dest[0], w, h, s->linesize) +
> > +                sse(s, s->new_picture->data[1] + s->mb_x * chroma_mb_w
> + s-
> > >mb_y * s->uvlinesize * chroma_mb_h,
> > +                    s->dest[1], w >> s->chroma_x_shift, h >> s-
> > >chroma_y_shift, s->uvlinesize) +
> > +                sse(s, s->new_picture->data[2] + s->mb_x * chroma_mb_w
> + s-
> > >mb_y * s->uvlinesize * chroma_mb_h,
> > +                    s->dest[2], w >> s->chroma_x_shift, h >> s-
> > >chroma_y_shift, s->uvlinesize);
> >  }
> >
> >  static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
> > diff --git a/tests/ref/seek/vsynth_lena-mpeg2-422
> > b/tests/ref/seek/vsynth_lena-mpeg2-422
> > index 06d8f7ac3a..1fc2bf93a0 100644
> > --- a/tests/ref/seek/vsynth_lena-mpeg2-422
> > +++ b/tests/ref/seek/vsynth_lena-mpeg2-422
> > @@ -1,46 +1,46 @@
> > -ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0
> size:
> > 17497
> > +ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0
> size:
> > 18339
> >  ret: 0         st:-1 flags:0  ts:-1.000000
> > -ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0
> size:
> > 17497
> > +ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0
> size:
> > 18339
> >  ret: 0         st:-1 flags:1  ts: 1.894167
> > -ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 325397
> size:
> > 19967
> > +ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 329242
> size:
> > 21479
> >  ret: 0         st: 0 flags:0  ts: 0.788334
> > -ret: 0         st: 0 flags:1 dts: 0.880000 pts: NOPTS    pos: 200747
> size:
> > 22575
> > +ret: 0         st: 0 flags:1 dts: 0.880000 pts: NOPTS    pos: 203086
> size:
> > 24774
> >  ret:-1         st: 0 flags:1  ts:-0.317499
> >  ret:-1         st:-1 flags:0  ts: 2.576668
> >  ret: 0         st:-1 flags:1  ts: 1.470835
> > -ret: 0         st: 0 flags:1 dts: 1.360000 pts: NOPTS    pos: 265466
> size:
> > 21329
> > +ret: 0         st: 0 flags:1 dts: 1.360000 pts: NOPTS    pos: 268154
> size:
> > 22845
> >  ret: 0         st: 0 flags:0  ts: 0.365002
> > -ret: 0         st: 0 flags:1 dts: 0.400000 pts: NOPTS    pos: 104454
> size:
> > 28984
> > +ret: 0         st: 0 flags:1 dts: 0.400000 pts: NOPTS    pos: 108763
> size:
> > 32204
> >  ret:-1         st: 0 flags:1  ts:-0.740831
> >  ret:-1         st:-1 flags:0  ts: 2.153336
> >  ret: 0         st:-1 flags:1  ts: 1.047503
> > -ret: 0         st: 0 flags:1 dts: 0.880000 pts: NOPTS    pos: 200747
> size:
> > 22575
> > +ret: 0         st: 0 flags:1 dts: 0.880000 pts: NOPTS    pos: 203086
> size:
> > 24774
> >  ret: 0         st: 0 flags:0  ts:-0.058330
> > -ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0
> size:
> > 17497
> > +ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0
> size:
> > 18339
> >  ret: 0         st: 0 flags:1  ts: 2.835837
> > -ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 325397
> size:
> > 19967
> > +ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 329242
> size:
> > 21479
> >  ret: 0         st:-1 flags:0  ts: 1.730004
> > -ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 325397
> size:
> > 19967
> > +ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 329242
> size:
> > 21479
> >  ret: 0         st:-1 flags:1  ts: 0.624171
> > -ret: 0         st: 0 flags:1 dts: 0.400000 pts: NOPTS    pos: 104454
> size:
> > 28984
> > +ret: 0         st: 0 flags:1 dts: 0.400000 pts: NOPTS    pos: 108763
> size:
> > 32204
> >  ret: 0         st: 0 flags:0  ts:-0.481662
> > -ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0
> size:
> > 17497
> > +ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0
> size:
> > 18339
> >  ret: 0         st: 0 flags:1  ts: 2.412505
> > -ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 325397
> size:
> > 19967
> > +ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 329242
> size:
> > 21479
> >  ret: 0         st:-1 flags:0  ts: 1.306672
> > -ret: 0         st: 0 flags:1 dts: 1.360000 pts: NOPTS    pos: 265466
> size:
> > 21329
> > +ret: 0         st: 0 flags:1 dts: 1.360000 pts: NOPTS    pos: 268154
> size:
> > 22845
> >  ret: 0         st:-1 flags:1  ts: 0.200839
> > -ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0
> size:
> > 17497
> > +ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0
> size:
> > 18339
> >  ret: 0         st: 0 flags:0  ts:-0.904994
> > -ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0
> size:
> > 17497
> > +ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0
> size:
> > 18339
> >  ret: 0         st: 0 flags:1  ts: 1.989173
> > -ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 325397
> size:
> > 19967
> > +ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 329242
> size:
> > 21479
> >  ret: 0         st:-1 flags:0  ts: 0.883340
> > -ret: 0         st: 0 flags:1 dts: 1.360000 pts: NOPTS    pos: 265466
> size:
> > 21329
> > +ret: 0         st: 0 flags:1 dts: 1.360000 pts: NOPTS    pos: 268154
> size:
> > 22845
> >  ret:-1         st:-1 flags:1  ts:-0.222493
> >  ret:-1         st: 0 flags:0  ts: 2.671674
> >  ret: 0         st: 0 flags:1  ts: 1.565841
> > -ret: 0         st: 0 flags:1 dts: 1.360000 pts: NOPTS    pos: 265466
> size:
> > 21329
> > +ret: 0         st: 0 flags:1 dts: 1.360000 pts: NOPTS    pos: 268154
> size:
> > 22845
> >  ret: 0         st:-1 flags:0  ts: 0.460008
> > -ret: 0         st: 0 flags:1 dts: 0.880000 pts: NOPTS    pos: 200747
> size:
> > 22575
> > +ret: 0         st: 0 flags:1 dts: 0.880000 pts: NOPTS    pos: 203086
> size:
> > 24774
> >  ret:-1         st:-1 flags:1  ts:-0.645825
> > diff --git a/tests/ref/vsynth/vsynth1-mpeg2-422
> b/tests/ref/vsynth/vsynth1-
> > mpeg2-422
> > index e936ba463e..a39b2b4dce 100644
> > --- a/tests/ref/vsynth/vsynth1-mpeg2-422
> > +++ b/tests/ref/vsynth/vsynth1-mpeg2-422
> > @@ -1,4 +1,4 @@
> > -6e135a1a27235a320311a932147846b4 *tests/data/fate/vsynth1-mpeg2-
> > 422.mpeg2video
> > -730780 tests/data/fate/vsynth1-mpeg2-422.mpeg2video
> > -0273cd8463d1fc115378748239951560 *tests/data/fate/vsynth1-mpeg2-
> > 422.out.rawvideo
> > -stddev:   10.27 PSNR: 27.90 MAXDIFF:  162 bytes:  7603200/  7603200
> > +651f054730be04257498eeae08b3bdc5 *tests/data/fate/vsynth1-mpeg2-
> > 422.mpeg2video
> > +855859 tests/data/fate/vsynth1-mpeg2-422.mpeg2video
> > +10fd55dbd10b08f9271eb89617766be8 *tests/data/fate/vsynth1-mpeg2-
> > 422.out.rawvideo
> > +stddev:    9.38 PSNR: 28.68 MAXDIFF:  178 bytes:  7603200/  7603200
> > diff --git a/tests/ref/vsynth/vsynth2-mpeg2-422
> b/tests/ref/vsynth/vsynth2-
> > mpeg2-422
> > index ec7244f9f9..5da29b4eac 100644
> > --- a/tests/ref/vsynth/vsynth2-mpeg2-422
> > +++ b/tests/ref/vsynth/vsynth2-mpeg2-422
> > @@ -1,4 +1,4 @@
> > -b2fa9b73c3547191ecc01b8163abd4e5 *tests/data/fate/vsynth2-mpeg2-
> > 422.mpeg2video
> > -379164 tests/data/fate/vsynth2-mpeg2-422.mpeg2video
> > -704f6a96f93c2409219bd48b74169041 *tests/data/fate/vsynth2-mpeg2-
> > 422.out.rawvideo
> > -stddev:    4.17 PSNR: 35.73 MAXDIFF:   70 bytes:  7603200/  7603200
> > +fcaf6242ca4b706d392e40d9631e3465 *tests/data/fate/vsynth2-mpeg2-
> > 422.mpeg2video
> > +383946 tests/data/fate/vsynth2-mpeg2-422.mpeg2video
> > +474bb259f041bfb971691551f678fd09 *tests/data/fate/vsynth2-mpeg2-
> > 422.out.rawvideo
> > +stddev:    3.93 PSNR: 36.23 MAXDIFF:   69 bytes:  7603200/  7603200
> > diff --git a/tests/ref/vsynth/vsynth3-mpeg2-422
> b/tests/ref/vsynth/vsynth3-
> > mpeg2-422
> > index 2247f286e6..e83c23635c 100644
> > --- a/tests/ref/vsynth/vsynth3-mpeg2-422
> > +++ b/tests/ref/vsynth/vsynth3-mpeg2-422
> > @@ -1,4 +1,4 @@
> > -4d108b861715f1fa010fd70baea91793 *tests/data/fate/vsynth3-mpeg2-
> > 422.mpeg2video
> > -68612 tests/data/fate/vsynth3-mpeg2-422.mpeg2video
> > -73b16e906d07b6bbccf4b00d4a25302c *tests/data/fate/vsynth3-mpeg2-
> > 422.out.rawvideo
> > -stddev:    4.02 PSNR: 36.05 MAXDIFF:   46 bytes:    86700/    86700
> > +96dc854fc40a6410d4edc387b63ded1a *tests/data/fate/vsynth3-mpeg2-
> > 422.mpeg2video
> > +75445 tests/data/fate/vsynth3-mpeg2-422.mpeg2video
> > +5337fc0bfb789919cb3325a02d48eb82 *tests/data/fate/vsynth3-mpeg2-
> > 422.out.rawvideo
> > +stddev:    3.24 PSNR: 37.90 MAXDIFF:   27 bytes:    86700/    86700
> > diff --git a/tests/ref/vsynth/vsynth_lena-mpeg2-422
> > b/tests/ref/vsynth/vsynth_lena-mpeg2-422
> > index 5f11d4e7cd..85d598782c 100644
> > --- a/tests/ref/vsynth/vsynth_lena-mpeg2-422
> > +++ b/tests/ref/vsynth/vsynth_lena-mpeg2-422
> > @@ -1,4 +1,4 @@
> > -521ec92c0b8672011a43dd13db98c400 *tests/data/fate/vsynth_lena-mpeg2-
> > 422.mpeg2video
> > -356431 tests/data/fate/vsynth_lena-mpeg2-422.mpeg2video
> > -51ca353620f85db8b5b1c56f1a275add *tests/data/fate/vsynth_lena-mpeg2-
> > 422.out.rawvideo
> > -stddev:    3.15 PSNR: 38.14 MAXDIFF:   49 bytes:  7603200/  7603200
> > +87e5c69775b363db649c253050536045 *tests/data/fate/vsynth_lena-mpeg2-
> > 422.mpeg2video
> > +361341 tests/data/fate/vsynth_lena-mpeg2-422.mpeg2video
> > +93cefcf101e6d0814acfaa626a02be90 *tests/data/fate/vsynth_lena-mpeg2-
> > 422.out.rawvideo
> > +stddev:    2.97 PSNR: 38.67 MAXDIFF:   57 bytes:  7603200/  7603200
>
>
> Patch LGTM. `make fate-vsynth1-mpeg2-422` works again on my Ice Lake after
> applying this patch. It was broken since commit 17a59a6, see
>
> https://patchwork.ffmpeg.org/project/ffmpeg/patch/20220221122715.424475-1-onemda@gmail.com/


But why PSNR got worse?


>
>
> Thanks
> Haihao
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [FFmpeg-devel] [PATCH] libavcodec/mpegvideo_enc: Fix a chroma mb size error in sse_mb()
  2022-07-01  8:12   ` Paul B Mahol
@ 2022-07-01  8:47     ` Chen, Wenbin
  2022-07-01  8:55       ` Paul B Mahol
  0 siblings, 1 reply; 8+ messages in thread
From: Chen, Wenbin @ 2022-07-01  8:47 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

> On Fri, Jul 1, 2022 at 10:00 AM Xiang, Haihao <
> haihao.xiang-at-intel.com@ffmpeg.org> wrote:
> 
> > On Fri, 2022-07-01 at 13:34 +0800, Wenbin Chen wrote:
> > > For 422 frames we should not use hard coded 8 to calculate mb size for
> > > uv plane. Chroma shift should be taken into consideration to be
> > > compatiple with different sampling format.
> > >
> > > The error is reported by fate test when av_cpu_max_align() return 64
> > > on the platform supporting AVX512. This is a hidden error and it is
> > > exposed after commit 17a59a634c39b00a680c6ebbaea58db95594d13d.
> > >
> > > mpeg2enc has a mechanism to reuse frames. When it computes SSE (sum
> of
> > > squared error) on current mb, reconstructed mb will be wrote to the
> > > previous mb space, so that the memory can be saved. However if the
> align
> > > is 64, the frame is shared in somewhere else, so the frame cannot be
> > > reused and a new frame to store reconstrued data is created. Because
> the
> > > height of mb is wrong when compute sse on 422 frame, starting from the
> > > second line of macro block, changed data is read when frame is reused
> > > (we need to read row 16 rather than row 8 if frame is 422), and
> unchanged
> > > data is read when frame is not reused (a new frame is created so the
> > > original frame will not be changed).
> > >
> > > That is why commit 17a59a634c39b00a680c6ebbaea58db95594d13d
> exposes this
> > > issue, because it add av_cpu_max_align() and this function return 64 on
> > > platform supporting AVX512 which lead to creating a frame in mpeg2enc,
> > > and this lead to the different outputs.
> > >
> > > Signed-off-by: Wenbin Chen <wenbin.chen@intel.com>
> > > ---
> > >  libavcodec/mpegvideo_enc.c             | 29 +++++++++++++------
> > >  tests/ref/seek/vsynth_lena-mpeg2-422   | 40 +++++++++++++-------------
> > >  tests/ref/vsynth/vsynth1-mpeg2-422     |  8 +++---
> > >  tests/ref/vsynth/vsynth2-mpeg2-422     |  8 +++---
> > >  tests/ref/vsynth/vsynth3-mpeg2-422     |  8 +++---
> > >  tests/ref/vsynth/vsynth_lena-mpeg2-422 |  8 +++---
> > >  6 files changed, 56 insertions(+), 45 deletions(-)
> > >
> > > diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c
> > > index d6a85a037a..c9d9e2a764 100644
> > > --- a/libavcodec/mpegvideo_enc.c
> > > +++ b/libavcodec/mpegvideo_enc.c
> > > @@ -2558,24 +2558,35 @@ static int sse(MpegEncContext *s, uint8_t
> *src1,
> > > uint8_t *src2, int w, int h, in
> > >  static int sse_mb(MpegEncContext *s){
> > >      int w= 16;
> > >      int h= 16;
> > > +    int chroma_mb_w = w >> s->chroma_x_shift;
> > > +    int chroma_mb_h = h >> s->chroma_y_shift;
> > >
> > >      if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
> > >      if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
> > >
> > >      if(w==16 && h==16)
> > >        if(s->avctx->mb_cmp == FF_CMP_NSSE){
> > > -        return s->mecc.nsse[0](s, s->new_picture->data[0] + s->mb_x *
> > 16 + s-
> > > >mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
> > > -               s->mecc.nsse[1](s, s->new_picture->data[1] + s->mb_x *
> > 8 + s-
> > > >mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
> > > -               s->mecc.nsse[1](s, s->new_picture->data[2] + s->mb_x *
> > 8 + s-
> > > >mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
> > > +        return s->mecc.nsse[0](s, s->new_picture->data[0] + s->mb_x *
> > 16 + s-
> > > >mb_y * s->linesize * 16,
> > > +                               s->dest[0], s->linesize, 16) +
> > > +               s->mecc.nsse[1](s, s->new_picture->data[1] + s->mb_x *
> > > chroma_mb_w + s->mb_y * s->uvlinesize * chroma_mb_h,
> > > +                               s->dest[1], s->uvlinesize, chroma_mb_h) +
> > > +               s->mecc.nsse[1](s, s->new_picture->data[2] + s->mb_x *
> > > chroma_mb_w + s->mb_y * s->uvlinesize * chroma_mb_h,
> > > +                               s->dest[2], s->uvlinesize, chroma_mb_h);
> > >        }else{
> > > -        return s->mecc.sse[0](NULL, s->new_picture->data[0] + s->mb_x *
> > 16 +
> > > s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
> > > -               s->mecc.sse[1](NULL, s->new_picture->data[1] + s->mb_x
> > *  8 +
> > > s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
> > > -               s->mecc.sse[1](NULL, s->new_picture->data[2] + s->mb_x
> > *  8 +
> > > s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
> > > +        return s->mecc.sse[0](NULL, s->new_picture->data[0] + s->mb_x *
> > 16 +
> > > s->mb_y * s->linesize * 16,
> > > +                              s->dest[0], s->linesize, 16) +
> > > +               s->mecc.sse[1](NULL, s->new_picture->data[1] + s->mb_x *
> > > chroma_mb_w + s->mb_y * s->uvlinesize * chroma_mb_h,
> > > +                              s->dest[1], s->uvlinesize, chroma_mb_h) +
> > > +               s->mecc.sse[1](NULL, s->new_picture->data[2] + s->mb_x *
> > > chroma_mb_w + s->mb_y * s->uvlinesize * chroma_mb_h,
> > > +                              s->dest[2], s->uvlinesize, chroma_mb_h);
> > >        }
> > >      else
> > > -        return  sse(s, s->new_picture->data[0] + s->mb_x*16 + s->mb_y*s-
> > > >linesize*16, s->dest[0], w, h, s->linesize)
> > > -               +sse(s, s->new_picture->data[1] + s->mb_x*8  + s->mb_y*s-
> > > >uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
> > > -               +sse(s, s->new_picture->data[2] + s->mb_x*8  + s->mb_y*s-
> > > >uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
> > > +        return  sse(s, s->new_picture->data[0] + s->mb_x * 16 + s->mb_y
> > * s-
> > > >linesize * 16,
> > > +                    s->dest[0], w, h, s->linesize) +
> > > +                sse(s, s->new_picture->data[1] + s->mb_x * chroma_mb_w
> > + s-
> > > >mb_y * s->uvlinesize * chroma_mb_h,
> > > +                    s->dest[1], w >> s->chroma_x_shift, h >> s-
> > > >chroma_y_shift, s->uvlinesize) +
> > > +                sse(s, s->new_picture->data[2] + s->mb_x * chroma_mb_w
> > + s-
> > > >mb_y * s->uvlinesize * chroma_mb_h,
> > > +                    s->dest[2], w >> s->chroma_x_shift, h >> s-
> > > >chroma_y_shift, s->uvlinesize);
> > >  }
> > >
> > >  static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
> > > diff --git a/tests/ref/seek/vsynth_lena-mpeg2-422
> > > b/tests/ref/seek/vsynth_lena-mpeg2-422
> > > index 06d8f7ac3a..1fc2bf93a0 100644
> > > --- a/tests/ref/seek/vsynth_lena-mpeg2-422
> > > +++ b/tests/ref/seek/vsynth_lena-mpeg2-422
> > > @@ -1,46 +1,46 @@
> > > -ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0
> > size:
> > > 17497
> > > +ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0
> > size:
> > > 18339
> > >  ret: 0         st:-1 flags:0  ts:-1.000000
> > > -ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0
> > size:
> > > 17497
> > > +ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0
> > size:
> > > 18339
> > >  ret: 0         st:-1 flags:1  ts: 1.894167
> > > -ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 325397
> > size:
> > > 19967
> > > +ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 329242
> > size:
> > > 21479
> > >  ret: 0         st: 0 flags:0  ts: 0.788334
> > > -ret: 0         st: 0 flags:1 dts: 0.880000 pts: NOPTS    pos: 200747
> > size:
> > > 22575
> > > +ret: 0         st: 0 flags:1 dts: 0.880000 pts: NOPTS    pos: 203086
> > size:
> > > 24774
> > >  ret:-1         st: 0 flags:1  ts:-0.317499
> > >  ret:-1         st:-1 flags:0  ts: 2.576668
> > >  ret: 0         st:-1 flags:1  ts: 1.470835
> > > -ret: 0         st: 0 flags:1 dts: 1.360000 pts: NOPTS    pos: 265466
> > size:
> > > 21329
> > > +ret: 0         st: 0 flags:1 dts: 1.360000 pts: NOPTS    pos: 268154
> > size:
> > > 22845
> > >  ret: 0         st: 0 flags:0  ts: 0.365002
> > > -ret: 0         st: 0 flags:1 dts: 0.400000 pts: NOPTS    pos: 104454
> > size:
> > > 28984
> > > +ret: 0         st: 0 flags:1 dts: 0.400000 pts: NOPTS    pos: 108763
> > size:
> > > 32204
> > >  ret:-1         st: 0 flags:1  ts:-0.740831
> > >  ret:-1         st:-1 flags:0  ts: 2.153336
> > >  ret: 0         st:-1 flags:1  ts: 1.047503
> > > -ret: 0         st: 0 flags:1 dts: 0.880000 pts: NOPTS    pos: 200747
> > size:
> > > 22575
> > > +ret: 0         st: 0 flags:1 dts: 0.880000 pts: NOPTS    pos: 203086
> > size:
> > > 24774
> > >  ret: 0         st: 0 flags:0  ts:-0.058330
> > > -ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0
> > size:
> > > 17497
> > > +ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0
> > size:
> > > 18339
> > >  ret: 0         st: 0 flags:1  ts: 2.835837
> > > -ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 325397
> > size:
> > > 19967
> > > +ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 329242
> > size:
> > > 21479
> > >  ret: 0         st:-1 flags:0  ts: 1.730004
> > > -ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 325397
> > size:
> > > 19967
> > > +ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 329242
> > size:
> > > 21479
> > >  ret: 0         st:-1 flags:1  ts: 0.624171
> > > -ret: 0         st: 0 flags:1 dts: 0.400000 pts: NOPTS    pos: 104454
> > size:
> > > 28984
> > > +ret: 0         st: 0 flags:1 dts: 0.400000 pts: NOPTS    pos: 108763
> > size:
> > > 32204
> > >  ret: 0         st: 0 flags:0  ts:-0.481662
> > > -ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0
> > size:
> > > 17497
> > > +ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0
> > size:
> > > 18339
> > >  ret: 0         st: 0 flags:1  ts: 2.412505
> > > -ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 325397
> > size:
> > > 19967
> > > +ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 329242
> > size:
> > > 21479
> > >  ret: 0         st:-1 flags:0  ts: 1.306672
> > > -ret: 0         st: 0 flags:1 dts: 1.360000 pts: NOPTS    pos: 265466
> > size:
> > > 21329
> > > +ret: 0         st: 0 flags:1 dts: 1.360000 pts: NOPTS    pos: 268154
> > size:
> > > 22845
> > >  ret: 0         st:-1 flags:1  ts: 0.200839
> > > -ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0
> > size:
> > > 17497
> > > +ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0
> > size:
> > > 18339
> > >  ret: 0         st: 0 flags:0  ts:-0.904994
> > > -ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0
> > size:
> > > 17497
> > > +ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0
> > size:
> > > 18339
> > >  ret: 0         st: 0 flags:1  ts: 1.989173
> > > -ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 325397
> > size:
> > > 19967
> > > +ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 329242
> > size:
> > > 21479
> > >  ret: 0         st:-1 flags:0  ts: 0.883340
> > > -ret: 0         st: 0 flags:1 dts: 1.360000 pts: NOPTS    pos: 265466
> > size:
> > > 21329
> > > +ret: 0         st: 0 flags:1 dts: 1.360000 pts: NOPTS    pos: 268154
> > size:
> > > 22845
> > >  ret:-1         st:-1 flags:1  ts:-0.222493
> > >  ret:-1         st: 0 flags:0  ts: 2.671674
> > >  ret: 0         st: 0 flags:1  ts: 1.565841
> > > -ret: 0         st: 0 flags:1 dts: 1.360000 pts: NOPTS    pos: 265466
> > size:
> > > 21329
> > > +ret: 0         st: 0 flags:1 dts: 1.360000 pts: NOPTS    pos: 268154
> > size:
> > > 22845
> > >  ret: 0         st:-1 flags:0  ts: 0.460008
> > > -ret: 0         st: 0 flags:1 dts: 0.880000 pts: NOPTS    pos: 200747
> > size:
> > > 22575
> > > +ret: 0         st: 0 flags:1 dts: 0.880000 pts: NOPTS    pos: 203086
> > size:
> > > 24774
> > >  ret:-1         st:-1 flags:1  ts:-0.645825
> > > diff --git a/tests/ref/vsynth/vsynth1-mpeg2-422
> > b/tests/ref/vsynth/vsynth1-
> > > mpeg2-422
> > > index e936ba463e..a39b2b4dce 100644
> > > --- a/tests/ref/vsynth/vsynth1-mpeg2-422
> > > +++ b/tests/ref/vsynth/vsynth1-mpeg2-422
> > > @@ -1,4 +1,4 @@
> > > -6e135a1a27235a320311a932147846b4 *tests/data/fate/vsynth1-mpeg2-
> > > 422.mpeg2video
> > > -730780 tests/data/fate/vsynth1-mpeg2-422.mpeg2video
> > > -0273cd8463d1fc115378748239951560 *tests/data/fate/vsynth1-mpeg2-
> > > 422.out.rawvideo
> > > -stddev:   10.27 PSNR: 27.90 MAXDIFF:  162 bytes:  7603200/  7603200
> > > +651f054730be04257498eeae08b3bdc5 *tests/data/fate/vsynth1-mpeg2-
> > > 422.mpeg2video
> > > +855859 tests/data/fate/vsynth1-mpeg2-422.mpeg2video
> > > +10fd55dbd10b08f9271eb89617766be8 *tests/data/fate/vsynth1-mpeg2-
> > > 422.out.rawvideo
> > > +stddev:    9.38 PSNR: 28.68 MAXDIFF:  178 bytes:  7603200/  7603200
> > > diff --git a/tests/ref/vsynth/vsynth2-mpeg2-422
> > b/tests/ref/vsynth/vsynth2-
> > > mpeg2-422
> > > index ec7244f9f9..5da29b4eac 100644
> > > --- a/tests/ref/vsynth/vsynth2-mpeg2-422
> > > +++ b/tests/ref/vsynth/vsynth2-mpeg2-422
> > > @@ -1,4 +1,4 @@
> > > -b2fa9b73c3547191ecc01b8163abd4e5 *tests/data/fate/vsynth2-mpeg2-
> > > 422.mpeg2video
> > > -379164 tests/data/fate/vsynth2-mpeg2-422.mpeg2video
> > > -704f6a96f93c2409219bd48b74169041 *tests/data/fate/vsynth2-mpeg2-
> > > 422.out.rawvideo
> > > -stddev:    4.17 PSNR: 35.73 MAXDIFF:   70 bytes:  7603200/  7603200
> > > +fcaf6242ca4b706d392e40d9631e3465 *tests/data/fate/vsynth2-mpeg2-
> > > 422.mpeg2video
> > > +383946 tests/data/fate/vsynth2-mpeg2-422.mpeg2video
> > > +474bb259f041bfb971691551f678fd09 *tests/data/fate/vsynth2-mpeg2-
> > > 422.out.rawvideo
> > > +stddev:    3.93 PSNR: 36.23 MAXDIFF:   69 bytes:  7603200/  7603200
> > > diff --git a/tests/ref/vsynth/vsynth3-mpeg2-422
> > b/tests/ref/vsynth/vsynth3-
> > > mpeg2-422
> > > index 2247f286e6..e83c23635c 100644
> > > --- a/tests/ref/vsynth/vsynth3-mpeg2-422
> > > +++ b/tests/ref/vsynth/vsynth3-mpeg2-422
> > > @@ -1,4 +1,4 @@
> > > -4d108b861715f1fa010fd70baea91793 *tests/data/fate/vsynth3-mpeg2-
> > > 422.mpeg2video
> > > -68612 tests/data/fate/vsynth3-mpeg2-422.mpeg2video
> > > -73b16e906d07b6bbccf4b00d4a25302c *tests/data/fate/vsynth3-mpeg2-
> > > 422.out.rawvideo
> > > -stddev:    4.02 PSNR: 36.05 MAXDIFF:   46 bytes:    86700/    86700
> > > +96dc854fc40a6410d4edc387b63ded1a *tests/data/fate/vsynth3-mpeg2-
> > > 422.mpeg2video
> > > +75445 tests/data/fate/vsynth3-mpeg2-422.mpeg2video
> > > +5337fc0bfb789919cb3325a02d48eb82 *tests/data/fate/vsynth3-mpeg2-
> > > 422.out.rawvideo
> > > +stddev:    3.24 PSNR: 37.90 MAXDIFF:   27 bytes:    86700/    86700
> > > diff --git a/tests/ref/vsynth/vsynth_lena-mpeg2-422
> > > b/tests/ref/vsynth/vsynth_lena-mpeg2-422
> > > index 5f11d4e7cd..85d598782c 100644
> > > --- a/tests/ref/vsynth/vsynth_lena-mpeg2-422
> > > +++ b/tests/ref/vsynth/vsynth_lena-mpeg2-422
> > > @@ -1,4 +1,4 @@
> > > -521ec92c0b8672011a43dd13db98c400 *tests/data/fate/vsynth_lena-
> mpeg2-
> > > 422.mpeg2video
> > > -356431 tests/data/fate/vsynth_lena-mpeg2-422.mpeg2video
> > > -51ca353620f85db8b5b1c56f1a275add *tests/data/fate/vsynth_lena-
> mpeg2-
> > > 422.out.rawvideo
> > > -stddev:    3.15 PSNR: 38.14 MAXDIFF:   49 bytes:  7603200/  7603200
> > > +87e5c69775b363db649c253050536045 *tests/data/fate/vsynth_lena-
> mpeg2-
> > > 422.mpeg2video
> > > +361341 tests/data/fate/vsynth_lena-mpeg2-422.mpeg2video
> > > +93cefcf101e6d0814acfaa626a02be90 *tests/data/fate/vsynth_lena-
> mpeg2-
> > > 422.out.rawvideo
> > > +stddev:    2.97 PSNR: 38.67 MAXDIFF:   57 bytes:  7603200/  7603200
> >
> >
> > Patch LGTM. `make fate-vsynth1-mpeg2-422` works again on my Ice Lake
> after
> > applying this patch. It was broken since commit 17a59a6, see
> >
> >
> https://patchwork.ffmpeg.org/project/ffmpeg/patch/20220221122715.4244
> 75-1-onemda@gmail.com/
> 
> 
> But why PSNR got worse?

Which case? All PSNR are better.
vsynth1-mpeg2-422: 27.90 -> 28.68
vsynth2-mpeg2-422: 35.73 -> 36.23
vsynth3-mpeg2-422: 36.05 -> 37.90
vsynth_lena-mpeg2-422: 38.14 -> 38.67

> 
> 
> >
> >
> > Thanks
> > Haihao
> >
> > _______________________________________________
> > ffmpeg-devel mailing list
> > ffmpeg-devel@ffmpeg.org
> > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> >
> > To unsubscribe, visit link above, or email
> > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
> >
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [FFmpeg-devel] [PATCH] libavcodec/mpegvideo_enc: Fix a chroma mb size error in sse_mb()
  2022-07-01  8:47     ` Chen, Wenbin
@ 2022-07-01  8:55       ` Paul B Mahol
  0 siblings, 0 replies; 8+ messages in thread
From: Paul B Mahol @ 2022-07-01  8:55 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

On Fri, Jul 1, 2022 at 10:47 AM Chen, Wenbin <
wenbin.chen-at-intel.com@ffmpeg.org> wrote:

> > On Fri, Jul 1, 2022 at 10:00 AM Xiang, Haihao <
> > haihao.xiang-at-intel.com@ffmpeg.org> wrote:
> >
> > > On Fri, 2022-07-01 at 13:34 +0800, Wenbin Chen wrote:
> > > > For 422 frames we should not use hard coded 8 to calculate mb size
> for
> > > > uv plane. Chroma shift should be taken into consideration to be
> > > > compatiple with different sampling format.
> > > >
> > > > The error is reported by fate test when av_cpu_max_align() return 64
> > > > on the platform supporting AVX512. This is a hidden error and it is
> > > > exposed after commit 17a59a634c39b00a680c6ebbaea58db95594d13d.
> > > >
> > > > mpeg2enc has a mechanism to reuse frames. When it computes SSE (sum
> > of
> > > > squared error) on current mb, reconstructed mb will be wrote to the
> > > > previous mb space, so that the memory can be saved. However if the
> > align
> > > > is 64, the frame is shared in somewhere else, so the frame cannot be
> > > > reused and a new frame to store reconstrued data is created. Because
> > the
> > > > height of mb is wrong when compute sse on 422 frame, starting from
> the
> > > > second line of macro block, changed data is read when frame is reused
> > > > (we need to read row 16 rather than row 8 if frame is 422), and
> > unchanged
> > > > data is read when frame is not reused (a new frame is created so the
> > > > original frame will not be changed).
> > > >
> > > > That is why commit 17a59a634c39b00a680c6ebbaea58db95594d13d
> > exposes this
> > > > issue, because it add av_cpu_max_align() and this function return 64
> on
> > > > platform supporting AVX512 which lead to creating a frame in
> mpeg2enc,
> > > > and this lead to the different outputs.
> > > >
> > > > Signed-off-by: Wenbin Chen <wenbin.chen@intel.com>
> > > > ---
> > > >  libavcodec/mpegvideo_enc.c             | 29 +++++++++++++------
> > > >  tests/ref/seek/vsynth_lena-mpeg2-422   | 40
> +++++++++++++-------------
> > > >  tests/ref/vsynth/vsynth1-mpeg2-422     |  8 +++---
> > > >  tests/ref/vsynth/vsynth2-mpeg2-422     |  8 +++---
> > > >  tests/ref/vsynth/vsynth3-mpeg2-422     |  8 +++---
> > > >  tests/ref/vsynth/vsynth_lena-mpeg2-422 |  8 +++---
> > > >  6 files changed, 56 insertions(+), 45 deletions(-)
> > > >
> > > > diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c
> > > > index d6a85a037a..c9d9e2a764 100644
> > > > --- a/libavcodec/mpegvideo_enc.c
> > > > +++ b/libavcodec/mpegvideo_enc.c
> > > > @@ -2558,24 +2558,35 @@ static int sse(MpegEncContext *s, uint8_t
> > *src1,
> > > > uint8_t *src2, int w, int h, in
> > > >  static int sse_mb(MpegEncContext *s){
> > > >      int w= 16;
> > > >      int h= 16;
> > > > +    int chroma_mb_w = w >> s->chroma_x_shift;
> > > > +    int chroma_mb_h = h >> s->chroma_y_shift;
> > > >
> > > >      if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
> > > >      if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
> > > >
> > > >      if(w==16 && h==16)
> > > >        if(s->avctx->mb_cmp == FF_CMP_NSSE){
> > > > -        return s->mecc.nsse[0](s, s->new_picture->data[0] + s->mb_x
> *
> > > 16 + s-
> > > > >mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
> > > > -               s->mecc.nsse[1](s, s->new_picture->data[1] + s->mb_x
> *
> > > 8 + s-
> > > > >mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
> > > > -               s->mecc.nsse[1](s, s->new_picture->data[2] + s->mb_x
> *
> > > 8 + s-
> > > > >mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
> > > > +        return s->mecc.nsse[0](s, s->new_picture->data[0] + s->mb_x
> *
> > > 16 + s-
> > > > >mb_y * s->linesize * 16,
> > > > +                               s->dest[0], s->linesize, 16) +
> > > > +               s->mecc.nsse[1](s, s->new_picture->data[1] + s->mb_x
> *
> > > > chroma_mb_w + s->mb_y * s->uvlinesize * chroma_mb_h,
> > > > +                               s->dest[1], s->uvlinesize,
> chroma_mb_h) +
> > > > +               s->mecc.nsse[1](s, s->new_picture->data[2] + s->mb_x
> *
> > > > chroma_mb_w + s->mb_y * s->uvlinesize * chroma_mb_h,
> > > > +                               s->dest[2], s->uvlinesize,
> chroma_mb_h);
> > > >        }else{
> > > > -        return s->mecc.sse[0](NULL, s->new_picture->data[0] +
> s->mb_x *
> > > 16 +
> > > > s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
> > > > -               s->mecc.sse[1](NULL, s->new_picture->data[1] +
> s->mb_x
> > > *  8 +
> > > > s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
> > > > -               s->mecc.sse[1](NULL, s->new_picture->data[2] +
> s->mb_x
> > > *  8 +
> > > > s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
> > > > +        return s->mecc.sse[0](NULL, s->new_picture->data[0] +
> s->mb_x *
> > > 16 +
> > > > s->mb_y * s->linesize * 16,
> > > > +                              s->dest[0], s->linesize, 16) +
> > > > +               s->mecc.sse[1](NULL, s->new_picture->data[1] +
> s->mb_x *
> > > > chroma_mb_w + s->mb_y * s->uvlinesize * chroma_mb_h,
> > > > +                              s->dest[1], s->uvlinesize,
> chroma_mb_h) +
> > > > +               s->mecc.sse[1](NULL, s->new_picture->data[2] +
> s->mb_x *
> > > > chroma_mb_w + s->mb_y * s->uvlinesize * chroma_mb_h,
> > > > +                              s->dest[2], s->uvlinesize,
> chroma_mb_h);
> > > >        }
> > > >      else
> > > > -        return  sse(s, s->new_picture->data[0] + s->mb_x*16 +
> s->mb_y*s-
> > > > >linesize*16, s->dest[0], w, h, s->linesize)
> > > > -               +sse(s, s->new_picture->data[1] + s->mb_x*8  +
> s->mb_y*s-
> > > > >uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
> > > > -               +sse(s, s->new_picture->data[2] + s->mb_x*8  +
> s->mb_y*s-
> > > > >uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
> > > > +        return  sse(s, s->new_picture->data[0] + s->mb_x * 16 +
> s->mb_y
> > > * s-
> > > > >linesize * 16,
> > > > +                    s->dest[0], w, h, s->linesize) +
> > > > +                sse(s, s->new_picture->data[1] + s->mb_x *
> chroma_mb_w
> > > + s-
> > > > >mb_y * s->uvlinesize * chroma_mb_h,
> > > > +                    s->dest[1], w >> s->chroma_x_shift, h >> s-
> > > > >chroma_y_shift, s->uvlinesize) +
> > > > +                sse(s, s->new_picture->data[2] + s->mb_x *
> chroma_mb_w
> > > + s-
> > > > >mb_y * s->uvlinesize * chroma_mb_h,
> > > > +                    s->dest[2], w >> s->chroma_x_shift, h >> s-
> > > > >chroma_y_shift, s->uvlinesize);
> > > >  }
> > > >
> > > >  static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
> > > > diff --git a/tests/ref/seek/vsynth_lena-mpeg2-422
> > > > b/tests/ref/seek/vsynth_lena-mpeg2-422
> > > > index 06d8f7ac3a..1fc2bf93a0 100644
> > > > --- a/tests/ref/seek/vsynth_lena-mpeg2-422
> > > > +++ b/tests/ref/seek/vsynth_lena-mpeg2-422
> > > > @@ -1,46 +1,46 @@
> > > > -ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0
> > > size:
> > > > 17497
> > > > +ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0
> > > size:
> > > > 18339
> > > >  ret: 0         st:-1 flags:0  ts:-1.000000
> > > > -ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0
> > > size:
> > > > 17497
> > > > +ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0
> > > size:
> > > > 18339
> > > >  ret: 0         st:-1 flags:1  ts: 1.894167
> > > > -ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 325397
> > > size:
> > > > 19967
> > > > +ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 329242
> > > size:
> > > > 21479
> > > >  ret: 0         st: 0 flags:0  ts: 0.788334
> > > > -ret: 0         st: 0 flags:1 dts: 0.880000 pts: NOPTS    pos: 200747
> > > size:
> > > > 22575
> > > > +ret: 0         st: 0 flags:1 dts: 0.880000 pts: NOPTS    pos: 203086
> > > size:
> > > > 24774
> > > >  ret:-1         st: 0 flags:1  ts:-0.317499
> > > >  ret:-1         st:-1 flags:0  ts: 2.576668
> > > >  ret: 0         st:-1 flags:1  ts: 1.470835
> > > > -ret: 0         st: 0 flags:1 dts: 1.360000 pts: NOPTS    pos: 265466
> > > size:
> > > > 21329
> > > > +ret: 0         st: 0 flags:1 dts: 1.360000 pts: NOPTS    pos: 268154
> > > size:
> > > > 22845
> > > >  ret: 0         st: 0 flags:0  ts: 0.365002
> > > > -ret: 0         st: 0 flags:1 dts: 0.400000 pts: NOPTS    pos: 104454
> > > size:
> > > > 28984
> > > > +ret: 0         st: 0 flags:1 dts: 0.400000 pts: NOPTS    pos: 108763
> > > size:
> > > > 32204
> > > >  ret:-1         st: 0 flags:1  ts:-0.740831
> > > >  ret:-1         st:-1 flags:0  ts: 2.153336
> > > >  ret: 0         st:-1 flags:1  ts: 1.047503
> > > > -ret: 0         st: 0 flags:1 dts: 0.880000 pts: NOPTS    pos: 200747
> > > size:
> > > > 22575
> > > > +ret: 0         st: 0 flags:1 dts: 0.880000 pts: NOPTS    pos: 203086
> > > size:
> > > > 24774
> > > >  ret: 0         st: 0 flags:0  ts:-0.058330
> > > > -ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0
> > > size:
> > > > 17497
> > > > +ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0
> > > size:
> > > > 18339
> > > >  ret: 0         st: 0 flags:1  ts: 2.835837
> > > > -ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 325397
> > > size:
> > > > 19967
> > > > +ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 329242
> > > size:
> > > > 21479
> > > >  ret: 0         st:-1 flags:0  ts: 1.730004
> > > > -ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 325397
> > > size:
> > > > 19967
> > > > +ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 329242
> > > size:
> > > > 21479
> > > >  ret: 0         st:-1 flags:1  ts: 0.624171
> > > > -ret: 0         st: 0 flags:1 dts: 0.400000 pts: NOPTS    pos: 104454
> > > size:
> > > > 28984
> > > > +ret: 0         st: 0 flags:1 dts: 0.400000 pts: NOPTS    pos: 108763
> > > size:
> > > > 32204
> > > >  ret: 0         st: 0 flags:0  ts:-0.481662
> > > > -ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0
> > > size:
> > > > 17497
> > > > +ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0
> > > size:
> > > > 18339
> > > >  ret: 0         st: 0 flags:1  ts: 2.412505
> > > > -ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 325397
> > > size:
> > > > 19967
> > > > +ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 329242
> > > size:
> > > > 21479
> > > >  ret: 0         st:-1 flags:0  ts: 1.306672
> > > > -ret: 0         st: 0 flags:1 dts: 1.360000 pts: NOPTS    pos: 265466
> > > size:
> > > > 21329
> > > > +ret: 0         st: 0 flags:1 dts: 1.360000 pts: NOPTS    pos: 268154
> > > size:
> > > > 22845
> > > >  ret: 0         st:-1 flags:1  ts: 0.200839
> > > > -ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0
> > > size:
> > > > 17497
> > > > +ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0
> > > size:
> > > > 18339
> > > >  ret: 0         st: 0 flags:0  ts:-0.904994
> > > > -ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0
> > > size:
> > > > 17497
> > > > +ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0
> > > size:
> > > > 18339
> > > >  ret: 0         st: 0 flags:1  ts: 1.989173
> > > > -ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 325397
> > > size:
> > > > 19967
> > > > +ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 329242
> > > size:
> > > > 21479
> > > >  ret: 0         st:-1 flags:0  ts: 0.883340
> > > > -ret: 0         st: 0 flags:1 dts: 1.360000 pts: NOPTS    pos: 265466
> > > size:
> > > > 21329
> > > > +ret: 0         st: 0 flags:1 dts: 1.360000 pts: NOPTS    pos: 268154
> > > size:
> > > > 22845
> > > >  ret:-1         st:-1 flags:1  ts:-0.222493
> > > >  ret:-1         st: 0 flags:0  ts: 2.671674
> > > >  ret: 0         st: 0 flags:1  ts: 1.565841
> > > > -ret: 0         st: 0 flags:1 dts: 1.360000 pts: NOPTS    pos: 265466
> > > size:
> > > > 21329
> > > > +ret: 0         st: 0 flags:1 dts: 1.360000 pts: NOPTS    pos: 268154
> > > size:
> > > > 22845
> > > >  ret: 0         st:-1 flags:0  ts: 0.460008
> > > > -ret: 0         st: 0 flags:1 dts: 0.880000 pts: NOPTS    pos: 200747
> > > size:
> > > > 22575
> > > > +ret: 0         st: 0 flags:1 dts: 0.880000 pts: NOPTS    pos: 203086
> > > size:
> > > > 24774
> > > >  ret:-1         st:-1 flags:1  ts:-0.645825
> > > > diff --git a/tests/ref/vsynth/vsynth1-mpeg2-422
> > > b/tests/ref/vsynth/vsynth1-
> > > > mpeg2-422
> > > > index e936ba463e..a39b2b4dce 100644
> > > > --- a/tests/ref/vsynth/vsynth1-mpeg2-422
> > > > +++ b/tests/ref/vsynth/vsynth1-mpeg2-422
> > > > @@ -1,4 +1,4 @@
> > > > -6e135a1a27235a320311a932147846b4 *tests/data/fate/vsynth1-mpeg2-
> > > > 422.mpeg2video
> > > > -730780 tests/data/fate/vsynth1-mpeg2-422.mpeg2video
> > > > -0273cd8463d1fc115378748239951560 *tests/data/fate/vsynth1-mpeg2-
> > > > 422.out.rawvideo
> > > > -stddev:   10.27 PSNR: 27.90 MAXDIFF:  162 bytes:  7603200/  7603200
> > > > +651f054730be04257498eeae08b3bdc5 *tests/data/fate/vsynth1-mpeg2-
> > > > 422.mpeg2video
> > > > +855859 tests/data/fate/vsynth1-mpeg2-422.mpeg2video
> > > > +10fd55dbd10b08f9271eb89617766be8 *tests/data/fate/vsynth1-mpeg2-
> > > > 422.out.rawvideo
> > > > +stddev:    9.38 PSNR: 28.68 MAXDIFF:  178 bytes:  7603200/  7603200
> > > > diff --git a/tests/ref/vsynth/vsynth2-mpeg2-422
> > > b/tests/ref/vsynth/vsynth2-
> > > > mpeg2-422
> > > > index ec7244f9f9..5da29b4eac 100644
> > > > --- a/tests/ref/vsynth/vsynth2-mpeg2-422
> > > > +++ b/tests/ref/vsynth/vsynth2-mpeg2-422
> > > > @@ -1,4 +1,4 @@
> > > > -b2fa9b73c3547191ecc01b8163abd4e5 *tests/data/fate/vsynth2-mpeg2-
> > > > 422.mpeg2video
> > > > -379164 tests/data/fate/vsynth2-mpeg2-422.mpeg2video
> > > > -704f6a96f93c2409219bd48b74169041 *tests/data/fate/vsynth2-mpeg2-
> > > > 422.out.rawvideo
> > > > -stddev:    4.17 PSNR: 35.73 MAXDIFF:   70 bytes:  7603200/  7603200
> > > > +fcaf6242ca4b706d392e40d9631e3465 *tests/data/fate/vsynth2-mpeg2-
> > > > 422.mpeg2video
> > > > +383946 tests/data/fate/vsynth2-mpeg2-422.mpeg2video
> > > > +474bb259f041bfb971691551f678fd09 *tests/data/fate/vsynth2-mpeg2-
> > > > 422.out.rawvideo
> > > > +stddev:    3.93 PSNR: 36.23 MAXDIFF:   69 bytes:  7603200/  7603200
> > > > diff --git a/tests/ref/vsynth/vsynth3-mpeg2-422
> > > b/tests/ref/vsynth/vsynth3-
> > > > mpeg2-422
> > > > index 2247f286e6..e83c23635c 100644
> > > > --- a/tests/ref/vsynth/vsynth3-mpeg2-422
> > > > +++ b/tests/ref/vsynth/vsynth3-mpeg2-422
> > > > @@ -1,4 +1,4 @@
> > > > -4d108b861715f1fa010fd70baea91793 *tests/data/fate/vsynth3-mpeg2-
> > > > 422.mpeg2video
> > > > -68612 tests/data/fate/vsynth3-mpeg2-422.mpeg2video
> > > > -73b16e906d07b6bbccf4b00d4a25302c *tests/data/fate/vsynth3-mpeg2-
> > > > 422.out.rawvideo
> > > > -stddev:    4.02 PSNR: 36.05 MAXDIFF:   46 bytes:    86700/    86700
> > > > +96dc854fc40a6410d4edc387b63ded1a *tests/data/fate/vsynth3-mpeg2-
> > > > 422.mpeg2video
> > > > +75445 tests/data/fate/vsynth3-mpeg2-422.mpeg2video
> > > > +5337fc0bfb789919cb3325a02d48eb82 *tests/data/fate/vsynth3-mpeg2-
> > > > 422.out.rawvideo
> > > > +stddev:    3.24 PSNR: 37.90 MAXDIFF:   27 bytes:    86700/    86700
> > > > diff --git a/tests/ref/vsynth/vsynth_lena-mpeg2-422
> > > > b/tests/ref/vsynth/vsynth_lena-mpeg2-422
> > > > index 5f11d4e7cd..85d598782c 100644
> > > > --- a/tests/ref/vsynth/vsynth_lena-mpeg2-422
> > > > +++ b/tests/ref/vsynth/vsynth_lena-mpeg2-422
> > > > @@ -1,4 +1,4 @@
> > > > -521ec92c0b8672011a43dd13db98c400 *tests/data/fate/vsynth_lena-
> > mpeg2-
> > > > 422.mpeg2video
> > > > -356431 tests/data/fate/vsynth_lena-mpeg2-422.mpeg2video
> > > > -51ca353620f85db8b5b1c56f1a275add *tests/data/fate/vsynth_lena-
> > mpeg2-
> > > > 422.out.rawvideo
> > > > -stddev:    3.15 PSNR: 38.14 MAXDIFF:   49 bytes:  7603200/  7603200
> > > > +87e5c69775b363db649c253050536045 *tests/data/fate/vsynth_lena-
> > mpeg2-
> > > > 422.mpeg2video
> > > > +361341 tests/data/fate/vsynth_lena-mpeg2-422.mpeg2video
> > > > +93cefcf101e6d0814acfaa626a02be90 *tests/data/fate/vsynth_lena-
> > mpeg2-
> > > > 422.out.rawvideo
> > > > +stddev:    2.97 PSNR: 38.67 MAXDIFF:   57 bytes:  7603200/  7603200
> > >
> > >
> > > Patch LGTM. `make fate-vsynth1-mpeg2-422` works again on my Ice Lake
> > after
> > > applying this patch. It was broken since commit 17a59a6, see
> > >
> > >
> > https://patchwork.ffmpeg.org/project/ffmpeg/patch/20220221122715.4244
> > 75-1-onemda@gmail.com/
> >
> >
> > But why PSNR got worse?
>
> Which case? All PSNR are better.
> vsynth1-mpeg2-422: 27.90 -> 28.68
> vsynth2-mpeg2-422: 35.73 -> 36.23
> vsynth3-mpeg2-422: 36.05 -> 37.90
> vsynth_lena-mpeg2-422: 38.14 -> 38.67
>

Ah, I looked at stddev numbers.

>
> >
> >
> > >
> > >
> > > Thanks
> > > Haihao
> > >
> > > _______________________________________________
> > > ffmpeg-devel mailing list
> > > ffmpeg-devel@ffmpeg.org
> > > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> > >
> > > To unsubscribe, visit link above, or email
> > > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
> > >
> > _______________________________________________
> > ffmpeg-devel mailing list
> > ffmpeg-devel@ffmpeg.org
> > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> >
> > To unsubscribe, visit link above, or email
> > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [FFmpeg-devel] [PATCH] libavcodec/mpegvideo_enc: Fix a chroma mb size error in sse_mb()
  2022-07-01  5:34 [FFmpeg-devel] [PATCH] libavcodec/mpegvideo_enc: Fix a chroma mb size error in sse_mb() Wenbin Chen
  2022-07-01  7:59 ` Xiang, Haihao
@ 2022-07-01 20:15 ` Michael Niedermayer
  2022-07-04  2:14   ` Chen, Wenbin
  2022-07-01 21:19 ` Soft Works
  2 siblings, 1 reply; 8+ messages in thread
From: Michael Niedermayer @ 2022-07-01 20:15 UTC (permalink / raw)
  To: FFmpeg development discussions and patches


[-- Attachment #1.1: Type: text/plain, Size: 4129 bytes --]

On Fri, Jul 01, 2022 at 01:34:34PM +0800, Wenbin Chen wrote:
> For 422 frames we should not use hard coded 8 to calculate mb size for
> uv plane. Chroma shift should be taken into consideration to be
> compatiple with different sampling format.
> 
> The error is reported by fate test when av_cpu_max_align() return 64
> on the platform supporting AVX512. This is a hidden error and it is
> exposed after commit 17a59a634c39b00a680c6ebbaea58db95594d13d.
> 
> mpeg2enc has a mechanism to reuse frames. When it computes SSE (sum of
> squared error) on current mb, reconstructed mb will be wrote to the
> previous mb space, so that the memory can be saved. However if the align
> is 64, the frame is shared in somewhere else, so the frame cannot be
> reused and a new frame to store reconstrued data is created. Because the
> height of mb is wrong when compute sse on 422 frame, starting from the
> second line of macro block, changed data is read when frame is reused
> (we need to read row 16 rather than row 8 if frame is 422), and unchanged
> data is read when frame is not reused (a new frame is created so the
> original frame will not be changed).
> 
> That is why commit 17a59a634c39b00a680c6ebbaea58db95594d13d exposes this
> issue, because it add av_cpu_max_align() and this function return 64 on
> platform supporting AVX512 which lead to creating a frame in mpeg2enc,
> and this lead to the different outputs.
> 
> Signed-off-by: Wenbin Chen <wenbin.chen@intel.com>
> ---
>  libavcodec/mpegvideo_enc.c             | 29 +++++++++++++------
>  tests/ref/seek/vsynth_lena-mpeg2-422   | 40 +++++++++++++-------------
>  tests/ref/vsynth/vsynth1-mpeg2-422     |  8 +++---
>  tests/ref/vsynth/vsynth2-mpeg2-422     |  8 +++---
>  tests/ref/vsynth/vsynth3-mpeg2-422     |  8 +++---
>  tests/ref/vsynth/vsynth_lena-mpeg2-422 |  8 +++---
>  6 files changed, 56 insertions(+), 45 deletions(-)
> 
> diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c
> index d6a85a037a..c9d9e2a764 100644
> --- a/libavcodec/mpegvideo_enc.c
> +++ b/libavcodec/mpegvideo_enc.c
> @@ -2558,24 +2558,35 @@ static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, in
>  static int sse_mb(MpegEncContext *s){
>      int w= 16;
>      int h= 16;
> +    int chroma_mb_w = w >> s->chroma_x_shift;
> +    int chroma_mb_h = h >> s->chroma_y_shift;
>  
>      if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
>      if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
>  
>      if(w==16 && h==16)
>        if(s->avctx->mb_cmp == FF_CMP_NSSE){

> -        return s->mecc.nsse[0](s, s->new_picture->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
> +        return s->mecc.nsse[0](s, s->new_picture->data[0] + s->mb_x * 16 + s->mb_y * s->linesize * 16,
> +                               s->dest[0], s->linesize, 16) +

This doesnt belong in this patch, its not a functional change and makes it
harder to see what is changed

please seperate this in a seperate patch if you want to suggest this whitespace change

thx


> -               s->mecc.nsse[1](s, s->new_picture->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
> -               s->mecc.nsse[1](s, s->new_picture->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
> +               s->mecc.nsse[1](s, s->new_picture->data[1] + s->mb_x * chroma_mb_w + s->mb_y * s->uvlinesize * chroma_mb_h,
> +                               s->dest[1], s->uvlinesize, chroma_mb_h) +
> +               s->mecc.nsse[1](s, s->new_picture->data[2] + s->mb_x * chroma_mb_w + s->mb_y * s->uvlinesize * chroma_mb_h,
> +                               s->dest[2], s->uvlinesize, chroma_mb_h);

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

If you fake or manipulate statistics in a paper in physics you will never
get a job again.
If you fake or manipulate statistics in a paper in medicin you will get
a job for life at the pharma industry.

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

[-- Attachment #2: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [FFmpeg-devel] [PATCH] libavcodec/mpegvideo_enc: Fix a chroma mb size error in sse_mb()
  2022-07-01  5:34 [FFmpeg-devel] [PATCH] libavcodec/mpegvideo_enc: Fix a chroma mb size error in sse_mb() Wenbin Chen
  2022-07-01  7:59 ` Xiang, Haihao
  2022-07-01 20:15 ` Michael Niedermayer
@ 2022-07-01 21:19 ` Soft Works
  2 siblings, 0 replies; 8+ messages in thread
From: Soft Works @ 2022-07-01 21:19 UTC (permalink / raw)
  To: FFmpeg development discussions and patches



> -----Original Message-----
> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of
> Wenbin Chen
> Sent: Friday, July 1, 2022 7:35 AM
> To: ffmpeg-devel@ffmpeg.org
> Subject: [FFmpeg-devel] [PATCH] libavcodec/mpegvideo_enc: Fix a
> chroma mb size error in sse_mb()
> 
> For 422 frames we should not use hard coded 8 to calculate mb size
> for
> uv plane. Chroma shift should be taken into consideration to be
> compatiple with different sampling format.
> 
> The error is reported by fate test when av_cpu_max_align() return 64
> on the platform supporting AVX512. This is a hidden error and it is
> exposed after commit 17a59a634c39b00a680c6ebbaea58db95594d13d.
> 
> mpeg2enc has a mechanism to reuse frames. When it computes SSE (sum
> of
> squared error) on current mb, reconstructed mb will be wrote to the
> previous mb space, so that the memory can be saved. However if the
> align
> is 64, the frame is shared in somewhere else, so the frame cannot be
> reused and a new frame to store reconstrued data is created. Because
> the
> height of mb is wrong when compute sse on 422 frame, starting from
> the
> second line of macro block, changed data is read when frame is reused
> (we need to read row 16 rather than row 8 if frame is 422), and
> unchanged
> data is read when frame is not reused (a new frame is created so the
> original frame will not be changed).
> 
> That is why commit 17a59a634c39b00a680c6ebbaea58db95594d13d exposes
> this
> issue, because it add av_cpu_max_align() and this function return 64
> on
> platform supporting AVX512 which lead to creating a frame in
> mpeg2enc,
> and this lead to the different outputs.
> 
> Signed-off-by: Wenbin Chen <wenbin.chen@intel.com>
> ---
>  libavcodec/mpegvideo_enc.c             | 29 +++++++++++++------
>  tests/ref/seek/vsynth_lena-mpeg2-422   | 40 +++++++++++++-----------
> --
>  tests/ref/vsynth/vsynth1-mpeg2-422     |  8 +++---
>  tests/ref/vsynth/vsynth2-mpeg2-422     |  8 +++---
>  tests/ref/vsynth/vsynth3-mpeg2-422     |  8 +++---
>  tests/ref/vsynth/vsynth_lena-mpeg2-422 |  8 +++---
>  6 files changed, 56 insertions(+), 45 deletions(-)
> 
> diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c
> index d6a85a037a..c9d9e2a764 100644
> --- a/libavcodec/mpegvideo_enc.c
> +++ b/libavcodec/mpegvideo_enc.c
> @@ -2558,24 +2558,35 @@ static int sse(MpegEncContext *s, uint8_t
> *src1, uint8_t *src2, int w, int h, in
>  static int sse_mb(MpegEncContext *s){
>      int w= 16;
>      int h= 16;
> +    int chroma_mb_w = w >> s->chroma_x_shift;
> +    int chroma_mb_h = h >> s->chroma_y_shift;
> 
>      if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
>      if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
> 
>      if(w==16 && h==16)
>        if(s->avctx->mb_cmp == FF_CMP_NSSE){
> -        return s->mecc.nsse[0](s, s->new_picture->data[0] + s->mb_x
> * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
> -               s->mecc.nsse[1](s, s->new_picture->data[1] + s->mb_x
> *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
> -               s->mecc.nsse[1](s, s->new_picture->data[2] + s->mb_x
> *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
> +        return s->mecc.nsse[0](s, s->new_picture->data[0] + s->mb_x
> * 16 + s->mb_y * s->linesize * 16,
> +                               s->dest[0], s->linesize, 16) +
> +               s->mecc.nsse[1](s, s->new_picture->data[1] + s->mb_x
> * chroma_mb_w + s->mb_y * s->uvlinesize * chroma_mb_h,
> +                               s->dest[1], s->uvlinesize,
> chroma_mb_h) +
> +               s->mecc.nsse[1](s, s->new_picture->data[2] + s->mb_x
> * chroma_mb_w + s->mb_y * s->uvlinesize * chroma_mb_h,
> +                               s->dest[2], s->uvlinesize,
> chroma_mb_h);
>        }else{
> -        return s->mecc.sse[0](NULL, s->new_picture->data[0] + s-
> >mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,
> 16) +
> -               s->mecc.sse[1](NULL, s->new_picture->data[1] + s-
> >mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,
> 8) +
> -               s->mecc.sse[1](NULL, s->new_picture->data[2] + s-
> >mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,
> 8);
> +        return s->mecc.sse[0](NULL, s->new_picture->data[0] + s-
> >mb_x * 16 + s->mb_y * s->linesize * 16,
> +                              s->dest[0], s->linesize, 16) +
> +               s->mecc.sse[1](NULL, s->new_picture->data[1] + s-
> >mb_x * chroma_mb_w + s->mb_y * s->uvlinesize * chroma_mb_h,
> +                              s->dest[1], s->uvlinesize,
> chroma_mb_h) +
> +               s->mecc.sse[1](NULL, s->new_picture->data[2] + s-
> >mb_x * chroma_mb_w + s->mb_y * s->uvlinesize * chroma_mb_h,
> +                              s->dest[2], s->uvlinesize,
> chroma_mb_h);
>        }
>      else
> -        return  sse(s, s->new_picture->data[0] + s->mb_x*16 + s-
> >mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
> -               +sse(s, s->new_picture->data[1] + s->mb_x*8  + s-
> >mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
> -               +sse(s, s->new_picture->data[2] + s->mb_x*8  + s-
> >mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
> +        return  sse(s, s->new_picture->data[0] + s->mb_x * 16 + s-
> >mb_y * s->linesize * 16,
> +                    s->dest[0], w, h, s->linesize) +
> +                sse(s, s->new_picture->data[1] + s->mb_x *
> chroma_mb_w + s->mb_y * s->uvlinesize * chroma_mb_h,
> +                    s->dest[1], w >> s->chroma_x_shift, h >> s-
> >chroma_y_shift, s->uvlinesize) +
> +                sse(s, s->new_picture->data[2] + s->mb_x *
> chroma_mb_w + s->mb_y * s->uvlinesize * chroma_mb_h,
> +                    s->dest[2], w >> s->chroma_x_shift, h >> s-
> >chroma_y_shift, s->uvlinesize);
>  }
> 
>  static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
> diff --git a/tests/ref/seek/vsynth_lena-mpeg2-422
> b/tests/ref/seek/vsynth_lena-mpeg2-422
> index 06d8f7ac3a..1fc2bf93a0 100644
> --- a/tests/ref/seek/vsynth_lena-mpeg2-422
> +++ b/tests/ref/seek/vsynth_lena-mpeg2-422
> @@ -1,46 +1,46 @@
> -ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0
> size: 17497
> +ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0
> size: 18339
>  ret: 0         st:-1 flags:0  ts:-1.000000
> -ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0
> size: 17497
> +ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0
> size: 18339
>  ret: 0         st:-1 flags:1  ts: 1.894167
> -ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 325397
> size: 19967
> +ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 329242
> size: 21479
>  ret: 0         st: 0 flags:0  ts: 0.788334
> -ret: 0         st: 0 flags:1 dts: 0.880000 pts: NOPTS    pos: 200747
> size: 22575
> +ret: 0         st: 0 flags:1 dts: 0.880000 pts: NOPTS    pos: 203086
> size: 24774
>  ret:-1         st: 0 flags:1  ts:-0.317499
>  ret:-1         st:-1 flags:0  ts: 2.576668
>  ret: 0         st:-1 flags:1  ts: 1.470835
> -ret: 0         st: 0 flags:1 dts: 1.360000 pts: NOPTS    pos: 265466
> size: 21329
> +ret: 0         st: 0 flags:1 dts: 1.360000 pts: NOPTS    pos: 268154
> size: 22845
>  ret: 0         st: 0 flags:0  ts: 0.365002
> -ret: 0         st: 0 flags:1 dts: 0.400000 pts: NOPTS    pos: 104454
> size: 28984
> +ret: 0         st: 0 flags:1 dts: 0.400000 pts: NOPTS    pos: 108763
> size: 32204
>  ret:-1         st: 0 flags:1  ts:-0.740831
>  ret:-1         st:-1 flags:0  ts: 2.153336
>  ret: 0         st:-1 flags:1  ts: 1.047503
> -ret: 0         st: 0 flags:1 dts: 0.880000 pts: NOPTS    pos: 200747
> size: 22575
> +ret: 0         st: 0 flags:1 dts: 0.880000 pts: NOPTS    pos: 203086
> size: 24774
>  ret: 0         st: 0 flags:0  ts:-0.058330
> -ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0
> size: 17497
> +ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0
> size: 18339
>  ret: 0         st: 0 flags:1  ts: 2.835837
> -ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 325397
> size: 19967
> +ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 329242
> size: 21479
>  ret: 0         st:-1 flags:0  ts: 1.730004
> -ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 325397
> size: 19967
> +ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 329242
> size: 21479
>  ret: 0         st:-1 flags:1  ts: 0.624171
> -ret: 0         st: 0 flags:1 dts: 0.400000 pts: NOPTS    pos: 104454
> size: 28984
> +ret: 0         st: 0 flags:1 dts: 0.400000 pts: NOPTS    pos: 108763
> size: 32204
>  ret: 0         st: 0 flags:0  ts:-0.481662
> -ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0
> size: 17497
> +ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0
> size: 18339
>  ret: 0         st: 0 flags:1  ts: 2.412505
> -ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 325397
> size: 19967
> +ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 329242
> size: 21479
>  ret: 0         st:-1 flags:0  ts: 1.306672
> -ret: 0         st: 0 flags:1 dts: 1.360000 pts: NOPTS    pos: 265466
> size: 21329
> +ret: 0         st: 0 flags:1 dts: 1.360000 pts: NOPTS    pos: 268154
> size: 22845
>  ret: 0         st:-1 flags:1  ts: 0.200839
> -ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0
> size: 17497
> +ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0
> size: 18339
>  ret: 0         st: 0 flags:0  ts:-0.904994
> -ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0
> size: 17497
> +ret: 0         st: 0 flags:1 dts: 0.000000 pts: NOPTS    pos:      0
> size: 18339
>  ret: 0         st: 0 flags:1  ts: 1.989173
> -ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 325397
> size: 19967
> +ret: 0         st: 0 flags:1 dts: 1.840000 pts: NOPTS    pos: 329242
> size: 21479
>  ret: 0         st:-1 flags:0  ts: 0.883340
> -ret: 0         st: 0 flags:1 dts: 1.360000 pts: NOPTS    pos: 265466
> size: 21329
> +ret: 0         st: 0 flags:1 dts: 1.360000 pts: NOPTS    pos: 268154
> size: 22845
>  ret:-1         st:-1 flags:1  ts:-0.222493
>  ret:-1         st: 0 flags:0  ts: 2.671674
>  ret: 0         st: 0 flags:1  ts: 1.565841
> -ret: 0         st: 0 flags:1 dts: 1.360000 pts: NOPTS    pos: 265466
> size: 21329
> +ret: 0         st: 0 flags:1 dts: 1.360000 pts: NOPTS    pos: 268154
> size: 22845
>  ret: 0         st:-1 flags:0  ts: 0.460008
> -ret: 0         st: 0 flags:1 dts: 0.880000 pts: NOPTS    pos: 200747
> size: 22575
> +ret: 0         st: 0 flags:1 dts: 0.880000 pts: NOPTS    pos: 203086
> size: 24774
>  ret:-1         st:-1 flags:1  ts:-0.645825
> diff --git a/tests/ref/vsynth/vsynth1-mpeg2-422
> b/tests/ref/vsynth/vsynth1-mpeg2-422
> index e936ba463e..a39b2b4dce 100644
> --- a/tests/ref/vsynth/vsynth1-mpeg2-422
> +++ b/tests/ref/vsynth/vsynth1-mpeg2-422
> @@ -1,4 +1,4 @@
> -6e135a1a27235a320311a932147846b4 *tests/data/fate/vsynth1-mpeg2-
> 422.mpeg2video
> -730780 tests/data/fate/vsynth1-mpeg2-422.mpeg2video
> -0273cd8463d1fc115378748239951560 *tests/data/fate/vsynth1-mpeg2-
> 422.out.rawvideo
> -stddev:   10.27 PSNR: 27.90 MAXDIFF:  162 bytes:  7603200/  7603200
> +651f054730be04257498eeae08b3bdc5 *tests/data/fate/vsynth1-mpeg2-
> 422.mpeg2video
> +855859 tests/data/fate/vsynth1-mpeg2-422.mpeg2video
> +10fd55dbd10b08f9271eb89617766be8 *tests/data/fate/vsynth1-mpeg2-
> 422.out.rawvideo
> +stddev:    9.38 PSNR: 28.68 MAXDIFF:  178 bytes:  7603200/  7603200
> diff --git a/tests/ref/vsynth/vsynth2-mpeg2-422
> b/tests/ref/vsynth/vsynth2-mpeg2-422
> index ec7244f9f9..5da29b4eac 100644
> --- a/tests/ref/vsynth/vsynth2-mpeg2-422
> +++ b/tests/ref/vsynth/vsynth2-mpeg2-422
> @@ -1,4 +1,4 @@
> -b2fa9b73c3547191ecc01b8163abd4e5 *tests/data/fate/vsynth2-mpeg2-
> 422.mpeg2video
> -379164 tests/data/fate/vsynth2-mpeg2-422.mpeg2video
> -704f6a96f93c2409219bd48b74169041 *tests/data/fate/vsynth2-mpeg2-
> 422.out.rawvideo
> -stddev:    4.17 PSNR: 35.73 MAXDIFF:   70 bytes:  7603200/  7603200
> +fcaf6242ca4b706d392e40d9631e3465 *tests/data/fate/vsynth2-mpeg2-
> 422.mpeg2video
> +383946 tests/data/fate/vsynth2-mpeg2-422.mpeg2video
> +474bb259f041bfb971691551f678fd09 *tests/data/fate/vsynth2-mpeg2-
> 422.out.rawvideo
> +stddev:    3.93 PSNR: 36.23 MAXDIFF:   69 bytes:  7603200/  7603200
> diff --git a/tests/ref/vsynth/vsynth3-mpeg2-422
> b/tests/ref/vsynth/vsynth3-mpeg2-422
> index 2247f286e6..e83c23635c 100644
> --- a/tests/ref/vsynth/vsynth3-mpeg2-422
> +++ b/tests/ref/vsynth/vsynth3-mpeg2-422
> @@ -1,4 +1,4 @@
> -4d108b861715f1fa010fd70baea91793 *tests/data/fate/vsynth3-mpeg2-
> 422.mpeg2video
> -68612 tests/data/fate/vsynth3-mpeg2-422.mpeg2video
> -73b16e906d07b6bbccf4b00d4a25302c *tests/data/fate/vsynth3-mpeg2-
> 422.out.rawvideo
> -stddev:    4.02 PSNR: 36.05 MAXDIFF:   46 bytes:    86700/    86700
> +96dc854fc40a6410d4edc387b63ded1a *tests/data/fate/vsynth3-mpeg2-
> 422.mpeg2video
> +75445 tests/data/fate/vsynth3-mpeg2-422.mpeg2video
> +5337fc0bfb789919cb3325a02d48eb82 *tests/data/fate/vsynth3-mpeg2-
> 422.out.rawvideo
> +stddev:    3.24 PSNR: 37.90 MAXDIFF:   27 bytes:    86700/    86700
> diff --git a/tests/ref/vsynth/vsynth_lena-mpeg2-422
> b/tests/ref/vsynth/vsynth_lena-mpeg2-422
> index 5f11d4e7cd..85d598782c 100644
> --- a/tests/ref/vsynth/vsynth_lena-mpeg2-422
> +++ b/tests/ref/vsynth/vsynth_lena-mpeg2-422
> @@ -1,4 +1,4 @@
> -521ec92c0b8672011a43dd13db98c400 *tests/data/fate/vsynth_lena-mpeg2-
> 422.mpeg2video
> -356431 tests/data/fate/vsynth_lena-mpeg2-422.mpeg2video
> -51ca353620f85db8b5b1c56f1a275add *tests/data/fate/vsynth_lena-mpeg2-
> 422.out.rawvideo
> -stddev:    3.15 PSNR: 38.14 MAXDIFF:   49 bytes:  7603200/  7603200
> +87e5c69775b363db649c253050536045 *tests/data/fate/vsynth_lena-mpeg2-
> 422.mpeg2video
> +361341 tests/data/fate/vsynth_lena-mpeg2-422.mpeg2video
> +93cefcf101e6d0814acfaa626a02be90 *tests/data/fate/vsynth_lena-mpeg2-
> 422.out.rawvideo
> +stddev:    2.97 PSNR: 38.67 MAXDIFF:   57 bytes:  7603200/  7603200
> --

LGTM. It fixes the broken FATE tests on new CPUs and it's no longer needed
to run configure with --disable-avx512

Thanks for figuring out the cause of this!

softworkz
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [FFmpeg-devel] [PATCH] libavcodec/mpegvideo_enc: Fix a chroma mb size error in sse_mb()
  2022-07-01 20:15 ` Michael Niedermayer
@ 2022-07-04  2:14   ` Chen, Wenbin
  0 siblings, 0 replies; 8+ messages in thread
From: Chen, Wenbin @ 2022-07-04  2:14 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

> On Fri, Jul 01, 2022 at 01:34:34PM +0800, Wenbin Chen wrote:
> > For 422 frames we should not use hard coded 8 to calculate mb size for
> > uv plane. Chroma shift should be taken into consideration to be
> > compatiple with different sampling format.
> >
> > The error is reported by fate test when av_cpu_max_align() return 64
> > on the platform supporting AVX512. This is a hidden error and it is
> > exposed after commit 17a59a634c39b00a680c6ebbaea58db95594d13d.
> >
> > mpeg2enc has a mechanism to reuse frames. When it computes SSE (sum
> of
> > squared error) on current mb, reconstructed mb will be wrote to the
> > previous mb space, so that the memory can be saved. However if the align
> > is 64, the frame is shared in somewhere else, so the frame cannot be
> > reused and a new frame to store reconstrued data is created. Because the
> > height of mb is wrong when compute sse on 422 frame, starting from the
> > second line of macro block, changed data is read when frame is reused
> > (we need to read row 16 rather than row 8 if frame is 422), and unchanged
> > data is read when frame is not reused (a new frame is created so the
> > original frame will not be changed).
> >
> > That is why commit 17a59a634c39b00a680c6ebbaea58db95594d13d
> exposes this
> > issue, because it add av_cpu_max_align() and this function return 64 on
> > platform supporting AVX512 which lead to creating a frame in mpeg2enc,
> > and this lead to the different outputs.
> >
> > Signed-off-by: Wenbin Chen <wenbin.chen@intel.com>
> > ---
> >  libavcodec/mpegvideo_enc.c             | 29 +++++++++++++------
> >  tests/ref/seek/vsynth_lena-mpeg2-422   | 40 +++++++++++++-------------
> >  tests/ref/vsynth/vsynth1-mpeg2-422     |  8 +++---
> >  tests/ref/vsynth/vsynth2-mpeg2-422     |  8 +++---
> >  tests/ref/vsynth/vsynth3-mpeg2-422     |  8 +++---
> >  tests/ref/vsynth/vsynth_lena-mpeg2-422 |  8 +++---
> >  6 files changed, 56 insertions(+), 45 deletions(-)
> >
> > diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c
> > index d6a85a037a..c9d9e2a764 100644
> > --- a/libavcodec/mpegvideo_enc.c
> > +++ b/libavcodec/mpegvideo_enc.c
> > @@ -2558,24 +2558,35 @@ static int sse(MpegEncContext *s, uint8_t *src1,
> uint8_t *src2, int w, int h, in
> >  static int sse_mb(MpegEncContext *s){
> >      int w= 16;
> >      int h= 16;
> > +    int chroma_mb_w = w >> s->chroma_x_shift;
> > +    int chroma_mb_h = h >> s->chroma_y_shift;
> >
> >      if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
> >      if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
> >
> >      if(w==16 && h==16)
> >        if(s->avctx->mb_cmp == FF_CMP_NSSE){
> 
> > -        return s->mecc.nsse[0](s, s->new_picture->data[0] + s->mb_x * 16 + s-
> >mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
> > +        return s->mecc.nsse[0](s, s->new_picture->data[0] + s->mb_x * 16 +
> s->mb_y * s->linesize * 16,
> > +                               s->dest[0], s->linesize, 16) +
> 
> This doesnt belong in this patch, its not a functional change and makes it
> harder to see what is changed
> 
> please seperate this in a seperate patch if you want to suggest this
> whitespace change
> 
> thx

Ok, I will separate it into two patches and resubmit them.

Thanks

> 
> 
> > -               s->mecc.nsse[1](s, s->new_picture->data[1] + s->mb_x *  8 + s-
> >mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
> > -               s->mecc.nsse[1](s, s->new_picture->data[2] + s->mb_x *  8 + s-
> >mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
> > +               s->mecc.nsse[1](s, s->new_picture->data[1] + s->mb_x *
> chroma_mb_w + s->mb_y * s->uvlinesize * chroma_mb_h,
> > +                               s->dest[1], s->uvlinesize, chroma_mb_h) +
> > +               s->mecc.nsse[1](s, s->new_picture->data[2] + s->mb_x *
> chroma_mb_w + s->mb_y * s->uvlinesize * chroma_mb_h,
> > +                               s->dest[2], s->uvlinesize, chroma_mb_h);
> 
> [...]
> --
> Michael     GnuPG fingerprint:
> 9FF2128B147EF6730BADF133611EC787040B0FAB
> 
> If you fake or manipulate statistics in a paper in physics you will never
> get a job again.
> If you fake or manipulate statistics in a paper in medicin you will get
> a job for life at the pharma industry.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2022-07-04  2:14 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-07-01  5:34 [FFmpeg-devel] [PATCH] libavcodec/mpegvideo_enc: Fix a chroma mb size error in sse_mb() Wenbin Chen
2022-07-01  7:59 ` Xiang, Haihao
2022-07-01  8:12   ` Paul B Mahol
2022-07-01  8:47     ` Chen, Wenbin
2022-07-01  8:55       ` Paul B Mahol
2022-07-01 20:15 ` Michael Niedermayer
2022-07-04  2:14   ` Chen, Wenbin
2022-07-01 21:19 ` Soft Works

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git