From 0faa3cf487081776a9b577b0fdfb20c4d9494ca7 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Date: Tue, 25 Oct 2022 18:27:17 +0200
Subject: [PATCH 48/77] avcodec/mpegvideo: Move allocating encoder buffers to
 mpegvideoenc.c

dct_error_sum and me.map are allocated per slice-context
and therefore their allocation has not been moved to
mpegvideo_enc.c in 0154fb43e328b13da8943e66b38dc1b5ab9315af.

This commit changes this by allocating each of them once for
all slice contexts and moving said allocations to
mpegvideo_enc.c like the other encoder-only buffers.
The buffers are suitably aligned to ensure that no false
sharing occurs.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
 libavcodec/mpegvideo.c     | 17 ------------
 libavcodec/mpegvideo_enc.c | 54 ++++++++++++++++++++++++++++++++++++++
 libavcodec/mpegvideoenc.h  |  3 +++
 3 files changed, 57 insertions(+), 17 deletions(-)

diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index bc367fba07..8055e6c0e2 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -351,17 +351,6 @@ av_cold void ff_mpv_idct_init(MpegEncContext *s)
 
 static av_cold int init_duplicate_context(MpegEncContext *s)
 {
-    if (s->encoding) {
-        s->me.map = av_mallocz(2 * ME_MAP_SIZE * sizeof(*s->me.map));
-        if (!s->me.map)
-            return AVERROR(ENOMEM);
-        s->me.score_map = s->me.map + ME_MAP_SIZE;
-
-        if (s->noise_reduction) {
-            if (!FF_ALLOCZ_TYPED_ARRAY(s->dct_error_sum,  2))
-                return AVERROR(ENOMEM);
-        }
-    }
     if (!FF_ALLOCZ_TYPED_ARRAY(s->blocks,  1 + s->encoding))
         return AVERROR(ENOMEM);
     s->block = s->blocks[0];
@@ -420,9 +409,6 @@ static av_cold void free_duplicate_context(MpegEncContext *s)
     s->sc.obmc_scratchpad = NULL;
     s->sc.linesize = 0;
 
-    av_freep(&s->dct_error_sum);
-    av_freep(&s->me.map);
-    s->me.score_map = NULL;
     av_freep(&s->blocks);
     av_freep(&s->ac_val_base);
     s->block = NULL;
@@ -646,9 +632,6 @@ static void clear_context(MpegEncContext *s)
 
     memset(s->thread_context, 0, sizeof(s->thread_context));
 
-    s->me.map = NULL;
-    s->me.score_map = NULL;
-    s->dct_error_sum = NULL;
     s->block = NULL;
     s->blocks = NULL;
     s->ac_val_base = NULL;
diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c
index 116c974098..f6bd9be56b 100644
--- a/libavcodec/mpegvideo_enc.c
+++ b/libavcodec/mpegvideo_enc.c
@@ -33,6 +33,7 @@
 
 #include "config_components.h"
 
+#include <assert.h>
 #include <stdint.h>
 
 #include "libavutil/emms.h"
@@ -420,6 +421,53 @@ static av_cold int init_matrices(MPVMainEncContext *const m, AVCodecContext *avc
     return 0;
 }
 
+static av_cold int init_buffers(MPVMainEncContext *const m, AVCodecContext *avctx)
+{
+    MpegEncContext *const s = &m->s;
+    // Allocate me.map/score_map (and dct_error_sum if noise_reduction is set) once for all slice contexts; each slice's part is ALIGN-aligned to avoid false sharing.
+    enum {
+#ifndef _MSC_VER
+        /// The number is supposed to match/exceed the cache-line size.
+        ALIGN = FFMAX(128, _Alignof(max_align_t)),
+#else
+        ALIGN = 128,
+#endif
+        ME_MAP_ALLOC_SIZE = FFALIGN(2 * ME_MAP_SIZE * sizeof(*s->me.map), ALIGN), // map + score_map of one slice, padded to ALIGN
+        DCT_ERROR_SIZE    = FFALIGN(2 * sizeof(*s->dct_error_sum), ALIGN), // dct_error_sum of one slice, padded to ALIGN
+    };
+    static_assert(FFMAX(ME_MAP_ALLOC_SIZE, DCT_ERROR_SIZE) * MAX_THREADS + ALIGN - 1 <= SIZE_MAX,
+                  "Need checks for potential overflow."); // NOTE(review): relies on slice_context_count <= MAX_THREADS - confirm
+    unsigned nb_slices = s->slice_context_count;
+    char *dct_error = NULL, *me_map; // dct_error stays NULL when noise reduction is off

+    if (s->noise_reduction) {
+        dct_error = av_mallocz(ALIGN - 1 + nb_slices * DCT_ERROR_SIZE); // ALIGN - 1 spare bytes leave room to align the start
+        if (!dct_error)
+            return AVERROR(ENOMEM);
+        m->dct_error_sum_base = dct_error; // keep the pointer as returned by av_mallocz(); the aligned one below is derived from it
+        dct_error += FFALIGN((uintptr_t)dct_error, ALIGN) - (uintptr_t)dct_error; // advance to the next multiple of ALIGN
+    }
+    me_map = av_mallocz(ALIGN - 1 + nb_slices * ME_MAP_ALLOC_SIZE); // same layout as above: alignment slack + one part per slice
+    if (!me_map)
+        return AVERROR(ENOMEM); // NOTE(review): dct_error_sum_base is not freed here - presumably left to the caller's cleanup; confirm
+    m->me_map_base = me_map; // keep the pointer as returned by av_mallocz()
+    me_map += FFALIGN((uintptr_t)me_map, ALIGN) - (uintptr_t)me_map; // advance to the next multiple of ALIGN

+    for (unsigned i = 0; i < nb_slices; ++i) { // hand every slice context its own part of each buffer
+        MpegEncContext *const s2 = s->thread_context[i];

+        if (dct_error) {
+            s2->dct_error_sum = (void*)dct_error;
+            dct_error        += DCT_ERROR_SIZE; // step to the next slice's part
+        }
+        s2->me.map       = (uint32_t*)me_map;
+        s2->me.score_map = s2->me.map + ME_MAP_SIZE; // score_map is the second half of this slice's part
+        me_map          += ME_MAP_ALLOC_SIZE; // step to the next slice's part
+    }

+    return 0;
+}
+
 /* init video encoder */
 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
 {
@@ -923,6 +971,10 @@ av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
     if (ret < 0)
         return ret;
 
+    ret = init_buffers(m, avctx);
+    if (ret < 0)
+        return ret;
+
     /* Allocate MV tables; the MV and MB tables will be copied
      * to slice contexts by ff_update_duplicate_context().  */
     mv_table_size = (s->mb_height + 2) * s->mb_stride + 1;
@@ -1067,6 +1119,8 @@ av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
     av_freep(&s->b_field_mv_table_base);
     av_freep(&s->b_field_select_table[0][0]);
     av_freep(&s->p_field_select_table[0]);
+    av_freep(&m->dct_error_sum_base);
+    av_freep(&m->me_map_base);
 
     av_freep(&s->mb_type);
     av_freep(&s->lambda_table);
diff --git a/libavcodec/mpegvideoenc.h b/libavcodec/mpegvideoenc.h
index 57b91d169a..dfd86d76cc 100644
--- a/libavcodec/mpegvideoenc.h
+++ b/libavcodec/mpegvideoenc.h
@@ -98,6 +98,9 @@ typedef struct MPVMainEncContext {
 
     int64_t mb_var_sum;            ///< sum of MB variance for current frame
     int64_t mc_mb_var_sum;         ///< motion compensated MB variance for current frame
+
+    char *me_map_base;             ///< backs MotionEstContext.(map|score_map)
+    char *dct_error_sum_base;      ///< backs dct_error_sum
 } MPVMainEncContext;
 
 static inline const MPVMainEncContext *slice_to_mainenc(const MpegEncContext *s)
-- 
2.45.2