Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
* [FFmpeg-devel] [PATCH 01/11] lavc/jpeg2000dec: Finer granularity threading
@ 2022-09-28 10:04 Tomas Härdin
  2022-09-28 10:05 ` [FFmpeg-devel] [PATCH 02/11] lavc/jpeg2000dec: Reindent Tomas Härdin
                   ` (9 more replies)
  0 siblings, 10 replies; 12+ messages in thread
From: Tomas Härdin @ 2022-09-28 10:04 UTC (permalink / raw)
  To: ffmpeg-devel

[-- Attachment #1: Type: text/plain, Size: 191 bytes --]

Hi

This patchset depends both on my execute2() patchset and on
av_realloc*_array_reuse(). The performance numbers aren't fully up to
date, but I expect they won't have changed much.

/Tomas

[-- Attachment #2: 0001-lavc-jpeg2000dec-Finer-granularity-threading.patch --]
[-- Type: text/x-patch, Size: 11750 bytes --]

From 9f70f673b2977e969c2c6df51d7a2b7d73302f08 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= <git@haerdin.se>
Date: Fri, 10 Jun 2022 14:10:02 +0200
Subject: [PATCH 01/11] lavc/jpeg2000dec: Finer granularity threading

Decoding and dequantization are now threaded on codeblock level.
IDWT is threaded on component level.
MCT and write_frame() remain threaded on tile level.

This brings lossless 4K J2K with -lowres 2 -thread_type slice -threads 96 on an AMD EPYC 7R32 from 4.8 fps (177% CPU) to 31 fps (1284% CPU).
---
 libavcodec/jpeg2000dec.c | 195 ++++++++++++++++++++++++++++-----------
 1 file changed, 141 insertions(+), 54 deletions(-)

diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c
index c2b81ec103..e823ae58ec 100644
--- a/libavcodec/jpeg2000dec.c
+++ b/libavcodec/jpeg2000dec.c
@@ -92,6 +92,15 @@ typedef struct Jpeg2000Tile {
     int coord[2][2];                    // border coordinates {{x0, x1}, {y0, y1}}
 } Jpeg2000Tile;
 
+typedef struct Jpeg2000IdwtThread {
+    int cb_start, cb_end; // range [cb_start, cb_end) into Jpeg2000DecoderContext.cb for one tile-component
+} Jpeg2000IdwtThread;
+
+typedef struct Jpeg2000CodeblockThread {
+    int tileno, compno, reslevelno, bandno, precno, cblkno; // indices locating the codeblock handled by one job
+    int coded; // written by jpeg2000_decode_cb(): 1 if decode_cblk() returned nonzero, else 0
+} Jpeg2000CodeblockThread;
+
 typedef struct Jpeg2000DecoderContext {
     AVClass         *class;
     AVCodecContext  *avctx;
@@ -136,6 +145,11 @@ typedef struct Jpeg2000DecoderContext {
 
     /*options parameters*/
     int             reduction_factor;
+
+    Jpeg2000IdwtThread *idwt;
+    size_t idwt_allocated;
+    Jpeg2000CodeblockThread *cb;
+    size_t cb_allocated;
 } Jpeg2000DecoderContext;
 
 /* get_bits functions for JPEG2000 packet bitstream
@@ -1941,54 +1955,33 @@ static inline void roi_scale_cblk(Jpeg2000Cblk *cblk,
     }
 }
 
-static inline void tile_codeblocks(const Jpeg2000DecoderContext *s, Jpeg2000Tile *tile)
+static int jpeg2000_decode_cb(AVCodecContext *avctx, void *td,
+                              int jobnr, int threadnr)
 {
     Jpeg2000T1Context t1;
-
-    int compno, reslevelno, bandno;
-
-    /* Loop on tile components */
-    for (compno = 0; compno < s->ncomponents; compno++) {
-        Jpeg2000Component *comp     = tile->comp + compno;
-        Jpeg2000CodingStyle *codsty = tile->codsty + compno;
-        int coded = 0;
-
-        t1.stride = (1<<codsty->log2_cblk_width) + 2;
-
-        /* Loop on resolution levels */
-        for (reslevelno = 0; reslevelno < codsty->nreslevels2decode; reslevelno++) {
-            Jpeg2000ResLevel *rlevel = comp->reslevel + reslevelno;
-            /* Loop on bands */
-            for (bandno = 0; bandno < rlevel->nbands; bandno++) {
-                int nb_precincts, precno;
-                Jpeg2000Band *band = rlevel->band + bandno;
-                int cblkno = 0, bandpos;
-
-                bandpos = bandno + (reslevelno > 0);
-
-                if (band->coord[0][0] == band->coord[0][1] ||
-                    band->coord[1][0] == band->coord[1][1])
-                    continue;
-
-                nb_precincts = rlevel->num_precincts_x * rlevel->num_precincts_y;
-                /* Loop on precincts */
-                for (precno = 0; precno < nb_precincts; precno++) {
-                    Jpeg2000Prec *prec = band->prec + precno;
-
-                    /* Loop on codeblocks */
-                    for (cblkno = 0;
-                         cblkno < prec->nb_codeblocks_width * prec->nb_codeblocks_height;
-                         cblkno++) {
-                        int x, y;
-                        Jpeg2000Cblk *cblk = prec->cblk + cblkno;
-                        int ret = decode_cblk(s, codsty, &t1, cblk,
+    const Jpeg2000DecoderContext *s = avctx->priv_data;
+    Jpeg2000CodeblockThread *cb     = s->cb + jobnr;
+    Jpeg2000Tile *tile              = s->tile + cb->tileno;
+    Jpeg2000Component *comp         = tile->comp + cb->compno;
+    Jpeg2000CodingStyle *codsty     = tile->codsty + cb->compno;
+    Jpeg2000ResLevel *rlevel        = comp->reslevel + cb->reslevelno;
+    Jpeg2000Band *band              = rlevel->band + cb->bandno;
+    Jpeg2000Prec *prec              = band->prec + cb->precno;
+    Jpeg2000Cblk *cblk              = prec->cblk + cb->cblkno;
+    int ret, x, y, bandpos          = cb->bandno + (cb->reslevelno > 0);
+
+    t1.stride = (1<<codsty->log2_cblk_width) + 2;
+    cb->coded = 0;
+
+                        ret = decode_cblk(s, codsty, &t1, cblk,
                                     cblk->coord[0][1] - cblk->coord[0][0],
                                     cblk->coord[1][1] - cblk->coord[1][0],
                                     bandpos, comp->roi_shift);
                         if (ret)
-                            coded = 1;
+                            cb->coded = 1;
                         else
-                            continue;
+                            return 0;
+
                         x = cblk->coord[0][0] - band->coord[0][0];
                         y = cblk->coord[1][0] - band->coord[1][0];
 
@@ -2000,16 +1993,28 @@ static inline void tile_codeblocks(const Jpeg2000DecoderContext *s, Jpeg2000Tile
                             dequantization_int_97(x, y, cblk, comp, &t1, band);
                         else
                             dequantization_int(x, y, cblk, comp, &t1, band);
-                   } /* end cblk */
-                } /*end prec */
-            } /* end band */
-        } /* end reslevel */
 
-        /* inverse DWT */
-        if (coded)
+    return 0;
+}
+
+static int jpeg2000_idwt(AVCodecContext *avctx, void *td,
+                         int jobnr, int threadnr)
+{
+    const Jpeg2000DecoderContext *s = avctx->priv_data;
+    Jpeg2000IdwtThread *idwt        = s->idwt + jobnr;
+    Jpeg2000Tile *tile              = s->tile + jobnr / s->ncomponents;
+    int compno                      = jobnr % s->ncomponents;
+    Jpeg2000Component *comp         = tile->comp + compno;
+    Jpeg2000CodingStyle *codsty     = tile->codsty + compno;
+
+    for (int i = idwt->cb_start; i < idwt->cb_end; i++) {
+        if (s->cb[i].coded) {
             ff_dwt_decode(&comp->dwt, codsty->transform == FF_DWT97 ? (void*)comp->f_data : (void*)comp->i_data);
+            break;
+        }
+    }
 
-    } /*end comp */
+    return 0;
 }
 
 #define WRITE_FRAME(D, PIXEL)                                                                     \
@@ -2079,15 +2084,13 @@ WRITE_FRAME(16, uint16_t)
 
 #undef WRITE_FRAME
 
-static int jpeg2000_decode_tile(AVCodecContext *avctx, void *td,
-                                int jobnr, int threadnr)
+static int jpeg2000_mct_write_frame(AVCodecContext *avctx, void *td,
+                                    int jobnr, int threadnr)
 {
     const Jpeg2000DecoderContext *s = avctx->priv_data;
     AVFrame *picture = td;
     Jpeg2000Tile *tile = s->tile + jobnr;
 
-    tile_codeblocks(s, tile);
-
     /* inverse MCT transformation */
     if (tile->codsty[0].mct)
         mct_decode(s, tile);
@@ -2477,11 +2480,79 @@ static av_cold int jpeg2000_decode_init(AVCodecContext *avctx)
     return 0;
 }
 
+static int jpeg2000_setup_cbs(Jpeg2000DecoderContext *s, int *cbs_out)
+{
+    if (s->numXtiles * s->numYtiles > INT_MAX/s->ncomponents)
+        return AVERROR(ENOMEM);
+    /* Builds s->idwt (one entry per tile-component) and s->cb; *cbs_out gets the codeblock count. */
+    if (av_realloc_array_reuse(&s->idwt, &s->idwt_allocated,
+                               s->numXtiles * s->numYtiles * s->ncomponents,
+                               INT_MAX, sizeof(*s->idwt)) < 0)
+        return AVERROR(ENOMEM);
+
+    for (int pass = 0; pass < 2; pass++) {
+        int cbs = 0;
+        for (int tileno = 0; tileno < s->numXtiles * s->numYtiles; tileno++) {
+            for (int compno = 0; compno < s->ncomponents; compno++) {
+                Jpeg2000Tile *tile          = s->tile + tileno;
+                Jpeg2000Component *comp     = tile->comp + compno;
+                Jpeg2000CodingStyle *codsty = tile->codsty + compno;
+                Jpeg2000IdwtThread *idwt    = s->idwt + compno + tileno * s->ncomponents;
+
+                idwt->cb_start = cbs;
+
+                for (int reslevelno = 0; reslevelno < codsty->nreslevels2decode; reslevelno++) {
+                    Jpeg2000ResLevel *rlevel = comp->reslevel + reslevelno;
+                    for (int bandno = 0; bandno < rlevel->nbands; bandno++) {
+                        int nb_precincts = rlevel->num_precincts_x * rlevel->num_precincts_y;
+                        Jpeg2000Band *band = rlevel->band + bandno;
+                        /* skip bands whose coord range is empty in either dimension */
+                        if (band->coord[0][0] == band->coord[0][1] ||
+                            band->coord[1][0] == band->coord[1][1])
+                            continue;
+
+                        for (int precno = 0; precno < nb_precincts; precno++) {
+                            Jpeg2000Prec *prec = band->prec + precno;
+                            int prec_cbs = prec->nb_codeblocks_width * prec->nb_codeblocks_height;
+                            /* refuse inputs whose running codeblock count would overflow int */
+                            if (cbs > INT_MAX - prec_cbs)
+                                return AVERROR(ENOMEM);
+
+                            for (int cblkno = 0; cblkno < prec_cbs; cblkno++, cbs++) {
+                                if (pass == 1) { // pass 0 only counts; s->cb is sized after pass 0 and filled here
+                                    Jpeg2000CodeblockThread *cb = s->cb + cbs;
+                                    cb->tileno = tileno;
+                                    cb->compno = compno;
+                                    cb->reslevelno = reslevelno;
+                                    cb->bandno = bandno;
+                                    cb->precno = precno;
+                                    cb->cblkno = cblkno;
+                                }
+                            }
+                        }
+                    }
+                }
+
+                idwt->cb_end = cbs; // this tile-component owns s->cb[cb_start .. cb_end - 1]
+            }
+        }
+        /* after the counting pass, make room for cbs entries in s->cb */
+        if (pass == 0) {
+            if (av_realloc_array_reuse(&s->cb, &s->cb_allocated,
+                                       cbs, INT_MAX, sizeof(*s->cb)) < 0)
+                return AVERROR(ENOMEM);
+        }
+
+        *cbs_out = cbs;
+    }
+    return 0;
+}
+
 static int jpeg2000_decode_frame(AVCodecContext *avctx, AVFrame *picture,
                                  int *got_frame, AVPacket *avpkt)
 {
     Jpeg2000DecoderContext *s = avctx->priv_data;
-    int ret;
+    int ret, cbs;
 
     s->avctx     = avctx;
     bytestream2_init(&s->g, avpkt->data, avpkt->size);
@@ -2548,7 +2619,12 @@ static int jpeg2000_decode_frame(AVCodecContext *avctx, AVFrame *picture,
         }
     }
 
-    avctx->execute2(avctx, jpeg2000_decode_tile, picture, NULL, s->numXtiles * s->numYtiles);
+    if ((ret = jpeg2000_setup_cbs(s, &cbs)) < 0 ||
+        (ret = avctx->execute2(avctx, jpeg2000_decode_cb, NULL, NULL, cbs)) < 0 ||
+        (ret = avctx->execute2(avctx, jpeg2000_idwt, NULL, NULL, s->numXtiles * s->numYtiles * s->ncomponents)) < 0 ||
+        (ret = avctx->execute2(avctx, jpeg2000_mct_write_frame, picture, NULL, s->numXtiles * s->numYtiles)) < 0)
+        goto end;
+
 
     jpeg2000_dec_cleanup(s);
 
@@ -2564,6 +2640,16 @@ end:
     return ret;
 }
 
+static av_cold int jpeg2000_decode_close(AVCodecContext *avctx)
+{
+    Jpeg2000DecoderContext *s = avctx->priv_data;
+    /* free the idwt and cb arrays that jpeg2000_setup_cbs() (re)allocates */
+    av_freep(&s->idwt);
+    av_freep(&s->cb);
+
+    return 0;
+}
+
 #define OFFSET(x) offsetof(Jpeg2000DecoderContext, x)
 #define VD AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM
 
@@ -2589,6 +2675,7 @@ const FFCodec ff_jpeg2000_decoder = {
     .priv_data_size   = sizeof(Jpeg2000DecoderContext),
     .init             = jpeg2000_decode_init,
     FF_CODEC_DECODE_CB(jpeg2000_decode_frame),
+    .close            = jpeg2000_decode_close,
     .p.priv_class     = &jpeg2000_class,
     .p.max_lowres     = 5,
     .p.profiles       = NULL_IF_CONFIG_SMALL(ff_jpeg2000_profiles),
-- 
2.30.2


[-- Attachment #3: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [FFmpeg-devel] [PATCH 02/11] lavc/jpeg2000dec: Reindent
  2022-09-28 10:04 [FFmpeg-devel] [PATCH 01/11] lavc/jpeg2000dec: Finer granularity threading Tomas Härdin
@ 2022-09-28 10:05 ` Tomas Härdin
  2022-09-28 10:05 ` [FFmpeg-devel] [PATCH 03/11] lavc/jpeg2000dwt: Implement sliced transforms Tomas Härdin
                   ` (8 subsequent siblings)
  9 siblings, 0 replies; 12+ messages in thread
From: Tomas Härdin @ 2022-09-28 10:05 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

[-- Attachment #1: Type: text/plain, Size: 1 bytes --]



[-- Attachment #2: 0002-lavc-jpeg2000dec-Reindent.patch --]
[-- Type: text/x-patch, Size: 2478 bytes --]

From 99e9b6cf421538dee6e0280daf3d1ffc71c1acda Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= <git@haerdin.se>
Date: Fri, 10 Jun 2022 14:12:11 +0200
Subject: [PATCH 02/11] lavc/jpeg2000dec: Reindent

---
 libavcodec/jpeg2000dec.c | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c
index e823ae58ec..92bd76d90f 100644
--- a/libavcodec/jpeg2000dec.c
+++ b/libavcodec/jpeg2000dec.c
@@ -1973,26 +1973,26 @@ static int jpeg2000_decode_cb(AVCodecContext *avctx, void *td,
     t1.stride = (1<<codsty->log2_cblk_width) + 2;
     cb->coded = 0;
 
-                        ret = decode_cblk(s, codsty, &t1, cblk,
-                                    cblk->coord[0][1] - cblk->coord[0][0],
-                                    cblk->coord[1][1] - cblk->coord[1][0],
-                                    bandpos, comp->roi_shift);
-                        if (ret)
-                            cb->coded = 1;
-                        else
-                            return 0;
+    ret = decode_cblk(s, codsty, &t1, cblk,
+                cblk->coord[0][1] - cblk->coord[0][0],
+                cblk->coord[1][1] - cblk->coord[1][0],
+                bandpos, comp->roi_shift);
+    if (ret)
+        cb->coded = 1;
+    else
+        return 0;
 
-                        x = cblk->coord[0][0] - band->coord[0][0];
-                        y = cblk->coord[1][0] - band->coord[1][0];
+    x = cblk->coord[0][0] - band->coord[0][0];
+    y = cblk->coord[1][0] - band->coord[1][0];
 
-                        if (comp->roi_shift)
-                            roi_scale_cblk(cblk, comp, &t1);
-                        if (codsty->transform == FF_DWT97)
-                            dequantization_float(x, y, cblk, comp, &t1, band);
-                        else if (codsty->transform == FF_DWT97_INT)
-                            dequantization_int_97(x, y, cblk, comp, &t1, band);
-                        else
-                            dequantization_int(x, y, cblk, comp, &t1, band);
+    if (comp->roi_shift)
+        roi_scale_cblk(cblk, comp, &t1);
+    if (codsty->transform == FF_DWT97)
+        dequantization_float(x, y, cblk, comp, &t1, band);
+    else if (codsty->transform == FF_DWT97_INT)
+        dequantization_int_97(x, y, cblk, comp, &t1, band);
+    else
+        dequantization_int(x, y, cblk, comp, &t1, band);
 
     return 0;
 }
-- 
2.30.2


[-- Attachment #3: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [FFmpeg-devel] [PATCH 03/11] lavc/jpeg2000dwt: Implement sliced transforms
  2022-09-28 10:04 [FFmpeg-devel] [PATCH 01/11] lavc/jpeg2000dec: Finer granularity threading Tomas Härdin
  2022-09-28 10:05 ` [FFmpeg-devel] [PATCH 02/11] lavc/jpeg2000dec: Reindent Tomas Härdin
@ 2022-09-28 10:05 ` Tomas Härdin
  2022-09-28 10:06 ` [FFmpeg-devel] [PATCH 04/11] lavc/jpeg2000dec: Thread init_tile() Tomas Härdin
                   ` (7 subsequent siblings)
  9 siblings, 0 replies; 12+ messages in thread
From: Tomas Härdin @ 2022-09-28 10:05 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

[-- Attachment #1: Type: text/plain, Size: 1 bytes --]



[-- Attachment #2: 0003-lavc-jpeg2000dwt-Implement-sliced-transforms.patch --]
[-- Type: text/x-patch, Size: 25043 bytes --]

From 159d744f09f39e3350ac39ac5d05feaca22103af Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= <git@haerdin.se>
Date: Fri, 10 Jun 2022 17:18:14 +0200
Subject: [PATCH 03/11] lavc/jpeg2000dwt: Implement sliced transforms

lavc/tests/jpeg2000dwt tests this.
---
 libavcodec/j2kenc.c            |   3 +-
 libavcodec/jpeg2000.c          |   5 +-
 libavcodec/jpeg2000.h          |   2 +-
 libavcodec/jpeg2000dec.c       | 109 +++++++++++++++++++++++----
 libavcodec/jpeg2000dwt.c       | 132 +++++++++++++++++++--------------
 libavcodec/jpeg2000dwt.h       |   6 +-
 libavcodec/tests/jpeg2000dwt.c |  15 ++--
 7 files changed, 189 insertions(+), 83 deletions(-)

diff --git a/libavcodec/j2kenc.c b/libavcodec/j2kenc.c
index e883d5deb7..cd325e94e0 100644
--- a/libavcodec/j2kenc.c
+++ b/libavcodec/j2kenc.c
@@ -494,7 +494,8 @@ static int init_tiles(Jpeg2000EncoderContext *s)
                                                 s->cbps[compno],
                                                 compno?1<<s->chroma_shift[0]:1,
                                                 compno?1<<s->chroma_shift[1]:1,
-                                                s->avctx
+                                                s->avctx,
+                                                1
                                                )) < 0)
                     return ret;
             }
diff --git a/libavcodec/jpeg2000.c b/libavcodec/jpeg2000.c
index 0aa984bc53..945b787565 100644
--- a/libavcodec/jpeg2000.c
+++ b/libavcodec/jpeg2000.c
@@ -467,7 +467,7 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp,
                                Jpeg2000CodingStyle *codsty,
                                Jpeg2000QuantStyle *qntsty,
                                int cbps, int dx, int dy,
-                               AVCodecContext *avctx)
+                               AVCodecContext *avctx, int max_slices)
 {
     int reslevelno, bandno, gbandno = 0, ret, i, j;
     uint32_t csize;
@@ -479,7 +479,8 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp,
 
     if (ret = ff_jpeg2000_dwt_init(&comp->dwt, comp->coord,
                                    codsty->nreslevels2decode - 1,
-                                   codsty->transform))
+                                   codsty->transform,
+                                   max_slices))
         return ret;
 
     if (av_image_check_size(comp->coord[0][1] - comp->coord[0][0],
diff --git a/libavcodec/jpeg2000.h b/libavcodec/jpeg2000.h
index e5ecb4cbf9..389813a9b9 100644
--- a/libavcodec/jpeg2000.h
+++ b/libavcodec/jpeg2000.h
@@ -280,7 +280,7 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp,
                                Jpeg2000CodingStyle *codsty,
                                Jpeg2000QuantStyle *qntsty,
                                int cbps, int dx, int dy,
-                               AVCodecContext *ctx);
+                               AVCodecContext *ctx, int max_slices);
 
 void ff_jpeg2000_reinit(Jpeg2000Component *comp, Jpeg2000CodingStyle *codsty);
 
diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c
index 92bd76d90f..273346538f 100644
--- a/libavcodec/jpeg2000dec.c
+++ b/libavcodec/jpeg2000dec.c
@@ -150,6 +150,10 @@ typedef struct Jpeg2000DecoderContext {
     size_t idwt_allocated;
     Jpeg2000CodeblockThread *cb;
     size_t cb_allocated;
+
+    // used for idwt slicing
+    int reslevel, dir, slices;
+    int have_dwt97_int; // 1 if any coding style is FF_DWT97_INT
 } Jpeg2000DecoderContext;
 
 /* get_bits functions for JPEG2000 packet bitstream
@@ -545,9 +549,10 @@ static int get_cox(Jpeg2000DecoderContext *s, Jpeg2000CodingStyle *c)
     }
     c->transform = bytestream2_get_byteu(&s->g); // DWT transformation type
     /* set integer 9/7 DWT in case of BITEXACT flag */
-    if ((s->avctx->flags & AV_CODEC_FLAG_BITEXACT) && (c->transform == FF_DWT97))
+    if ((s->avctx->flags & AV_CODEC_FLAG_BITEXACT) && (c->transform == FF_DWT97)) {
         c->transform = FF_DWT97_INT;
-    else if (c->transform == FF_DWT53) {
+        s->have_dwt97_int = 1;
+    } else if (c->transform == FF_DWT53) {
         s->avctx->properties |= FF_CODEC_PROPERTY_LOSSLESS;
     }
 
@@ -1056,7 +1061,7 @@ static int init_tile(Jpeg2000DecoderContext *s, int tileno)
             return AVERROR_INVALIDDATA;
         if (ret = ff_jpeg2000_init_component(comp, codsty, qntsty,
                                              s->cbps[compno], s->cdx[compno],
-                                             s->cdy[compno], s->avctx))
+                                             s->cdy[compno], s->avctx, s->slices))
             return ret;
     }
     return 0;
@@ -1997,19 +2002,74 @@ static int jpeg2000_decode_cb(AVCodecContext *avctx, void *td,
     return 0;
 }
 
+static int jpeg2000_dwt97_int_preshift(AVCodecContext *avctx, void *td,
+                                       int jobnr, int threadnr)
+{
+    const Jpeg2000DecoderContext *s = avctx->priv_data;
+    Jpeg2000IdwtThread *idwt        = s->idwt + jobnr / s->slices;
+    Jpeg2000Tile *tile              = s->tile + jobnr / s->slices / s->ncomponents;
+    int compno                      = (jobnr / s->slices) % s->ncomponents;
+    int slice                       = jobnr % s->slices;
+    Jpeg2000Component *comp         = tile->comp + compno;
+    Jpeg2000CodingStyle *codsty     = tile->codsty + compno;
+    int a = comp->dwt.linelen[comp->dwt.ndeclevels - 1][0] *
+            comp->dwt.linelen[comp->dwt.ndeclevels - 1][1]; // w*h, as formerly scaled in dwt_decode97_int()
+    int as = (a + s->slices - 1)/s->slices;                 // samples per slice, rounded up
+    /* execute2() job: multiply this slice of an FF_DWT97_INT component by 1 << I_PRESHIFT */
+    for (int i = idwt->cb_start; i < idwt->cb_end; i++) {
+        if (s->cb[i].coded) { // scale only if at least one codeblock of the component was coded
+            if (codsty->transform == FF_DWT97_INT) {
+                for (int j = as*slice; j < a && j - as < as*slice; j++) // j < a: as*slices may exceed a
+                    comp->i_data[j] *= 1LL << I_PRESHIFT;
+            }
+            break;
+        }
+    }
+    /* NOTE(review): linelen[ndeclevels - 1] above presumes ndeclevels > 0 -- confirm */
+    return 0;
+}
+
 static int jpeg2000_idwt(AVCodecContext *avctx, void *td,
                          int jobnr, int threadnr)
 {
     const Jpeg2000DecoderContext *s = avctx->priv_data;
-    Jpeg2000IdwtThread *idwt        = s->idwt + jobnr;
-    Jpeg2000Tile *tile              = s->tile + jobnr / s->ncomponents;
-    int compno                      = jobnr % s->ncomponents;
+    Jpeg2000IdwtThread *idwt        = s->idwt + jobnr / s->slices;
+    Jpeg2000Tile *tile              = s->tile + jobnr / s->slices / s->ncomponents;
+    int compno                      = (jobnr / s->slices) % s->ncomponents;
+    int slice                       = jobnr % s->slices;
     Jpeg2000Component *comp         = tile->comp + compno;
     Jpeg2000CodingStyle *codsty     = tile->codsty + compno;
 
     for (int i = idwt->cb_start; i < idwt->cb_end; i++) {
         if (s->cb[i].coded) {
-            ff_dwt_decode(&comp->dwt, codsty->transform == FF_DWT97 ? (void*)comp->f_data : (void*)comp->i_data);
+            ff_dwt_decode_thread(&comp->dwt, codsty->transform == FF_DWT97 ? (void*)comp->f_data : (void*)comp->i_data, s->reslevel, s->dir, slice, s->slices);
+            break;
+        }
+    }
+
+    return 0;
+}
+
+static int jpeg2000_dwt97_int_postshift(AVCodecContext *avctx, void *td,
+                                        int jobnr, int threadnr)
+{
+    const Jpeg2000DecoderContext *s = avctx->priv_data;
+    Jpeg2000IdwtThread *idwt        = s->idwt + jobnr / s->slices;
+    Jpeg2000Tile *tile              = s->tile + jobnr / s->slices / s->ncomponents;
+    int compno                      = (jobnr / s->slices) % s->ncomponents;
+    int slice                       = jobnr % s->slices;
+    Jpeg2000Component *comp         = tile->comp + compno;
+    Jpeg2000CodingStyle *codsty     = tile->codsty + compno;
+    int a = comp->dwt.linelen[comp->dwt.ndeclevels - 1][0] *
+            comp->dwt.linelen[comp->dwt.ndeclevels - 1][1];
+    int as = (a + s->slices - 1)/s->slices;
+
+    for (int i = idwt->cb_start; i < idwt->cb_end; i++) {
+        if (s->cb[i].coded) {
+            if (codsty->transform == FF_DWT97_INT) {
+                for (int i = as*slice; i - as < as*slice; i++)
+                    comp->i_data[i] = (comp->i_data[i] + ((1LL<<I_PRESHIFT)>>1)) >> I_PRESHIFT;
+            }
             break;
         }
     }
@@ -2480,7 +2540,7 @@ static av_cold int jpeg2000_decode_init(AVCodecContext *avctx)
     return 0;
 }
 
-static int jpeg2000_setup_cbs(Jpeg2000DecoderContext *s, int *cbs_out)
+static int jpeg2000_setup_cbs(Jpeg2000DecoderContext *s, int *cbs_out, int *maxreslevels_out)
 {
     if (s->numXtiles * s->numYtiles > INT_MAX/s->ncomponents)
         return AVERROR(ENOMEM);
@@ -2491,7 +2551,7 @@ static int jpeg2000_setup_cbs(Jpeg2000DecoderContext *s, int *cbs_out)
         return AVERROR(ENOMEM);
 
     for (int pass = 0; pass < 2; pass++) {
-        int cbs = 0;
+        int cbs = 0, maxreslevels = 0;
         for (int tileno = 0; tileno < s->numXtiles * s->numYtiles; tileno++) {
             for (int compno = 0; compno < s->ncomponents; compno++) {
                 Jpeg2000Tile *tile          = s->tile + tileno;
@@ -2500,6 +2560,7 @@ static int jpeg2000_setup_cbs(Jpeg2000DecoderContext *s, int *cbs_out)
                 Jpeg2000IdwtThread *idwt    = s->idwt + compno + tileno * s->ncomponents;
 
                 idwt->cb_start = cbs;
+                maxreslevels = FFMAX(maxreslevels, codsty->nreslevels2decode);
 
                 for (int reslevelno = 0; reslevelno < codsty->nreslevels2decode; reslevelno++) {
                     Jpeg2000ResLevel *rlevel = comp->reslevel + reslevelno;
@@ -2544,6 +2605,7 @@ static int jpeg2000_setup_cbs(Jpeg2000DecoderContext *s, int *cbs_out)
         }
 
         *cbs_out = cbs;
+        *maxreslevels_out = maxreslevels;
     }
     return 0;
 }
@@ -2552,7 +2614,7 @@ static int jpeg2000_decode_frame(AVCodecContext *avctx, AVFrame *picture,
                                  int *got_frame, AVPacket *avpkt)
 {
     Jpeg2000DecoderContext *s = avctx->priv_data;
-    int ret, cbs;
+    int ret, cbs, maxreslevels;
 
     s->avctx     = avctx;
     bytestream2_init(&s->g, avpkt->data, avpkt->size);
@@ -2604,6 +2666,7 @@ static int jpeg2000_decode_frame(AVCodecContext *avctx, AVFrame *picture,
         goto end;
     picture->pict_type = AV_PICTURE_TYPE_I;
     picture->key_frame = 1;
+    s->slices = avctx->active_thread_type == FF_THREAD_SLICE ? avctx->thread_count : 1;
 
     if (ret = jpeg2000_read_bitstream_packets(s))
         goto end;
@@ -2619,12 +2682,30 @@ static int jpeg2000_decode_frame(AVCodecContext *avctx, AVFrame *picture,
         }
     }
 
-    if ((ret = jpeg2000_setup_cbs(s, &cbs)) < 0 ||
-        (ret = avctx->execute2(avctx, jpeg2000_decode_cb, NULL, NULL, cbs)) < 0 ||
-        (ret = avctx->execute2(avctx, jpeg2000_idwt, NULL, NULL, s->numXtiles * s->numYtiles * s->ncomponents)) < 0 ||
-        (ret = avctx->execute2(avctx, jpeg2000_mct_write_frame, picture, NULL, s->numXtiles * s->numYtiles)) < 0)
+    if ((ret = jpeg2000_setup_cbs(s, &cbs, &maxreslevels)) < 0 ||
+        (ret = avctx->execute2(avctx, jpeg2000_decode_cb, NULL, NULL, cbs)) < 0)
+        goto end;
+
+    if (s->have_dwt97_int &&
+        (ret = avctx->execute2(avctx, jpeg2000_dwt97_int_preshift, NULL, NULL,
+            s->numXtiles * s->numYtiles * s->ncomponents * s->slices)) < 0)
         goto end;
 
+    for (s->reslevel = 0; s->reslevel < maxreslevels; s->reslevel++) {
+        for (s->dir = 0; s->dir < 2; s->dir++) {
+            if ((ret = avctx->execute2(avctx, jpeg2000_idwt, NULL, NULL,
+                    s->numXtiles * s->numYtiles * s->ncomponents * s->slices)) < 0)
+                goto end;
+        }
+    }
+
+    if (s->have_dwt97_int &&
+        (ret = avctx->execute2(avctx, jpeg2000_dwt97_int_postshift, NULL, NULL,
+            s->numXtiles * s->numYtiles * s->ncomponents * s->slices)) < 0)
+        goto end;
+
+    if ((ret = avctx->execute2(avctx, jpeg2000_mct_write_frame, picture, NULL, s->numXtiles * s->numYtiles)) < 0)
+        goto end;
 
     jpeg2000_dec_cleanup(s);
 
diff --git a/libavcodec/jpeg2000dwt.c b/libavcodec/jpeg2000dwt.c
index f2da7307c4..921461b6d7 100644
--- a/libavcodec/jpeg2000dwt.c
+++ b/libavcodec/jpeg2000dwt.c
@@ -45,7 +45,6 @@
 #define I_LFTG_DELTA   29066ll
 #define I_LFTG_K       80621ll
 #define I_LFTG_X       53274ll
-#define I_PRESHIFT 8
 
 static inline void extend53(int *p, int i0, int i1)
 {
@@ -322,24 +321,24 @@ static void sr_1d53(unsigned *p, int i0, int i1)
         p[2 * i + 1] += (int)(p[2 * i] + p[2 * i + 2]) >> 1;
 }
 
-static void dwt_decode53(DWTContext *s, int *t)
+static void dwt_decode53(DWTContext *s, int *t, int lev, int dir, int slice, int slices)
 {
-    int lev;
     int w     = s->linelen[s->ndeclevels - 1][0];
-    int32_t *line = s->i_linebuf;
-    line += 3;
+    int32_t *line = s->i_linebuf + slice * s->linesize + 3;
 
-    for (lev = 0; lev < s->ndeclevels; lev++) {
         int lh = s->linelen[lev][0],
             lv = s->linelen[lev][1],
             mh = s->mod[lev][0],
             mv = s->mod[lev][1],
+            sh = (lh + slices - 1)/slices,
+            sv = (lv + slices - 1)/slices,
             lp;
         int *l;
 
+    if (dir == 0) {
         // HOR_SD
         l = line + mh;
-        for (lp = 0; lp < lv; lp++) {
+        for (lp = slice*sv; lp < lv && lp - sv < slice*sv; lp++) {
             int i, j = 0;
             // copy with interleaving
             for (i = mh; i < lh; i += 2, j++)
@@ -352,10 +351,10 @@ static void dwt_decode53(DWTContext *s, int *t)
             for (i = 0; i < lh; i++)
                 t[w * lp + i] = l[i];
         }
-
+    } else {
         // VER_SD
         l = line + mv;
-        for (lp = 0; lp < lh; lp++) {
+        for (lp = slice*sh; lp < lh && lp - sh < slice*sh; lp++) {
             int i, j = 0;
             // copy with interleaving
             for (i = mv; i < lv; i += 2, j++)
@@ -398,25 +397,26 @@ static void sr_1d97_float(float *p, int i0, int i1)
         p[2 * i + 1] += F_LFTG_ALPHA * (p[2 * i]     + p[2 * i + 2]);
 }
 
-static void dwt_decode97_float(DWTContext *s, float *t)
+static void dwt_decode97_float(DWTContext *s, float *t, int lev, int dir, int slice, int slices)
 {
-    int lev;
     int w       = s->linelen[s->ndeclevels - 1][0];
-    float *line = s->f_linebuf;
-    float *data = t;
     /* position at index O of line range [0-5,w+5] cf. extend function */
-    line += 5;
+    float *line = s->f_linebuf + slice * s->linesize + 5;
+    float *data = t;
 
-    for (lev = 0; lev < s->ndeclevels; lev++) {
         int lh = s->linelen[lev][0],
             lv = s->linelen[lev][1],
             mh = s->mod[lev][0],
             mv = s->mod[lev][1],
+            sh = (lh + slices - 1)/slices,
+            sv = (lv + slices - 1)/slices,
             lp;
         float *l;
+
+    if (dir == 0) {
         // HOR_SD
         l = line + mh;
-        for (lp = 0; lp < lv; lp++) {
+        for (lp = slice*sv; lp < lv && lp - sv < slice*sv; lp++) {
             int i, j = 0;
             // copy with interleaving
             for (i = mh; i < lh; i += 2, j++)
@@ -429,10 +429,10 @@ static void dwt_decode97_float(DWTContext *s, float *t)
             for (i = 0; i < lh; i++)
                 data[w * lp + i] = l[i];
         }
-
+    } else {
         // VER_SD
         l = line + mv;
-        for (lp = 0; lp < lh; lp++) {
+        for (lp = slice*sh; lp < lh && lp - sh < slice*sh; lp++) {
             int i, j = 0;
             // copy with interleaving
             for (i = mv; i < lv; i += 2, j++)
@@ -475,30 +475,26 @@ static void sr_1d97_int(int32_t *p, int i0, int i1)
         p[2 * i + 1] += (I_LFTG_ALPHA * (p[2 * i]     + (int64_t)p[2 * i + 2]) + (1 << 15)) >> 16;
 }
 
-static void dwt_decode97_int(DWTContext *s, int32_t *t)
+static void dwt_decode97_int(DWTContext *s, int32_t *t, int lev, int dir, int slice, int slices)
 {
-    int lev;
     int w       = s->linelen[s->ndeclevels - 1][0];
-    int h       = s->linelen[s->ndeclevels - 1][1];
-    int i;
-    int32_t *line = s->i_linebuf;
-    int32_t *data = t;
     /* position at index O of line range [0-5,w+5] cf. extend function */
-    line += 5;
-
-    for (i = 0; i < w * h; i++)
-        data[i] *= 1LL << I_PRESHIFT;
+    int32_t *line = s->i_linebuf + slice * s->linesize + 5;
+    int32_t *data = t;
 
-    for (lev = 0; lev < s->ndeclevels; lev++) {
         int lh = s->linelen[lev][0],
             lv = s->linelen[lev][1],
             mh = s->mod[lev][0],
             mv = s->mod[lev][1],
+            sh = (lh + slices - 1)/slices,
+            sv = (lv + slices - 1)/slices,
             lp;
         int32_t *l;
+
+    if (dir == 0) {
         // HOR_SD
         l = line + mh;
-        for (lp = 0; lp < lv; lp++) {
+        for (lp = slice*sv; lp < lv && lp - sv < slice*sv; lp++) {
             int i, j = 0;
             // rescale with interleaving
             for (i = mh; i < lh; i += 2, j++)
@@ -511,10 +507,10 @@ static void dwt_decode97_int(DWTContext *s, int32_t *t)
             for (i = 0; i < lh; i++)
                 data[w * lp + i] = l[i];
         }
-
+    } else {
         // VER_SD
         l = line + mv;
-        for (lp = 0; lp < lh; lp++) {
+        for (lp = slice*sh; lp < lh && lp - sh < slice*sh; lp++) {
             int i, j = 0;
             // rescale with interleaving
             for (i = mv; i < lv; i += 2, j++)
@@ -528,26 +524,29 @@ static void dwt_decode97_int(DWTContext *s, int32_t *t)
                 data[w * i + lp] = l[i];
         }
     }
-
-    for (i = 0; i < w * h; i++)
-        data[i] = (data[i] + ((1LL<<I_PRESHIFT)>>1)) >> I_PRESHIFT;
 }
 
 int ff_jpeg2000_dwt_init(DWTContext *s, int border[2][2],
-                         int decomp_levels, int type)
+                         int decomp_levels, int type, int max_slices)
 {
-    int i, j, lev = decomp_levels, maxlen,
+    int i, j, lev = decomp_levels,
         b[2][2];
 
     s->ndeclevels = decomp_levels;
     s->type       = type;
+    s->max_slices = max_slices;
+
+    if (s->max_slices > INT_MAX/FFMAX(sizeof(*s->f_linebuf),sizeof(*s->i_linebuf)))
+        return AVERROR(ENOMEM);
 
     for (i = 0; i < 2; i++)
         for (j = 0; j < 2; j++)
             b[i][j] = border[i][j];
 
-    maxlen = FFMAX(b[0][1] - b[0][0],
-                   b[1][1] - b[1][0]);
+    s->linesize   = FFMAX(b[0][1] - b[0][0],
+                          b[1][1] - b[1][0]) +
+                    (type == FF_DWT53 ? 6 : 12);
+
     while (--lev >= 0)
         for (i = 0; i < 2; i++) {
             s->linelen[lev][i] = b[i][1] - b[i][0];
@@ -555,24 +554,15 @@ int ff_jpeg2000_dwt_init(DWTContext *s, int border[2][2],
             for (j = 0; j < 2; j++)
                 b[i][j] = (b[i][j] + 1) >> 1;
         }
-    switch (type) {
-    case FF_DWT97:
-        s->f_linebuf = av_malloc_array((maxlen + 12), sizeof(*s->f_linebuf));
+
+    if (type == FF_DWT97) {
+        s->f_linebuf = av_malloc_array(s->linesize, s->max_slices*sizeof(*s->f_linebuf));
         if (!s->f_linebuf)
             return AVERROR(ENOMEM);
-        break;
-     case FF_DWT97_INT:
-        s->i_linebuf = av_malloc_array((maxlen + 12), sizeof(*s->i_linebuf));
-        if (!s->i_linebuf)
-            return AVERROR(ENOMEM);
-        break;
-    case FF_DWT53:
-        s->i_linebuf = av_malloc_array((maxlen +  6), sizeof(*s->i_linebuf));
+    } else {
+        s->i_linebuf = av_malloc_array(s->linesize, s->max_slices*sizeof(*s->i_linebuf));
         if (!s->i_linebuf)
             return AVERROR(ENOMEM);
-        break;
-    default:
-        return -1;
     }
     return 0;
 }
@@ -597,18 +587,46 @@ int ff_dwt_encode(DWTContext *s, void *t)
 
 int ff_dwt_decode(DWTContext *s, void *t)
 {
-    if (s->ndeclevels == 0)
+    int w = s->linelen[s->ndeclevels - 1][0];
+    int h = s->linelen[s->ndeclevels - 1][1];
+    int32_t *data = t;
+
+    if (s->type == FF_DWT97_INT)
+        for (int i = 0; i < w * h; i++)
+            data[i] *= 1LL << I_PRESHIFT;
+
+    for (int lev = 0; lev < s->ndeclevels; lev++)
+        for (int dir = 0; dir < 2; dir++)
+            for (int slice = 0; slice < s->max_slices; slice++) {
+                int ret = ff_dwt_decode_thread(s, t, lev, dir, slice, s->max_slices);
+                if (ret)
+                    return ret;
+            }
+
+    if (s->type == FF_DWT97_INT)
+        for (int i = 0; i < w * h; i++)
+            data[i] = (data[i] + ((1LL<<I_PRESHIFT)>>1)) >> I_PRESHIFT;
+
+    return 0;
+}
+
+int ff_dwt_decode_thread(DWTContext *s, void *t, int lev, int dir, int slice, int slices)
+{
+    slices = FFMIN(s->max_slices, slices);
+
+    // lev can be >= s->ndeclevels in files with mixed reslevels in tiles/components
+    if (s->ndeclevels == 0 || lev >= s->ndeclevels || slice >= slices)
         return 0;
 
     switch (s->type) {
     case FF_DWT97:
-        dwt_decode97_float(s, t);
+        dwt_decode97_float(s, t, lev, dir, slice, slices);
         break;
     case FF_DWT97_INT:
-        dwt_decode97_int(s, t);
+        dwt_decode97_int(s, t, lev, dir, slice, slices);
         break;
     case FF_DWT53:
-        dwt_decode53(s, t);
+        dwt_decode53(s, t, lev, dir, slice, slices);
         break;
     default:
         return -1;
diff --git a/libavcodec/jpeg2000dwt.h b/libavcodec/jpeg2000dwt.h
index 718d183ac1..d5e94c9916 100644
--- a/libavcodec/jpeg2000dwt.h
+++ b/libavcodec/jpeg2000dwt.h
@@ -32,6 +32,7 @@
 #define FF_DWT_MAX_DECLVLS 32 ///< max number of decomposition levels
 #define F_LFTG_K      1.230174104914001f
 #define F_LFTG_X      0.812893066115961f
+#define I_PRESHIFT 8
 
 enum DWTType {
     FF_DWT97,
@@ -48,6 +49,8 @@ typedef struct DWTContext {
     uint8_t type;                        ///< 0 for 9/7; 1 for 5/3
     int32_t *i_linebuf;                  ///< int buffer used by transform
     float   *f_linebuf;                  ///< float buffer used by transform
+    int max_slices;
+    int linesize;
 } DWTContext;
 
 /**
@@ -58,10 +61,11 @@ typedef struct DWTContext {
  * @param type              0 for DWT 9/7; 1 for DWT 5/3
  */
 int ff_jpeg2000_dwt_init(DWTContext *s, int border[2][2],
-                         int decomp_levels, int type);
+                         int decomp_levels, int type, int max_slices);
 
 int ff_dwt_encode(DWTContext *s, void *t);
 int ff_dwt_decode(DWTContext *s, void *t);
+int ff_dwt_decode_thread(DWTContext *s, void *t, int lev, int dir, int slice, int slices);
 
 void ff_dwt_destroy(DWTContext *s);
 
diff --git a/libavcodec/tests/jpeg2000dwt.c b/libavcodec/tests/jpeg2000dwt.c
index 0e5a6ed947..d4d9e6d224 100644
--- a/libavcodec/tests/jpeg2000dwt.c
+++ b/libavcodec/tests/jpeg2000dwt.c
@@ -31,12 +31,12 @@
 
 #define MAX_W 256
 
-static int test_dwt(int *array, int *ref, int border[2][2], int decomp_levels, int type, int max_diff) {
+static int test_dwt(int *array, int *ref, int border[2][2], int decomp_levels, int type, int max_diff, int slices) {
     int ret, j;
     DWTContext s1={{{0}}}, *s= &s1;
     int64_t err2 = 0;
 
-    ret = ff_jpeg2000_dwt_init(s,  border, decomp_levels, type);
+    ret = ff_jpeg2000_dwt_init(s,  border, decomp_levels, type, slices);
     if (ret < 0) {
         fprintf(stderr, "ff_jpeg2000_dwt_init failed\n");
         return 1;
@@ -70,12 +70,12 @@ static int test_dwt(int *array, int *ref, int border[2][2], int decomp_levels, i
     return 0;
 }
 
-static int test_dwtf(float *array, float *ref, int border[2][2], int decomp_levels, float max_diff) {
+static int test_dwtf(float *array, float *ref, int border[2][2], int decomp_levels, float max_diff, int slices) {
     int ret, j;
     DWTContext s1={{{0}}}, *s= &s1;
     double err2 = 0;
 
-    ret = ff_jpeg2000_dwt_init(s,  border, decomp_levels, FF_DWT97);
+    ret = ff_jpeg2000_dwt_init(s,  border, decomp_levels, FF_DWT97, slices);
     if (ret < 0) {
         fprintf(stderr, "ff_jpeg2000_dwt_init failed\n");
         return 1;
@@ -125,19 +125,20 @@ int main(void) {
         arrayf[i] = reff[i] = array[i] = ref[i] =  av_lfg_get(&prng) % 2048;
 
     for (i = 0; i < 100; i++) {
+        int slices = 1 + (i % 10);
         for (j=0; j<4; j++)
             border[j>>1][j&1] = av_lfg_get(&prng) % MAX_W;
         if (border[0][0] >= border[0][1] || border[1][0] >= border[1][1])
             continue;
         decomp_levels = av_lfg_get(&prng) % FF_DWT_MAX_DECLVLS;
 
-        ret = test_dwt(array, ref, border, decomp_levels, FF_DWT53, 0);
+        ret = test_dwt(array, ref, border, decomp_levels, FF_DWT53, 0, slices);
         if (ret)
             return ret;
-        ret = test_dwt(array, ref, border, decomp_levels, FF_DWT97_INT, FFMIN(7+5*decomp_levels, 15+3*decomp_levels));
+        ret = test_dwt(array, ref, border, decomp_levels, FF_DWT97_INT, FFMIN(7+5*decomp_levels, 15+3*decomp_levels), slices);
         if (ret)
             return ret;
-        ret = test_dwtf(arrayf, reff, border, decomp_levels, 0.05);
+        ret = test_dwtf(arrayf, reff, border, decomp_levels, 0.05, slices);
         if (ret)
             return ret;
     }
-- 
2.30.2


[-- Attachment #3: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [FFmpeg-devel] [PATCH 04/11] lavc/jpeg2000dec: Thread init_tile()
  2022-09-28 10:04 [FFmpeg-devel] [PATCH 01/11] lavc/jpeg2000dec: Finer granularity threading Tomas Härdin
  2022-09-28 10:05 ` [FFmpeg-devel] [PATCH 02/11] lavc/jpeg2000dec: Reindent Tomas Härdin
  2022-09-28 10:05 ` [FFmpeg-devel] [PATCH 03/11] lavc/jpeg2000dwt: Implement sliced transforms Tomas Härdin
@ 2022-09-28 10:06 ` Tomas Härdin
  2022-09-28 14:14   ` Tomas Härdin
  2022-09-28 10:06 ` [FFmpeg-devel] [PATCH 05/11] lavc/jpeg2000*: Use av_realloc_array_reuse() and av_reallocz_array_reuse() to eliminate lots of allocations Tomas Härdin
                   ` (6 subsequent siblings)
  9 siblings, 1 reply; 12+ messages in thread
From: Tomas Härdin @ 2022-09-28 10:06 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

[-- Attachment #1: Type: text/plain, Size: 46 bytes --]

This is the patch that needs the new execute2().

[-- Attachment #2: 0004-lavc-jpeg2000dec-Thread-init_tile.patch --]
[-- Type: text/x-patch, Size: 3415 bytes --]

From 4e7c65a7a3e049396ce5e3c01db335a532889115 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= <git@haerdin.se>
Date: Mon, 13 Jun 2022 15:09:17 +0200
Subject: [PATCH 04/11] lavc/jpeg2000dec: Thread init_tile()

---
 libavcodec/jpeg2000dec.c | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c
index 273346538f..00aa73e261 100644
--- a/libavcodec/jpeg2000dec.c
+++ b/libavcodec/jpeg2000dec.c
@@ -1019,12 +1019,19 @@ static int get_ppt(Jpeg2000DecoderContext *s, int n)
     return 0;
 }
 
-static int init_tile(Jpeg2000DecoderContext *s, int tileno)
+static int init_tile(AVCodecContext *avctx, void *td,
+                     int jobnr, int threadnr)
 {
-    int compno;
-    int tilex = tileno % s->numXtiles;
-    int tiley = tileno / s->numXtiles;
-    Jpeg2000Tile *tile = s->tile + tileno;
+    const Jpeg2000DecoderContext *s = avctx->priv_data;
+    int tileno                      = jobnr / s->ncomponents;
+    int tilex                       = tileno % s->numXtiles;
+    int tiley                       = tileno / s->numXtiles;
+    int compno                      = jobnr % s->ncomponents;
+    Jpeg2000Tile *tile              = s->tile + tileno;
+    Jpeg2000Component *comp         = tile->comp + compno;
+    Jpeg2000CodingStyle *codsty     = tile->codsty + compno;
+    Jpeg2000QuantStyle  *qntsty     = tile->qntsty + compno;
+    int ret; // global bandno
 
     if (!tile->comp)
         return AVERROR(ENOMEM);
@@ -1034,12 +1041,6 @@ static int init_tile(Jpeg2000DecoderContext *s, int tileno)
     tile->coord[1][0] = av_clip(tiley       * (int64_t)s->tile_height + s->tile_offset_y, s->image_offset_y, s->height);
     tile->coord[1][1] = av_clip((tiley + 1) * (int64_t)s->tile_height + s->tile_offset_y, s->image_offset_y, s->height);
 
-    for (compno = 0; compno < s->ncomponents; compno++) {
-        Jpeg2000Component *comp = tile->comp + compno;
-        Jpeg2000CodingStyle *codsty = tile->codsty + compno;
-        Jpeg2000QuantStyle  *qntsty = tile->qntsty + compno;
-        int ret; // global bandno
-
         comp->coord_o[0][0] = tile->coord[0][0];
         comp->coord_o[0][1] = tile->coord[0][1];
         comp->coord_o[1][0] = tile->coord[1][0];
@@ -1063,7 +1064,7 @@ static int init_tile(Jpeg2000DecoderContext *s, int tileno)
                                              s->cbps[compno], s->cdx[compno],
                                              s->cdy[compno], s->avctx, s->slices))
             return ret;
-    }
+
     return 0;
 }
 
@@ -2371,9 +2372,6 @@ static int jpeg2000_read_bitstream_packets(Jpeg2000DecoderContext *s)
     for (tileno = 0; tileno < s->numXtiles * s->numYtiles; tileno++) {
         Jpeg2000Tile *tile = s->tile + tileno;
 
-        if ((ret = init_tile(s, tileno)) < 0)
-            return ret;
-
         if ((ret = jpeg2000_decode_packets(s, tile)) < 0)
             return ret;
     }
@@ -2668,6 +2666,9 @@ static int jpeg2000_decode_frame(AVCodecContext *avctx, AVFrame *picture,
     picture->key_frame = 1;
     s->slices = avctx->active_thread_type == FF_THREAD_SLICE ? avctx->thread_count : 1;
 
+    if ((ret = avctx->execute2(avctx, init_tile, NULL, NULL, s->numXtiles * s->numYtiles * s->ncomponents)) < 0)
+        goto end;
+
     if (ret = jpeg2000_read_bitstream_packets(s))
         goto end;
 
-- 
2.30.2


[-- Attachment #3: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [FFmpeg-devel] [PATCH 05/11] lavc/jpeg2000*: Use av_realloc_array_reuse() and av_reallocz_array_reuse() to eliminate lots of allocations
  2022-09-28 10:04 [FFmpeg-devel] [PATCH 01/11] lavc/jpeg2000dec: Finer granularity threading Tomas Härdin
                   ` (2 preceding siblings ...)
  2022-09-28 10:06 ` [FFmpeg-devel] [PATCH 04/11] lavc/jpeg2000dec: Thread init_tile() Tomas Härdin
@ 2022-09-28 10:06 ` Tomas Härdin
  2022-09-28 10:07 ` [FFmpeg-devel] [PATCH 06/11] lavc/jpeg2000: Switch Jpeg2000TgtNode to int32_t parent Tomas Härdin
                   ` (5 subsequent siblings)
  9 siblings, 0 replies; 12+ messages in thread
From: Tomas Härdin @ 2022-09-28 10:06 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

[-- Attachment #1: Type: text/plain, Size: 1 bytes --]



[-- Attachment #2: 0005-lavc-jpeg2000-Use-av_realloc_array_reuse-and-av_real.patch --]
[-- Type: text/x-patch, Size: 17753 bytes --]

From 5a5986c29d62933f3f2cd2259becb763f3719eaa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= <git@haerdin.se>
Date: Mon, 13 Jun 2022 17:04:10 +0200
Subject: [PATCH 05/11] lavc/jpeg2000*: Use av_realloc_array_reuse() and
 av_reallocz_array_reuse() to eliminate lots of allocations

---
 libavcodec/jpeg2000.c    | 79 +++++++++++++++++++++-------------------
 libavcodec/jpeg2000.h    | 10 +++++
 libavcodec/jpeg2000dec.c | 48 ++++++++++--------------
 libavcodec/jpeg2000dwt.c |  9 ++++-
 libavcodec/jpeg2000dwt.h |  2 +
 5 files changed, 80 insertions(+), 68 deletions(-)

diff --git a/libavcodec/jpeg2000.c b/libavcodec/jpeg2000.c
index 945b787565..afff9809e4 100644
--- a/libavcodec/jpeg2000.c
+++ b/libavcodec/jpeg2000.c
@@ -52,17 +52,23 @@ static int32_t tag_tree_size(int w, int h)
 }
 
 /* allocate the memory for tag tree */
-static Jpeg2000TgtNode *ff_jpeg2000_tag_tree_init(int w, int h)
+static int ff_jpeg2000_tag_tree_init(Jpeg2000TgtNode **old, unsigned int *size, int w, int h)
 {
     int pw = w, ph = h;
-    Jpeg2000TgtNode *res, *t, *t2;
+    Jpeg2000TgtNode *t, *t2;
     int32_t tt_size;
+    size_t prod;
 
     tt_size = tag_tree_size(w, h);
 
-    t = res = av_calloc(tt_size, sizeof(*t));
-    if (!res)
-        return NULL;
+    if (av_size_mult(tt_size, sizeof(*t), &prod))
+        return AVERROR(ENOMEM);
+
+    av_fast_malloc(old, size, prod);
+    if (!*old)
+        return AVERROR(ENOMEM);
+    t = *old;
+    memset(*old, 0, prod);
 
     while (w > 1 || h > 1) {
         int i, j;
@@ -80,7 +86,7 @@ static Jpeg2000TgtNode *ff_jpeg2000_tag_tree_init(int w, int h)
         t = t2;
     }
     t[0].parent = NULL;
-    return res;
+    return 0;
 }
 
 void ff_tag_tree_zero(Jpeg2000TgtNode *t, int w, int h, int val)
@@ -278,7 +284,7 @@ static int init_prec(AVCodecContext *avctx,
                      int log2_band_prec_height)
 {
     Jpeg2000Prec *prec = band->prec + precno;
-    int nb_codeblocks, cblkno;
+    int nb_codeblocks, cblkno, ret;
 
     prec->decoded_layers = 0;
 
@@ -316,25 +322,22 @@ static int init_prec(AVCodecContext *avctx,
 
 
     /* Tag trees initialization */
-    prec->cblkincl =
-        ff_jpeg2000_tag_tree_init(prec->nb_codeblocks_width,
-                                  prec->nb_codeblocks_height);
-    if (!prec->cblkincl)
-        return AVERROR(ENOMEM);
-
-    prec->zerobits =
-        ff_jpeg2000_tag_tree_init(prec->nb_codeblocks_width,
-                                  prec->nb_codeblocks_height);
-    if (!prec->zerobits)
-        return AVERROR(ENOMEM);
+    if ((ret = ff_jpeg2000_tag_tree_init(&prec->cblkincl,
+                                         &prec->cblkincl_size,
+                                         prec->nb_codeblocks_width,
+                                         prec->nb_codeblocks_height)) < 0 ||
+        (ret = ff_jpeg2000_tag_tree_init(&prec->zerobits,
+                                         &prec->zerobits_size,
+                                         prec->nb_codeblocks_width,
+                                         prec->nb_codeblocks_height)) < 0)
+        return ret;
 
     if (prec->nb_codeblocks_width * (uint64_t)prec->nb_codeblocks_height > INT_MAX) {
         prec->cblk = NULL;
         return AVERROR(ENOMEM);
     }
     nb_codeblocks = prec->nb_codeblocks_width * prec->nb_codeblocks_height;
-    prec->cblk = av_calloc(nb_codeblocks, sizeof(*prec->cblk));
-    if (!prec->cblk)
+    if (av_reallocz_array_reuse(&prec->cblk, &prec->cblk_allocated, nb_codeblocks, INT_MAX, sizeof(*prec->cblk)))
         return AVERROR(ENOMEM);
     for (cblkno = 0; cblkno < nb_codeblocks; cblkno++) {
         Jpeg2000Cblk *cblk = prec->cblk + cblkno;
@@ -376,6 +379,7 @@ static int init_prec(AVCodecContext *avctx,
         cblk->length    = 0;
         cblk->npasses   = 0;
         if (av_codec_is_encoder(avctx->codec)) {
+            av_freep(&cblk->layers);
             cblk->layers = av_calloc(codsty->nlayers, sizeof(*cblk->layers));
             if (!cblk->layers)
                 return AVERROR(ENOMEM);
@@ -448,8 +452,7 @@ static int init_band(AVCodecContext *avctx,
         return AVERROR(ENOMEM);
     }
     nb_precincts = reslevel->num_precincts_x * reslevel->num_precincts_y;
-    band->prec = av_calloc(nb_precincts, sizeof(*band->prec));
-    if (!band->prec)
+    if (av_reallocz_array_reuse(&band->prec, &band->prec_allocated, nb_precincts, INT_MAX, sizeof(*band->prec)))
         return AVERROR(ENOMEM);
 
     for (precno = 0; precno < nb_precincts; precno++) {
@@ -471,6 +474,7 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp,
 {
     int reslevelno, bandno, gbandno = 0, ret, i, j;
     uint32_t csize;
+    size_t prod;
 
     if (codsty->nreslevels2decode <= 0) {
         av_log(avctx, AV_LOG_ERROR, "nreslevels2decode %d invalid or uninitialized\n", codsty->nreslevels2decode);
@@ -496,19 +500,22 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp,
 
     if (codsty->transform == FF_DWT97) {
         csize += AV_INPUT_BUFFER_PADDING_SIZE / sizeof(*comp->f_data);
-        comp->i_data = NULL;
-        comp->f_data = av_calloc(csize, sizeof(*comp->f_data));
+        if (av_size_mult(csize, sizeof(*comp->f_data), &prod))
+            return AVERROR(ENOMEM);
+        av_fast_malloc(&comp->f_data, &comp->f_data_size, prod);
         if (!comp->f_data)
             return AVERROR(ENOMEM);
+        memset(comp->f_data, 0, prod);
     } else {
         csize += AV_INPUT_BUFFER_PADDING_SIZE / sizeof(*comp->i_data);
-        comp->f_data = NULL;
-        comp->i_data = av_calloc(csize, sizeof(*comp->i_data));
+        if (av_size_mult(csize, sizeof(*comp->i_data), &prod))
+            return AVERROR(ENOMEM);
+        av_fast_malloc(&comp->i_data, &comp->i_data_size, prod);
         if (!comp->i_data)
             return AVERROR(ENOMEM);
+        memset(comp->i_data, 0, prod);
     }
-    comp->reslevel = av_calloc(codsty->nreslevels, sizeof(*comp->reslevel));
-    if (!comp->reslevel)
+    if (av_reallocz_array_reuse(&comp->reslevel, &comp->reslevel_allocated, codsty->nreslevels, INT_MAX, sizeof(*comp->reslevel)))
         return AVERROR(ENOMEM);
     /* LOOP on resolution levels */
     for (reslevelno = 0; reslevelno < codsty->nreslevels; reslevelno++) {
@@ -555,8 +562,7 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp,
                                         reslevel->log2_prec_height) -
                 (reslevel->coord[1][0] >> reslevel->log2_prec_height);
 
-        reslevel->band = av_calloc(reslevel->nbands, sizeof(*reslevel->band));
-        if (!reslevel->band)
+        if (av_reallocz_array_reuse(&reslevel->band, &reslevel->band_allocated, reslevel->nbands, INT_MAX, sizeof(*reslevel->band)))
             return AVERROR(ENOMEM);
 
         if (reslevel->num_precincts_x * (uint64_t)reslevel->num_precincts_y * reslevel->nbands > avctx->max_pixels / sizeof(*reslevel->band->prec))
@@ -597,9 +603,9 @@ void ff_jpeg2000_reinit(Jpeg2000Component *comp, Jpeg2000CodingStyle *codsty)
 
 void ff_jpeg2000_cleanup(Jpeg2000Component *comp, Jpeg2000CodingStyle *codsty)
 {
-    int reslevelno, bandno, precno;
+    size_t reslevelno, bandno, precno;
     for (reslevelno = 0;
-         comp->reslevel && reslevelno < codsty->nreslevels;
+         comp->reslevel && reslevelno < comp->reslevel_allocated;
          reslevelno++) {
         Jpeg2000ResLevel *reslevel;
 
@@ -607,23 +613,20 @@ void ff_jpeg2000_cleanup(Jpeg2000Component *comp, Jpeg2000CodingStyle *codsty)
             continue;
 
         reslevel = comp->reslevel + reslevelno;
-        for (bandno = 0; bandno < reslevel->nbands; bandno++) {
+        for (bandno = 0; bandno < reslevel->band_allocated; bandno++) {
             Jpeg2000Band *band;
 
             if (!reslevel->band)
                 continue;
 
             band = reslevel->band + bandno;
-            for (precno = 0; precno < reslevel->num_precincts_x * reslevel->num_precincts_y; precno++) {
+            for (precno = 0; precno < band->prec_allocated; precno++) {
                 if (band->prec) {
                     Jpeg2000Prec *prec = band->prec + precno;
-                    int nb_code_blocks = prec->nb_codeblocks_height * prec->nb_codeblocks_width;
-
                     av_freep(&prec->zerobits);
                     av_freep(&prec->cblkincl);
                     if (prec->cblk) {
-                        int cblkno;
-                        for (cblkno = 0; cblkno < nb_code_blocks; cblkno ++) {
+                        for (size_t cblkno = 0; cblkno < prec->cblk_allocated; cblkno ++) {
                             Jpeg2000Cblk *cblk = &prec->cblk[cblkno];
                             av_freep(&cblk->data);
                             av_freep(&cblk->passes);
diff --git a/libavcodec/jpeg2000.h b/libavcodec/jpeg2000.h
index 389813a9b9..6594d8e5cb 100644
--- a/libavcodec/jpeg2000.h
+++ b/libavcodec/jpeg2000.h
@@ -179,6 +179,7 @@ typedef struct Jpeg2000Cblk {
     uint8_t incl;
     uint16_t length;
     uint16_t *lengthinc;
+    size_t lengthinc_allocated;
     uint8_t nb_lengthinc;
     uint8_t lblock;
     uint8_t *data;
@@ -186,6 +187,7 @@ typedef struct Jpeg2000Cblk {
     int nb_terminations;
     int nb_terminationsinc;
     int *data_start;
+    size_t data_start_allocated;
     Jpeg2000Pass *passes;
     Jpeg2000Layer *layers;
     int coord[2][2]; // border coordinates {{x0, x1}, {y0, y1}}
@@ -195,8 +197,11 @@ typedef struct Jpeg2000Prec {
     int nb_codeblocks_width;
     int nb_codeblocks_height;
     Jpeg2000TgtNode *zerobits;
+    unsigned int zerobits_size;
     Jpeg2000TgtNode *cblkincl;
+    unsigned int cblkincl_size;
     Jpeg2000Cblk *cblk;
+    size_t cblk_allocated;
     int decoded_layers;
     int coord[2][2]; // border coordinates {{x0, x1}, {y0, y1}}
 } Jpeg2000Prec; // precinct
@@ -207,6 +212,7 @@ typedef struct Jpeg2000Band {
     int i_stepsize; // quantization stepsize
     float f_stepsize; // quantization stepsize
     Jpeg2000Prec *prec;
+    size_t prec_allocated;
 } Jpeg2000Band; // subband
 
 typedef struct Jpeg2000ResLevel {
@@ -215,13 +221,17 @@ typedef struct Jpeg2000ResLevel {
     int num_precincts_x, num_precincts_y; // number of precincts in x/y direction
     uint8_t log2_prec_width, log2_prec_height; // exponent of precinct size
     Jpeg2000Band *band;
+    size_t band_allocated;
 } Jpeg2000ResLevel; // resolution level
 
 typedef struct Jpeg2000Component {
     Jpeg2000ResLevel *reslevel;
+    size_t reslevel_allocated;
     DWTContext dwt;
     float *f_data;
+    unsigned int f_data_size;
     int *i_data;
+    unsigned int i_data_size;
     int coord[2][2];   // border coordinates {{x0, x1}, {y0, y1}} -- can be reduced with lowres option
     int coord_o[2][2]; // border coordinates {{x0, x1}, {y0, y1}} -- original values from jpeg2000 headers
     uint8_t roi_shift; // ROI scaling value for the component
diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c
index 00aa73e261..49a815a9b0 100644
--- a/libavcodec/jpeg2000dec.c
+++ b/libavcodec/jpeg2000dec.c
@@ -79,6 +79,7 @@ typedef struct Jpeg2000TilePart {
  * one per component, so tile_part elements have a size of 3 */
 typedef struct Jpeg2000Tile {
     Jpeg2000Component   *comp;
+    size_t              comp_allocated;
     uint8_t             properties[4];
     Jpeg2000CodingStyle codsty[4];
     Jpeg2000QuantStyle  qntsty[4];
@@ -141,6 +142,7 @@ typedef struct Jpeg2000DecoderContext {
     int             curtileno;
 
     Jpeg2000Tile    *tile;
+    size_t          tile_allocated;
     Jpeg2000DSPContext dsp;
 
     /*options parameters*/
@@ -380,8 +382,7 @@ static int get_siz(Jpeg2000DecoderContext *s)
         return AVERROR(EINVAL);
     }
 
-    s->tile = av_calloc(s->numXtiles * s->numYtiles, sizeof(*s->tile));
-    if (!s->tile) {
+    if (av_reallocz_array_reuse(&s->tile, &s->tile_allocated, s->numXtiles * s->numYtiles, INT_MAX, sizeof(*s->tile))) {
         s->numXtiles = s->numYtiles = 0;
         return AVERROR(ENOMEM);
     }
@@ -389,8 +390,7 @@ static int get_siz(Jpeg2000DecoderContext *s)
     for (i = 0; i < s->numXtiles * s->numYtiles; i++) {
         Jpeg2000Tile *tile = s->tile + i;
 
-        tile->comp = av_mallocz(s->ncomponents * sizeof(*tile->comp));
-        if (!tile->comp)
+        if (av_reallocz_array_reuse(&tile->comp, &tile->comp_allocated, s->ncomponents, INT_MAX, sizeof(*tile->comp)))
             return AVERROR(ENOMEM);
     }
 
@@ -1160,7 +1160,6 @@ static int jpeg2000_decode_packet(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile,
         for (cblkno = 0; cblkno < nb_code_blocks; cblkno++) {
             Jpeg2000Cblk *cblk = prec->cblk + cblkno;
             int incl, newpasses, llen;
-            void *tmp;
 
             if (cblk->npasses)
                 incl = get_bits(s, 1);
@@ -1200,14 +1199,10 @@ static int jpeg2000_decode_packet(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile,
 
             cblk->nb_lengthinc = 0;
             cblk->nb_terminationsinc = 0;
-            av_free(cblk->lengthinc);
-            cblk->lengthinc = av_calloc(newpasses, sizeof(*cblk->lengthinc));
-            if (!cblk->lengthinc)
+            if (av_realloc_array_reuse(&cblk->lengthinc, &cblk->lengthinc_allocated, newpasses, INT_MAX, sizeof(*cblk->lengthinc)) ||
+                av_realloc_array_reuse(&cblk->data_start, &cblk->data_start_allocated, cblk->nb_terminations + newpasses + 1, INT_MAX, sizeof(*cblk->data_start)))
                 return AVERROR(ENOMEM);
-            tmp = av_realloc_array(cblk->data_start, cblk->nb_terminations + newpasses + 1, sizeof(*cblk->data_start));
-            if (!tmp)
-                return AVERROR(ENOMEM);
-            cblk->data_start = tmp;
+
             do {
                 int newpasses1 = 0;
 
@@ -1296,7 +1291,6 @@ static int jpeg2000_decode_packet(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile,
                     cblk->data_start[cblk->nb_terminations] = cblk->length;
                 }
             }
-            av_freep(&cblk->lengthinc);
         }
     }
     // Save state of stream
@@ -2172,24 +2166,9 @@ static int jpeg2000_mct_write_frame(AVCodecContext *avctx, void *td,
 
 static void jpeg2000_dec_cleanup(Jpeg2000DecoderContext *s)
 {
-    int tileno, compno;
-    for (tileno = 0; tileno < s->numXtiles * s->numYtiles; tileno++) {
-        if (s->tile[tileno].comp) {
-            for (compno = 0; compno < s->ncomponents; compno++) {
-                Jpeg2000Component *comp     = s->tile[tileno].comp   + compno;
-                Jpeg2000CodingStyle *codsty = s->tile[tileno].codsty + compno;
-
-                ff_jpeg2000_cleanup(comp, codsty);
-            }
-            av_freep(&s->tile[tileno].comp);
-            av_freep(&s->tile[tileno].packed_headers);
-            s->tile[tileno].packed_headers_size = 0;
-        }
-    }
     av_freep(&s->packed_headers);
     s->packed_headers_size = 0;
     memset(&s->packed_headers_stream, 0, sizeof(s->packed_headers_stream));
-    av_freep(&s->tile);
     memset(s->codsty, 0, sizeof(s->codsty));
     memset(s->qntsty, 0, sizeof(s->qntsty));
     memset(s->properties, 0, sizeof(s->properties));
@@ -2726,6 +2705,19 @@ static av_cold int jpeg2000_decode_close(AVCodecContext *avctx)
 {
     Jpeg2000DecoderContext *s = avctx->priv_data;
 
+    for (size_t tileno = 0; tileno < s->tile_allocated; tileno++) {
+        if (s->tile[tileno].comp) {
+            for (size_t compno = 0; compno < s->tile[tileno].comp_allocated; compno++) {
+                Jpeg2000Component *comp     = s->tile[tileno].comp   + compno;
+                Jpeg2000CodingStyle *codsty = s->tile[tileno].codsty + compno;
+
+                ff_jpeg2000_cleanup(comp, codsty);
+            }
+            av_freep(&s->tile[tileno].comp);
+            av_freep(&s->tile[tileno].packed_headers);
+        }
+    }
+    av_freep(&s->tile);
     av_freep(&s->idwt);
     av_freep(&s->cb);
 
diff --git a/libavcodec/jpeg2000dwt.c b/libavcodec/jpeg2000dwt.c
index 921461b6d7..f3ddefe48f 100644
--- a/libavcodec/jpeg2000dwt.c
+++ b/libavcodec/jpeg2000dwt.c
@@ -531,6 +531,7 @@ int ff_jpeg2000_dwt_init(DWTContext *s, int border[2][2],
 {
     int i, j, lev = decomp_levels,
         b[2][2];
+    size_t prod;
 
     s->ndeclevels = decomp_levels;
     s->type       = type;
@@ -556,11 +557,15 @@ int ff_jpeg2000_dwt_init(DWTContext *s, int border[2][2],
         }
 
     if (type == FF_DWT97) {
-        s->f_linebuf = av_malloc_array(s->linesize, s->max_slices*sizeof(*s->f_linebuf));
+        if (av_size_mult(s->linesize, s->max_slices*sizeof(*s->f_linebuf), &prod))
+            return AVERROR(ENOMEM);
+        av_fast_malloc(&s->f_linebuf, &s->f_linebuf_size, prod);
         if (!s->f_linebuf)
             return AVERROR(ENOMEM);
     } else {
-        s->i_linebuf = av_malloc_array(s->linesize, s->max_slices*sizeof(*s->i_linebuf));
+        if (av_size_mult(s->linesize, s->max_slices*sizeof(*s->i_linebuf), &prod))
+            return AVERROR(ENOMEM);
+        av_fast_malloc(&s->i_linebuf, &s->i_linebuf_size, prod);
         if (!s->i_linebuf)
             return AVERROR(ENOMEM);
     }
diff --git a/libavcodec/jpeg2000dwt.h b/libavcodec/jpeg2000dwt.h
index d5e94c9916..fb6fc8f121 100644
--- a/libavcodec/jpeg2000dwt.h
+++ b/libavcodec/jpeg2000dwt.h
@@ -48,7 +48,9 @@ typedef struct DWTContext {
     uint8_t ndeclevels;                  ///< number of decomposition levels
     uint8_t type;                        ///< 0 for 9/7; 1 for 5/3
     int32_t *i_linebuf;                  ///< int buffer used by transform
+    unsigned int i_linebuf_size;
     float   *f_linebuf;                  ///< float buffer used by transform
+    unsigned int f_linebuf_size;
     int max_slices;
     int linesize;
 } DWTContext;
-- 
2.30.2


[-- Attachment #3: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [FFmpeg-devel] [PATCH 06/11] lavc/jpeg2000: Switch Jpeg2000TgtNode to int32_t parent
  2022-09-28 10:04 [FFmpeg-devel] [PATCH 01/11] lavc/jpeg2000dec: Finer granularity threading Tomas Härdin
                   ` (3 preceding siblings ...)
  2022-09-28 10:06 ` [FFmpeg-devel] [PATCH 05/11] lavc/jpeg2000*: Use av_realloc_array_reuse() and av_reallocz_array_reuse() to eliminate lots of allocations Tomas Härdin
@ 2022-09-28 10:07 ` Tomas Härdin
  2022-09-28 10:07 ` [FFmpeg-devel] [PATCH 07/11] lavc/jpeg2000: Speed up ff_jpeg2000_tag_tree_init() using stereotypes for sizes <= 4x4 Tomas Härdin
                   ` (4 subsequent siblings)
  9 siblings, 0 replies; 12+ messages in thread
From: Tomas Härdin @ 2022-09-28 10:07 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

[-- Attachment #1: Type: text/plain, Size: 1 bytes --]



[-- Attachment #2: 0006-lavc-jpeg2000-Switch-Jpeg2000TgtNode-to-int32_t-pare.patch --]
[-- Type: text/x-patch, Size: 9160 bytes --]

From 807d7d315269126e7eccd0c36d7c29615cb98676 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= <git@haerdin.se>
Date: Tue, 7 Jun 2022 16:43:40 +0200
Subject: [PATCH 06/11] lavc/jpeg2000: Switch Jpeg2000TgtNode to int32_t parent

---
 libavcodec/j2kenc.c      | 44 ++++++++++++++++++++--------------------
 libavcodec/jpeg2000.c    | 20 +++++++++---------
 libavcodec/jpeg2000.h    |  2 +-
 libavcodec/jpeg2000dec.c | 18 ++++++++--------
 4 files changed, 42 insertions(+), 42 deletions(-)

diff --git a/libavcodec/j2kenc.c b/libavcodec/j2kenc.c
index cd325e94e0..8f23ddbcf6 100644
--- a/libavcodec/j2kenc.c
+++ b/libavcodec/j2kenc.c
@@ -249,36 +249,36 @@ static void j2k_flush(Jpeg2000EncoderContext *s)
 /* tag tree routines */
 
 /** code the value stored in node */
-static void tag_tree_code(Jpeg2000EncoderContext *s, Jpeg2000TgtNode *node, int threshold)
+static void tag_tree_code(Jpeg2000EncoderContext *s, Jpeg2000TgtNode *nodes, int32_t node, int threshold)
 {
-    Jpeg2000TgtNode *stack[30];
+    int32_t stack[30];
     int sp = -1, curval = 0;
 
-    while(node->parent){
+    while(nodes[node].parent >= 0){
         stack[++sp] = node;
-        node = node->parent;
+        node = nodes[node].parent;
     }
 
     while (1) {
-        if (curval > node->temp_val)
-            node->temp_val = curval;
+        if (curval > nodes[node].temp_val)
+            nodes[node].temp_val = curval;
         else {
-            curval = node->temp_val;
+            curval = nodes[node].temp_val;
         }
 
-        if (node->val >= threshold) {
+        if (nodes[node].val >= threshold) {
             put_bits(s, 0, threshold - curval);
             curval = threshold;
         } else {
-            put_bits(s, 0, node->val - curval);
-            curval = node->val;
-            if (!node->vis) {
+            put_bits(s, 0, nodes[node].val - curval);
+            curval = nodes[node].val;
+            if (!nodes[node].vis) {
                 put_bits(s, 1, 1);
-                node->vis = 1;
+                nodes[node].vis = 1;
             }
         }
 
-        node->temp_val = curval;
+        nodes[node].temp_val = curval;
         if (sp < 0)
             break;
         node = stack[sp--];
@@ -286,13 +286,13 @@ static void tag_tree_code(Jpeg2000EncoderContext *s, Jpeg2000TgtNode *node, int
 }
 
 /** update the value in node */
-static void tag_tree_update(Jpeg2000TgtNode *node)
+static void tag_tree_update(Jpeg2000TgtNode *nodes, int node)
 {
-    while (node->parent){
-        if (node->parent->val <= node->val)
+    while (nodes[node].parent >= 0){
+        if (nodes[nodes[node].parent].val <= nodes[node].val)
             break;
-        node->parent->val = node->val;
-        node = node->parent;
+        nodes[nodes[node].parent].val = nodes[node].val;
+        node = nodes[node].parent;
     }
 }
 
@@ -812,7 +812,7 @@ static int encode_packet(Jpeg2000EncoderContext *s, Jpeg2000ResLevel *rlevel, in
                     prec->zerobits[pos].val = expn[bandno] + numgbits - 1 - cblk->nonzerobits;
                     cblk->incl = 0;
                     cblk->lblock = 3;
-                    tag_tree_update(prec->zerobits + pos);
+                    tag_tree_update(prec->zerobits, pos);
                     for (i = 0; i < nlayers; i++) {
                         if (cblk->layers[i].npasses > 0) {
                             prec->cblkincl[pos].val = i;
@@ -821,7 +821,7 @@ static int encode_packet(Jpeg2000EncoderContext *s, Jpeg2000ResLevel *rlevel, in
                     }
                     if (i == nlayers)
                         prec->cblkincl[pos].val = i;
-                    tag_tree_update(prec->cblkincl + pos);
+                    tag_tree_update(prec->cblkincl, pos);
                 }
             }
         }
@@ -875,7 +875,7 @@ static int encode_packet(Jpeg2000EncoderContext *s, Jpeg2000ResLevel *rlevel, in
 
                 // inclusion information
                 if (!cblk->incl)
-                    tag_tree_code(s, prec->cblkincl + pos, layno + 1);
+                    tag_tree_code(s, prec->cblkincl, pos, layno + 1);
                 else {
                     put_bits(s, cblk->layers[layno].npasses > 0, 1);
                 }
@@ -885,7 +885,7 @@ static int encode_packet(Jpeg2000EncoderContext *s, Jpeg2000ResLevel *rlevel, in
 
                 // zerobits information
                 if (!cblk->incl) {
-                    tag_tree_code(s, prec->zerobits + pos, 100);
+                    tag_tree_code(s, prec->zerobits, pos, 100);
                     cblk->incl = 1;
                 }
 
diff --git a/libavcodec/jpeg2000.c b/libavcodec/jpeg2000.c
index afff9809e4..4ddb45bf33 100644
--- a/libavcodec/jpeg2000.c
+++ b/libavcodec/jpeg2000.c
@@ -55,8 +55,8 @@ static int32_t tag_tree_size(int w, int h)
 static int ff_jpeg2000_tag_tree_init(Jpeg2000TgtNode **old, unsigned int *size, int w, int h)
 {
     int pw = w, ph = h;
-    Jpeg2000TgtNode *t, *t2;
-    int32_t tt_size;
+    Jpeg2000TgtNode *t;
+    int32_t tt_size, ofs = 0;
     size_t prod;
 
     tt_size = tag_tree_size(w, h);
@@ -77,15 +77,15 @@ static int ff_jpeg2000_tag_tree_init(Jpeg2000TgtNode **old, unsigned int *size,
 
         w  = (w + 1) >> 1;
         h  = (h + 1) >> 1;
-        t2 = t + pw * ph;
+        ofs += pw * ph;
 
         for (i = 0; i < ph; i++)
             for (j = 0; j < pw; j++)
-                t[i * pw + j].parent = &t2[(i >> 1) * w + (j >> 1)];
+                t[i * pw + j].parent = (i >> 1) * w + (j >> 1) + ofs;
 
-        t = t2;
+        t += pw * ph;
     }
-    t[0].parent = NULL;
+    t[0].parent = -1;
     return 0;
 }
 
@@ -320,6 +320,10 @@ static int init_prec(AVCodecContext *avctx,
                                 band->log2_cblk_height)
         - (prec->coord[1][0] >> band->log2_cblk_height);
 
+    /* \sum_{i=0}^\inf 4^-i = 4/3 */
+    if (prec->nb_codeblocks_width * (uint64_t)prec->nb_codeblocks_height > INT32_MAX / 4 * 3) {
+        return AVERROR(ENOMEM);
+    }
 
     /* Tag trees initialization */
     if ((ret = ff_jpeg2000_tag_tree_init(&prec->cblkincl,
@@ -332,10 +336,6 @@ static int init_prec(AVCodecContext *avctx,
                                          prec->nb_codeblocks_height)) < 0)
         return ret;
 
-    if (prec->nb_codeblocks_width * (uint64_t)prec->nb_codeblocks_height > INT_MAX) {
-        prec->cblk = NULL;
-        return AVERROR(ENOMEM);
-    }
     nb_codeblocks = prec->nb_codeblocks_width * prec->nb_codeblocks_height;
     if (av_reallocz_array_reuse(&prec->cblk, &prec->cblk_allocated, nb_codeblocks, INT_MAX, sizeof(*prec->cblk)))
         return AVERROR(ENOMEM);
diff --git a/libavcodec/jpeg2000.h b/libavcodec/jpeg2000.h
index 6594d8e5cb..c9a2e55efa 100644
--- a/libavcodec/jpeg2000.h
+++ b/libavcodec/jpeg2000.h
@@ -128,10 +128,10 @@ typedef struct Jpeg2000T1Context {
 } Jpeg2000T1Context;
 
 typedef struct Jpeg2000TgtNode {
+    int32_t parent;
     uint8_t val;
     uint8_t temp_val;
     uint8_t vis;
-    struct Jpeg2000TgtNode *parent;
 } Jpeg2000TgtNode;
 
 typedef struct Jpeg2000CodingStyle {
diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c
index 49a815a9b0..46f7d841b5 100644
--- a/libavcodec/jpeg2000dec.c
+++ b/libavcodec/jpeg2000dec.c
@@ -185,24 +185,24 @@ static void jpeg2000_flush(Jpeg2000DecoderContext *s)
 }
 
 /* decode the value stored in node */
-static int tag_tree_decode(Jpeg2000DecoderContext *s, Jpeg2000TgtNode *node,
+static int tag_tree_decode(Jpeg2000DecoderContext *s, Jpeg2000TgtNode *nodes, int32_t node,
                            int threshold)
 {
     Jpeg2000TgtNode *stack[30];
     int sp = -1, curval = 0;
 
-    if (!node) {
+    if (node < 0) {
         av_log(s->avctx, AV_LOG_ERROR, "missing node\n");
         return AVERROR_INVALIDDATA;
     }
 
-    while (node && !node->vis) {
-        stack[++sp] = node;
-        node        = node->parent;
+    while (node >= 0 && !nodes[node].vis) {
+        stack[++sp] = &nodes[node];
+        node        = nodes[node].parent;
     }
 
-    if (node)
-        curval = node->val;
+    if (node >= 0)
+        curval = nodes[node].val;
     else
         curval = stack[sp]->val;
 
@@ -1164,7 +1164,7 @@ static int jpeg2000_decode_packet(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile,
             if (cblk->npasses)
                 incl = get_bits(s, 1);
             else
-                incl = tag_tree_decode(s, prec->cblkincl + cblkno, layno + 1) == layno;
+                incl = tag_tree_decode(s, prec->cblkincl, cblkno, layno + 1) == layno;
             if (!incl)
                 continue;
             else if (incl < 0)
@@ -1172,7 +1172,7 @@ static int jpeg2000_decode_packet(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile,
 
             if (!cblk->npasses) {
                 int v = expn[bandno] + numgbits - 1 -
-                        tag_tree_decode(s, prec->zerobits + cblkno, 100);
+                        tag_tree_decode(s, prec->zerobits, cblkno, 100);
                 if (v < 0 || v > 30) {
                     av_log(s->avctx, AV_LOG_ERROR,
                            "nonzerobits %d invalid or unsupported\n", v);
-- 
2.30.2


[-- Attachment #3: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [FFmpeg-devel] [PATCH 07/11] lavc/jpeg2000: Speed up ff_jpeg2000_tag_tree_init() using stereotypes for sizes <= 4x4
  2022-09-28 10:04 [FFmpeg-devel] [PATCH 01/11] lavc/jpeg2000dec: Finer granularity threading Tomas Härdin
                   ` (4 preceding siblings ...)
  2022-09-28 10:07 ` [FFmpeg-devel] [PATCH 06/11] lavc/jpeg2000: Switch Jpeg2000TgtNode to int32_t parent Tomas Härdin
@ 2022-09-28 10:07 ` Tomas Härdin
  2022-09-28 10:07 ` [FFmpeg-devel] [PATCH 08/11] lavc/jpeg2000: Reindent Tomas Härdin
                   ` (3 subsequent siblings)
  9 siblings, 0 replies; 12+ messages in thread
From: Tomas Härdin @ 2022-09-28 10:07 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

[-- Attachment #1: Type: text/plain, Size: 1 bytes --]



[-- Attachment #2: 0007-lavc-jpeg2000-Speed-up-ff_jpeg2000_tag_tree_init-usi.patch --]
[-- Type: text/x-patch, Size: 2598 bytes --]

From a31a6d82b257f51618389a67af18d49cc78ac240 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= <git@haerdin.se>
Date: Wed, 8 Jun 2022 10:08:15 +0200
Subject: [PATCH 07/11] lavc/jpeg2000: Speed up ff_jpeg2000_tag_tree_init()
 using stereotypes for sizes <= 4x4

---
 libavcodec/jpeg2000.c | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/libavcodec/jpeg2000.c b/libavcodec/jpeg2000.c
index 4ddb45bf33..203782502c 100644
--- a/libavcodec/jpeg2000.c
+++ b/libavcodec/jpeg2000.c
@@ -51,6 +51,31 @@ static int32_t tag_tree_size(int w, int h)
     return (int32_t)(res + 1);
 }
 
+#define T(x) (x*sizeof(Jpeg2000TgtNode))
+
+static const size_t tt_sizes[16] = {
+    T(1),T(3),T(6),T(7),T(3),T(5),T(9),T(11),T(6),T(9),T(14),T(17),T(7),T(11),T(17),T(21),
+};
+
+static const Jpeg2000TgtNode tt_stereotypes[16][21] = {
+    {{-1},},
+    {{2},{2},{-1},},
+    {{3},{3},{4},{5},{5},{-1},},
+    {{4},{4},{5},{5},{6},{6},{-1},},
+    {{2},{2},{-1},},
+    {{4},{4},{4},{4},{-1},},
+    {{6},{6},{7},{6},{6},{7},{8},{8},{-1},},
+    {{8},{8},{9},{9},{8},{8},{9},{9},{10},{10},{-1},},
+    {{3},{3},{4},{5},{5},{-1},},
+    {{6},{6},{6},{6},{7},{7},{8},{8},{-1},},
+    {{9},{9},{10},{9},{9},{10},{11},{11},{12},{13},{13},{13},{13},{-1},},
+    {{12},{12},{13},{13},{12},{12},{13},{13},{14},{14},{15},{15},{16},{16},{16},{16},{-1},},
+    {{4},{4},{5},{5},{6},{6},{-1},},
+    {{8},{8},{8},{8},{9},{9},{9},{9},{10},{10},{-1},},
+    {{12},{12},{13},{12},{12},{13},{14},{14},{15},{14},{14},{15},{16},{16},{16},{16},{-1},},
+    {{16},{16},{17},{17},{16},{16},{17},{17},{18},{18},{19},{19},{18},{18},{19},{19},{20},{20},{20},{20},{-1},},
+};
+
 /* allocate the memory for tag tree */
 static int ff_jpeg2000_tag_tree_init(Jpeg2000TgtNode **old, unsigned int *size, int w, int h)
 {
@@ -59,6 +84,16 @@ static int ff_jpeg2000_tag_tree_init(Jpeg2000TgtNode **old, unsigned int *size,
     int32_t tt_size, ofs = 0;
     size_t prod;
 
+    if (w <= 4 && h <= 4) {
+        int idx = w-1 + (h-1)*4;
+        size_t sz = tt_sizes[idx];
+        av_fast_malloc(old, size, sz);
+        if (*old) {
+            memcpy(*old, tt_stereotypes[idx], sz);
+            return 0;
+        } else
+            return AVERROR(ENOMEM);
+    } else {
     tt_size = tag_tree_size(w, h);
 
     if (av_size_mult(tt_size, sizeof(*t), &prod))
@@ -87,6 +122,7 @@ static int ff_jpeg2000_tag_tree_init(Jpeg2000TgtNode **old, unsigned int *size,
     }
     t[0].parent = -1;
     return 0;
+    }
 }
 
 void ff_tag_tree_zero(Jpeg2000TgtNode *t, int w, int h, int val)
-- 
2.30.2


[-- Attachment #3: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [FFmpeg-devel] [PATCH 08/11] lavc/jpeg2000: Reindent
  2022-09-28 10:04 [FFmpeg-devel] [PATCH 01/11] lavc/jpeg2000dec: Finer granularity threading Tomas Härdin
                   ` (5 preceding siblings ...)
  2022-09-28 10:07 ` [FFmpeg-devel] [PATCH 07/11] lavc/jpeg2000: Speed up ff_jpeg2000_tag_tree_init() using stereotypes for sizes <= 4x4 Tomas Härdin
@ 2022-09-28 10:07 ` Tomas Härdin
  2022-09-28 10:08 ` [FFmpeg-devel] [PATCH 09/11] lavc/jpeg2000: Minimize calls to av_codec_is_encoder() Tomas Härdin
                   ` (2 subsequent siblings)
  9 siblings, 0 replies; 12+ messages in thread
From: Tomas Härdin @ 2022-09-28 10:07 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

[-- Attachment #1: Type: text/plain, Size: 1 bytes --]



[-- Attachment #2: 0008-lavc-jpeg2000-Reindent.patch --]
[-- Type: text/x-patch, Size: 2032 bytes --]

From 2e915fbbd481c1d202c0ec058842cfc9f9593871 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= <git@haerdin.se>
Date: Tue, 14 Jun 2022 11:23:08 +0200
Subject: [PATCH 08/11] lavc/jpeg2000: Reindent

---
 libavcodec/jpeg2000.c | 44 +++++++++++++++++++++----------------------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/libavcodec/jpeg2000.c b/libavcodec/jpeg2000.c
index 203782502c..2574c2b97e 100644
--- a/libavcodec/jpeg2000.c
+++ b/libavcodec/jpeg2000.c
@@ -94,34 +94,34 @@ static int ff_jpeg2000_tag_tree_init(Jpeg2000TgtNode **old, unsigned int *size,
         } else
             return AVERROR(ENOMEM);
     } else {
-    tt_size = tag_tree_size(w, h);
+        tt_size = tag_tree_size(w, h);
 
-    if (av_size_mult(tt_size, sizeof(*t), &prod))
-        return AVERROR(ENOMEM);
+        if (av_size_mult(tt_size, sizeof(*t), &prod))
+            return AVERROR(ENOMEM);
 
-    av_fast_malloc(old, size, prod);
-    if (!*old)
-        return AVERROR(ENOMEM);
-    t = *old;
-    memset(*old, 0, prod);
+        av_fast_malloc(old, size, prod);
+        if (!*old)
+            return AVERROR(ENOMEM);
+        t = *old;
+        memset(*old, 0, prod);
 
-    while (w > 1 || h > 1) {
-        int i, j;
-        pw = w;
-        ph = h;
+        while (w > 1 || h > 1) {
+            int i, j;
+            pw = w;
+            ph = h;
 
-        w  = (w + 1) >> 1;
-        h  = (h + 1) >> 1;
-        ofs += pw * ph;
+            w  = (w + 1) >> 1;
+            h  = (h + 1) >> 1;
+            ofs += pw * ph;
 
-        for (i = 0; i < ph; i++)
-            for (j = 0; j < pw; j++)
-                t[i * pw + j].parent = (i >> 1) * w + (j >> 1) + ofs;
+            for (i = 0; i < ph; i++)
+                for (j = 0; j < pw; j++)
+                    t[i * pw + j].parent = (i >> 1) * w + (j >> 1) + ofs;
 
-        t += pw * ph;
-    }
-    t[0].parent = -1;
-    return 0;
+            t += pw * ph;
+        }
+        t[0].parent = -1;
+        return 0;
     }
 }
 
-- 
2.30.2


[-- Attachment #3: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [FFmpeg-devel] [PATCH 09/11] lavc/jpeg2000: Minimize calls to av_codec_is_encoder()
  2022-09-28 10:04 [FFmpeg-devel] [PATCH 01/11] lavc/jpeg2000dec: Finer granularity threading Tomas Härdin
                   ` (6 preceding siblings ...)
  2022-09-28 10:07 ` [FFmpeg-devel] [PATCH 08/11] lavc/jpeg2000: Reindent Tomas Härdin
@ 2022-09-28 10:08 ` Tomas Härdin
  2022-09-28 10:09 ` [FFmpeg-devel] [PATCH 10/11] lavc/jpeg2000dec: Use coarser slicing for initial reslevels Tomas Härdin
  2022-09-28 10:10 ` [FFmpeg-devel] [PATCH 11/11] lavc/jpeg2000dec: Component-level threading of write_frame() Tomas Härdin
  9 siblings, 0 replies; 12+ messages in thread
From: Tomas Härdin @ 2022-09-28 10:08 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

[-- Attachment #1: Type: text/plain, Size: 1 bytes --]



[-- Attachment #2: 0009-lavc-jpeg2000-Minimize-calls-to-av_codec_is_encoder.patch --]
[-- Type: text/x-patch, Size: 4211 bytes --]

From ecb1d9ff671b83bddb0d1c7d31d60ade4b5cdead Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= <git@haerdin.se>
Date: Tue, 14 Jun 2022 10:57:45 +0200
Subject: [PATCH 09/11] lavc/jpeg2000: Minimize calls to av_codec_is_encoder()

---
 libavcodec/jpeg2000.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/libavcodec/jpeg2000.c b/libavcodec/jpeg2000.c
index 2574c2b97e..bb6efab72e 100644
--- a/libavcodec/jpeg2000.c
+++ b/libavcodec/jpeg2000.c
@@ -248,7 +248,7 @@ static void init_band_stepsize(AVCodecContext *avctx,
                                Jpeg2000CodingStyle *codsty,
                                Jpeg2000QuantStyle *qntsty,
                                int bandno, int gbandno, int reslevelno,
-                               int cbps)
+                               int cbps, int is_enc)
 {
     /* TODO: Implementation of quantization step not finished,
      * see ISO/IEC 15444-1:2002 E.1 and A.6.4. */
@@ -306,7 +306,7 @@ static void init_band_stepsize(AVCodecContext *avctx,
 
     /* FIXME: In OpenJPEG code stepsize = stepsize * 0.5. Why?
      * If not set output of entropic decoder is not correct. */
-    if (!av_codec_is_encoder(avctx->codec))
+    if (!is_enc)
         band->f_stepsize *= 0.5;
 }
 
@@ -317,7 +317,8 @@ static int init_prec(AVCodecContext *avctx,
                      Jpeg2000CodingStyle *codsty,
                      int precno, int bandno, int reslevelno,
                      int log2_band_prec_width,
-                     int log2_band_prec_height)
+                     int log2_band_prec_height,
+                     int is_enc)
 {
     Jpeg2000Prec *prec = band->prec + precno;
     int nb_codeblocks, cblkno, ret;
@@ -414,7 +415,7 @@ static int init_prec(AVCodecContext *avctx,
         cblk->lblock    = 3;
         cblk->length    = 0;
         cblk->npasses   = 0;
-        if (av_codec_is_encoder(avctx->codec)) {
+        if (is_enc) {
             av_freep(&cblk->layers);
             cblk->layers = av_calloc(codsty->nlayers, sizeof(*cblk->layers));
             if (!cblk->layers)
@@ -431,7 +432,7 @@ static int init_band(AVCodecContext *avctx,
                      Jpeg2000CodingStyle *codsty,
                      Jpeg2000QuantStyle *qntsty,
                      int bandno, int gbandno, int reslevelno,
-                     int cbps, int dx, int dy)
+                     int cbps, int dx, int dy, int is_enc)
 {
     Jpeg2000Band *band = reslevel->band + bandno;
     uint8_t log2_band_prec_width, log2_band_prec_height;
@@ -440,7 +441,7 @@ static int init_band(AVCodecContext *avctx,
     int nb_precincts;
     int i, j, ret;
 
-    init_band_stepsize(avctx, band, codsty, qntsty, bandno, gbandno, reslevelno, cbps);
+    init_band_stepsize(avctx, band, codsty, qntsty, bandno, gbandno, reslevelno, cbps, is_enc);
 
     /* computation of tbx_0, tbx_1, tby_0, tby_1
      * see ISO/IEC 15444-1:2002 B.5 eq. B-15 and tbl B.1
@@ -494,7 +495,8 @@ static int init_band(AVCodecContext *avctx,
     for (precno = 0; precno < nb_precincts; precno++) {
         ret = init_prec(avctx, band, reslevel, comp, codsty,
                         precno, bandno, reslevelno,
-                        log2_band_prec_width, log2_band_prec_height);
+                        log2_band_prec_width, log2_band_prec_height,
+                        is_enc);
         if (ret < 0)
             return ret;
     }
@@ -511,6 +513,7 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp,
     int reslevelno, bandno, gbandno = 0, ret, i, j;
     uint32_t csize;
     size_t prod;
+    int is_enc = av_codec_is_encoder(avctx->codec);
 
     if (codsty->nreslevels2decode <= 0) {
         av_log(avctx, AV_LOG_ERROR, "nreslevels2decode %d invalid or uninitialized\n", codsty->nreslevels2decode);
@@ -608,7 +611,7 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp,
             ret = init_band(avctx, reslevel,
                             comp, codsty, qntsty,
                             bandno, gbandno, reslevelno,
-                            cbps, dx, dy);
+                            cbps, dx, dy, is_enc);
             if (ret < 0)
                 return ret;
         }
-- 
2.30.2


[-- Attachment #3: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [FFmpeg-devel] [PATCH 10/11] lavc/jpeg2000dec: Use coarser slicing for initial reslevels
  2022-09-28 10:04 [FFmpeg-devel] [PATCH 01/11] lavc/jpeg2000dec: Finer granularity threading Tomas Härdin
                   ` (7 preceding siblings ...)
  2022-09-28 10:08 ` [FFmpeg-devel] [PATCH 09/11] lavc/jpeg2000: Minimize calls to av_codec_is_encoder() Tomas Härdin
@ 2022-09-28 10:09 ` Tomas Härdin
  2022-09-28 10:10 ` [FFmpeg-devel] [PATCH 11/11] lavc/jpeg2000dec: Component-level threading of write_frame() Tomas Härdin
  9 siblings, 0 replies; 12+ messages in thread
From: Tomas Härdin @ 2022-09-28 10:09 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

[-- Attachment #1: Type: text/plain, Size: 156 bytes --]

This patch is more informal; there's probably a better way to bump
performance up above 50 fps. This is for the smoke_sauna sample on
SVT's FTP, by the way.

[-- Attachment #2: 0010-lavc-jpeg2000dec-Use-coarser-slicing-for-initial-res.patch --]
[-- Type: text/x-patch, Size: 1338 bytes --]

From 103c38adabae39a607049ef517de43f4d2f9d406 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= <git@haerdin.se>
Date: Tue, 14 Jun 2022 11:19:06 +0200
Subject: [PATCH 10/11] lavc/jpeg2000dec: Use coarser slicing for initial
 reslevels

This brings -lowres 2 lossless 4K J2K on an AMD EPYC 7R32 to 52 fps (2080% CPU).
---
 libavcodec/jpeg2000dec.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c
index 46f7d841b5..71176d944d 100644
--- a/libavcodec/jpeg2000dec.c
+++ b/libavcodec/jpeg2000dec.c
@@ -2673,9 +2673,19 @@ static int jpeg2000_decode_frame(AVCodecContext *avctx, AVFrame *picture,
 
     for (s->reslevel = 0; s->reslevel < maxreslevels; s->reslevel++) {
         for (s->dir = 0; s->dir < 2; s->dir++) {
+            int before = s->slices;
+            int div = s->slices >= 96 ? 7 : 5;
+
+            if (s->reslevel < div) {
+                int halve = 1<<(div - s->reslevel + (s->slices >= 96 ? 0 : 1 - s->dir));
+                s->slices = (s->slices + halve-1)/halve;
+            }
+
             if ((ret = avctx->execute2(avctx, jpeg2000_idwt, NULL, NULL,
                     s->numXtiles * s->numYtiles * s->ncomponents * s->slices)) < 0)
                 goto end;
+
+            s->slices = before;
         }
     }
 
-- 
2.30.2


[-- Attachment #3: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [FFmpeg-devel] [PATCH 11/11] lavc/jpeg2000dec: Component-level threading of write_frame()
  2022-09-28 10:04 [FFmpeg-devel] [PATCH 01/11] lavc/jpeg2000dec: Finer granularity threading Tomas Härdin
                   ` (8 preceding siblings ...)
  2022-09-28 10:09 ` [FFmpeg-devel] [PATCH 10/11] lavc/jpeg2000dec: Use coarser slicing for initial reslevels Tomas Härdin
@ 2022-09-28 10:10 ` Tomas Härdin
  9 siblings, 0 replies; 12+ messages in thread
From: Tomas Härdin @ 2022-09-28 10:10 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

[-- Attachment #1: Type: text/plain, Size: 99 bytes --]

Ideally, the clipping done by write_frame() and the MCT stuff would be
done at the final IDWT stage.

[-- Attachment #2: 0011-lavc-jpeg2000dec-Component-level-threading-of-write_.patch --]
[-- Type: text/x-patch, Size: 5610 bytes --]

From 34f055bb0732085d6f97d8f27890b47afb8ca868 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= <git@haerdin.se>
Date: Tue, 14 Jun 2022 15:45:32 +0200
Subject: [PATCH 11/11] lavc/jpeg2000dec: Component-level threading of
 write_frame()

Split off MCT and don't bother with it unless the picture actually uses MCT.
---
 libavcodec/jpeg2000dec.c | 37 ++++++++++++++++++++++++++-----------
 1 file changed, 26 insertions(+), 11 deletions(-)

diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c
index 71176d944d..8b984cfc27 100644
--- a/libavcodec/jpeg2000dec.c
+++ b/libavcodec/jpeg2000dec.c
@@ -156,6 +156,7 @@ typedef struct Jpeg2000DecoderContext {
     // used for idwt slicing
     int reslevel, dir, slices;
     int have_dwt97_int; // 1 if any coding style is FF_DWT97_INT
+    int have_mct;
 } Jpeg2000DecoderContext;
 
 /* get_bits functions for JPEG2000 packet bitstream
@@ -604,6 +605,9 @@ static int get_cod(Jpeg2000DecoderContext *s, Jpeg2000CodingStyle *c,
         return AVERROR_INVALIDDATA;
     }
 
+    if (tmp.mct)
+        s->have_mct = 1;
+
     if ((ret = get_cox(s, &tmp)) < 0)
         return ret;
     tmp.init = 1;
@@ -2074,16 +2078,14 @@ static int jpeg2000_dwt97_int_postshift(AVCodecContext *avctx, void *td,
 
 #define WRITE_FRAME(D, PIXEL)                                                                     \
     static inline void write_frame_ ## D(const Jpeg2000DecoderContext * s, Jpeg2000Tile * tile,   \
-                                         AVFrame * picture, int precision)                        \
+                                         AVFrame * picture, int precision, int compno)            \
     {                                                                                             \
         const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(s->avctx->pix_fmt);               \
         int planar    = !!(pixdesc->flags & AV_PIX_FMT_FLAG_PLANAR);                              \
         int pixelsize = planar ? 1 : pixdesc->nb_components;                                      \
                                                                                                   \
-        int compno;                                                                               \
         int x, y;                                                                                 \
                                                                                                   \
-        for (compno = 0; compno < s->ncomponents; compno++) {                                     \
             Jpeg2000Component *comp     = tile->comp + compno;                                    \
             Jpeg2000CodingStyle *codsty = tile->codsty + compno;                                  \
             PIXEL *line;                                                                          \
@@ -2130,8 +2132,6 @@ static int jpeg2000_dwt97_int_postshift(AVCodecContext *avctx, void *td,
                 }                                                                                 \
                 line += picture->linesize[plane] / sizeof(PIXEL);                                 \
             }                                                                                     \
-        }                                                                                         \
-                                                                                                  \
     }
 
 WRITE_FRAME(8, uint8_t)
@@ -2139,26 +2139,36 @@ WRITE_FRAME(16, uint16_t)
 
 #undef WRITE_FRAME
 
-static int jpeg2000_mct_write_frame(AVCodecContext *avctx, void *td,
-                                    int jobnr, int threadnr)
+static int jpeg2000_mct(AVCodecContext *avctx, void *td,
+                        int jobnr, int threadnr)
 {
     const Jpeg2000DecoderContext *s = avctx->priv_data;
-    AVFrame *picture = td;
     Jpeg2000Tile *tile = s->tile + jobnr;
 
     /* inverse MCT transformation */
     if (tile->codsty[0].mct)
         mct_decode(s, tile);
 
+    return 0;
+}
+
+static int jpeg2000_write_frame(AVCodecContext *avctx, void *td,
+                                int jobnr, int threadnr)
+{
+    Jpeg2000DecoderContext *s = avctx->priv_data;
+    AVFrame *picture = td;
+    Jpeg2000Tile *tile = s->tile + jobnr / s->ncomponents;
+    int compno = jobnr % s->ncomponents;
+
     if (s->precision <= 8) {
-        write_frame_8(s, tile, picture, 8);
+        write_frame_8(s, tile, picture, 8, compno);
     } else {
         int precision = picture->format == AV_PIX_FMT_XYZ12 ||
                         picture->format == AV_PIX_FMT_RGB48 ||
                         picture->format == AV_PIX_FMT_RGBA64 ||
                         picture->format == AV_PIX_FMT_GRAY16 ? 16 : s->precision;
 
-        write_frame_16(s, tile, picture, precision);
+        write_frame_16(s, tile, picture, precision, compno);
     }
 
     return 0;
@@ -2694,7 +2704,12 @@ static int jpeg2000_decode_frame(AVCodecContext *avctx, AVFrame *picture,
             s->numXtiles * s->numYtiles * s->ncomponents * s->slices)) < 0)
         goto end;
 
-    if ((ret = avctx->execute2(avctx, jpeg2000_mct_write_frame, picture, NULL, s->numXtiles * s->numYtiles)) < 0)
+    if (s->have_mct &&
+        (ret = avctx->execute2(avctx, jpeg2000_mct, NULL, NULL, s->numXtiles * s->numYtiles)) < 0)
+        goto end;
+
+    if ((ret = avctx->execute2(avctx, jpeg2000_write_frame, picture, NULL,
+            s->numXtiles * s->numYtiles * s->ncomponents)) < 0)
         goto end;
 
     jpeg2000_dec_cleanup(s);
-- 
2.30.2


[-- Attachment #3: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [FFmpeg-devel] [PATCH 04/11] lavc/jpeg2000dec: Thread init_tile()
  2022-09-28 10:06 ` [FFmpeg-devel] [PATCH 04/11] lavc/jpeg2000dec: Thread init_tile() Tomas Härdin
@ 2022-09-28 14:14   ` Tomas Härdin
  0 siblings, 0 replies; 12+ messages in thread
From: Tomas Härdin @ 2022-09-28 14:14 UTC (permalink / raw)
  To: FFmpeg development discussions and patches

[-- Attachment #1: Type: text/plain, Size: 170 bytes --]

ons 2022-09-28 klockan 12:06 +0200 skrev Tomas Härdin:
> This is the one that needs the new execute2()

A data race snuck into this one; an updated patch is attached.

/Tomas

[-- Attachment #2: 0004-lavc-jpeg2000dec-Thread-init_tile.patch --]
[-- Type: text/x-patch, Size: 3559 bytes --]

From 6fc3920731950a1820f88e3ae0cf1258ae17b75d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= <git@haerdin.se>
Date: Mon, 13 Jun 2022 15:09:17 +0200
Subject: [PATCH 04/11] lavc/jpeg2000dec: Thread init_tile()

---
 libavcodec/jpeg2000dec.c | 33 ++++++++++++++++++---------------
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c
index 273346538f..a680eaa1bd 100644
--- a/libavcodec/jpeg2000dec.c
+++ b/libavcodec/jpeg2000dec.c
@@ -1019,26 +1019,29 @@ static int get_ppt(Jpeg2000DecoderContext *s, int n)
     return 0;
 }
 
-static int init_tile(Jpeg2000DecoderContext *s, int tileno)
+static int init_tile(AVCodecContext *avctx, void *td,
+                     int jobnr, int threadnr)
 {
-    int compno;
-    int tilex = tileno % s->numXtiles;
-    int tiley = tileno / s->numXtiles;
-    Jpeg2000Tile *tile = s->tile + tileno;
+    const Jpeg2000DecoderContext *s = avctx->priv_data;
+    int tileno                      = jobnr / s->ncomponents;
+    int tilex                       = tileno % s->numXtiles;
+    int tiley                       = tileno / s->numXtiles;
+    int compno                      = jobnr % s->ncomponents;
+    Jpeg2000Tile *tile              = s->tile + tileno;
+    Jpeg2000Component *comp         = tile->comp + compno;
+    Jpeg2000CodingStyle *codsty     = tile->codsty + compno;
+    Jpeg2000QuantStyle  *qntsty     = tile->qntsty + compno;
+    int ret; // global bandno
 
     if (!tile->comp)
         return AVERROR(ENOMEM);
 
+    if (compno == 0) {
     tile->coord[0][0] = av_clip(tilex       * (int64_t)s->tile_width  + s->tile_offset_x, s->image_offset_x, s->width);
     tile->coord[0][1] = av_clip((tilex + 1) * (int64_t)s->tile_width  + s->tile_offset_x, s->image_offset_x, s->width);
     tile->coord[1][0] = av_clip(tiley       * (int64_t)s->tile_height + s->tile_offset_y, s->image_offset_y, s->height);
     tile->coord[1][1] = av_clip((tiley + 1) * (int64_t)s->tile_height + s->tile_offset_y, s->image_offset_y, s->height);
-
-    for (compno = 0; compno < s->ncomponents; compno++) {
-        Jpeg2000Component *comp = tile->comp + compno;
-        Jpeg2000CodingStyle *codsty = tile->codsty + compno;
-        Jpeg2000QuantStyle  *qntsty = tile->qntsty + compno;
-        int ret; // global bandno
+    }
 
         comp->coord_o[0][0] = tile->coord[0][0];
         comp->coord_o[0][1] = tile->coord[0][1];
@@ -1063,7 +1066,7 @@ static int init_tile(Jpeg2000DecoderContext *s, int tileno)
                                              s->cbps[compno], s->cdx[compno],
                                              s->cdy[compno], s->avctx, s->slices))
             return ret;
-    }
+
     return 0;
 }
 
@@ -2371,9 +2374,6 @@ static int jpeg2000_read_bitstream_packets(Jpeg2000DecoderContext *s)
     for (tileno = 0; tileno < s->numXtiles * s->numYtiles; tileno++) {
         Jpeg2000Tile *tile = s->tile + tileno;
 
-        if ((ret = init_tile(s, tileno)) < 0)
-            return ret;
-
         if ((ret = jpeg2000_decode_packets(s, tile)) < 0)
             return ret;
     }
@@ -2668,6 +2668,9 @@ static int jpeg2000_decode_frame(AVCodecContext *avctx, AVFrame *picture,
     picture->key_frame = 1;
     s->slices = avctx->active_thread_type == FF_THREAD_SLICE ? avctx->thread_count : 1;
 
+    if ((ret = avctx->execute2(avctx, init_tile, NULL, NULL, s->numXtiles * s->numYtiles * s->ncomponents)) < 0)
+        goto end;
+
     if (ret = jpeg2000_read_bitstream_packets(s))
         goto end;
 
-- 
2.30.2


[-- Attachment #3: Type: text/plain, Size: 251 bytes --]

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2022-09-28 14:14 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-09-28 10:04 [FFmpeg-devel] [PATCH 01/11] lavc/jpeg2000dec: Finer granularity threading Tomas Härdin
2022-09-28 10:05 ` [FFmpeg-devel] [PATCH 02/11] lavc/jpeg2000dec: Reindent Tomas Härdin
2022-09-28 10:05 ` [FFmpeg-devel] [PATCH 03/11] lavc/jpeg2000dwt: Implement sliced transforms Tomas Härdin
2022-09-28 10:06 ` [FFmpeg-devel] [PATCH 04/11] lavc/jpeg2000dec: Thread init_tile() Tomas Härdin
2022-09-28 14:14   ` Tomas Härdin
2022-09-28 10:06 ` [FFmpeg-devel] [PATCH 05/11] lavc/jpeg2000*: Use av_realloc_array_reuse() and av_reallocz_array_reuse() to eliminate lots of allocations Tomas Härdin
2022-09-28 10:07 ` [FFmpeg-devel] [PATCH 06/11] lavc/jpeg2000: Switch Jpeg2000TgtNode to int32_t parent Tomas Härdin
2022-09-28 10:07 ` [FFmpeg-devel] [PATCH 07/11] lavc/jpeg2000: Speed up ff_jpeg2000_tag_tree_init() using stereotypes for sizes <= 4x4 Tomas Härdin
2022-09-28 10:07 ` [FFmpeg-devel] [PATCH 08/11] lavc/jpeg2000: Reindent Tomas Härdin
2022-09-28 10:08 ` [FFmpeg-devel] [PATCH 09/11] lavc/jpeg2000: Minimize calls to av_codec_is_encoder() Tomas Härdin
2022-09-28 10:09 ` [FFmpeg-devel] [PATCH 10/11] lavc/jpeg2000dec: Use coarser slicing for initial reslevels Tomas Härdin
2022-09-28 10:10 ` [FFmpeg-devel] [PATCH 11/11] lavc/jpeg2000dec: Component-level threading of write_frame() Tomas Härdin

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git