* [FFmpeg-devel] [PATCH 02/11] lavc/jpeg2000dec: Reindent
2022-09-28 10:04 [FFmpeg-devel] [PATCH 01/11] lavc/jpeg2000dec: Finer granularity threading Tomas Härdin
@ 2022-09-28 10:05 ` Tomas Härdin
2022-09-28 10:05 ` [FFmpeg-devel] [PATCH 03/11] lavc/jpeg2000dwt: Implement sliced transforms Tomas Härdin
` (8 subsequent siblings)
9 siblings, 0 replies; 12+ messages in thread
From: Tomas Härdin @ 2022-09-28 10:05 UTC (permalink / raw)
To: FFmpeg development discussions and patches
[-- Attachment #1: Type: text/plain, Size: 1 bytes --]
[-- Attachment #2: 0002-lavc-jpeg2000dec-Reindent.patch --]
[-- Type: text/x-patch, Size: 2478 bytes --]
From 99e9b6cf421538dee6e0280daf3d1ffc71c1acda Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= <git@haerdin.se>
Date: Fri, 10 Jun 2022 14:12:11 +0200
Subject: [PATCH 02/11] lavc/jpeg2000dec: Reindent
---
libavcodec/jpeg2000dec.c | 36 ++++++++++++++++++------------------
1 file changed, 18 insertions(+), 18 deletions(-)
diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c
index e823ae58ec..92bd76d90f 100644
--- a/libavcodec/jpeg2000dec.c
+++ b/libavcodec/jpeg2000dec.c
@@ -1973,26 +1973,26 @@ static int jpeg2000_decode_cb(AVCodecContext *avctx, void *td,
t1.stride = (1<<codsty->log2_cblk_width) + 2;
cb->coded = 0;
- ret = decode_cblk(s, codsty, &t1, cblk,
- cblk->coord[0][1] - cblk->coord[0][0],
- cblk->coord[1][1] - cblk->coord[1][0],
- bandpos, comp->roi_shift);
- if (ret)
- cb->coded = 1;
- else
- return 0;
+ ret = decode_cblk(s, codsty, &t1, cblk,
+ cblk->coord[0][1] - cblk->coord[0][0],
+ cblk->coord[1][1] - cblk->coord[1][0],
+ bandpos, comp->roi_shift);
+ if (ret)
+ cb->coded = 1;
+ else
+ return 0;
- x = cblk->coord[0][0] - band->coord[0][0];
- y = cblk->coord[1][0] - band->coord[1][0];
+ x = cblk->coord[0][0] - band->coord[0][0];
+ y = cblk->coord[1][0] - band->coord[1][0];
- if (comp->roi_shift)
- roi_scale_cblk(cblk, comp, &t1);
- if (codsty->transform == FF_DWT97)
- dequantization_float(x, y, cblk, comp, &t1, band);
- else if (codsty->transform == FF_DWT97_INT)
- dequantization_int_97(x, y, cblk, comp, &t1, band);
- else
- dequantization_int(x, y, cblk, comp, &t1, band);
+ if (comp->roi_shift)
+ roi_scale_cblk(cblk, comp, &t1);
+ if (codsty->transform == FF_DWT97)
+ dequantization_float(x, y, cblk, comp, &t1, band);
+ else if (codsty->transform == FF_DWT97_INT)
+ dequantization_int_97(x, y, cblk, comp, &t1, band);
+ else
+ dequantization_int(x, y, cblk, comp, &t1, band);
return 0;
}
--
2.30.2
[-- Attachment #3: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 12+ messages in thread
* [FFmpeg-devel] [PATCH 03/11] lavc/jpeg2000dwt: Implement sliced transforms
2022-09-28 10:04 [FFmpeg-devel] [PATCH 01/11] lavc/jpeg2000dec: Finer granularity threading Tomas Härdin
2022-09-28 10:05 ` [FFmpeg-devel] [PATCH 02/11] lavc/jpeg2000dec: Reindent Tomas Härdin
@ 2022-09-28 10:05 ` Tomas Härdin
2022-09-28 10:06 ` [FFmpeg-devel] [PATCH 04/11] lavc/jpeg2000dec: Thread init_tile() Tomas Härdin
` (7 subsequent siblings)
9 siblings, 0 replies; 12+ messages in thread
From: Tomas Härdin @ 2022-09-28 10:05 UTC (permalink / raw)
To: FFmpeg development discussions and patches
[-- Attachment #1: Type: text/plain, Size: 1 bytes --]
[-- Attachment #2: 0003-lavc-jpeg2000dwt-Implement-sliced-transforms.patch --]
[-- Type: text/x-patch, Size: 25043 bytes --]
From 159d744f09f39e3350ac39ac5d05feaca22103af Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= <git@haerdin.se>
Date: Fri, 10 Jun 2022 17:18:14 +0200
Subject: [PATCH 03/11] lavc/jpeg2000dwt: Implement sliced transforms
lavc/tests/jpeg2000dwt tests this.
---
libavcodec/j2kenc.c | 3 +-
libavcodec/jpeg2000.c | 5 +-
libavcodec/jpeg2000.h | 2 +-
libavcodec/jpeg2000dec.c | 109 +++++++++++++++++++++++----
libavcodec/jpeg2000dwt.c | 132 +++++++++++++++++++--------------
libavcodec/jpeg2000dwt.h | 6 +-
libavcodec/tests/jpeg2000dwt.c | 15 ++--
7 files changed, 189 insertions(+), 83 deletions(-)
diff --git a/libavcodec/j2kenc.c b/libavcodec/j2kenc.c
index e883d5deb7..cd325e94e0 100644
--- a/libavcodec/j2kenc.c
+++ b/libavcodec/j2kenc.c
@@ -494,7 +494,8 @@ static int init_tiles(Jpeg2000EncoderContext *s)
s->cbps[compno],
compno?1<<s->chroma_shift[0]:1,
compno?1<<s->chroma_shift[1]:1,
- s->avctx
+ s->avctx,
+ 1
)) < 0)
return ret;
}
diff --git a/libavcodec/jpeg2000.c b/libavcodec/jpeg2000.c
index 0aa984bc53..945b787565 100644
--- a/libavcodec/jpeg2000.c
+++ b/libavcodec/jpeg2000.c
@@ -467,7 +467,7 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp,
Jpeg2000CodingStyle *codsty,
Jpeg2000QuantStyle *qntsty,
int cbps, int dx, int dy,
- AVCodecContext *avctx)
+ AVCodecContext *avctx, int max_slices)
{
int reslevelno, bandno, gbandno = 0, ret, i, j;
uint32_t csize;
@@ -479,7 +479,8 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp,
if (ret = ff_jpeg2000_dwt_init(&comp->dwt, comp->coord,
codsty->nreslevels2decode - 1,
- codsty->transform))
+ codsty->transform,
+ max_slices))
return ret;
if (av_image_check_size(comp->coord[0][1] - comp->coord[0][0],
diff --git a/libavcodec/jpeg2000.h b/libavcodec/jpeg2000.h
index e5ecb4cbf9..389813a9b9 100644
--- a/libavcodec/jpeg2000.h
+++ b/libavcodec/jpeg2000.h
@@ -280,7 +280,7 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp,
Jpeg2000CodingStyle *codsty,
Jpeg2000QuantStyle *qntsty,
int cbps, int dx, int dy,
- AVCodecContext *ctx);
+ AVCodecContext *ctx, int max_slices);
void ff_jpeg2000_reinit(Jpeg2000Component *comp, Jpeg2000CodingStyle *codsty);
diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c
index 92bd76d90f..273346538f 100644
--- a/libavcodec/jpeg2000dec.c
+++ b/libavcodec/jpeg2000dec.c
@@ -150,6 +150,10 @@ typedef struct Jpeg2000DecoderContext {
size_t idwt_allocated;
Jpeg2000CodeblockThread *cb;
size_t cb_allocated;
+
+ // used for idwt slicing
+ int reslevel, dir, slices;
+ int have_dwt97_int; // 1 if any coding style is FF_DWT97_INT
} Jpeg2000DecoderContext;
/* get_bits functions for JPEG2000 packet bitstream
@@ -545,9 +549,10 @@ static int get_cox(Jpeg2000DecoderContext *s, Jpeg2000CodingStyle *c)
}
c->transform = bytestream2_get_byteu(&s->g); // DWT transformation type
/* set integer 9/7 DWT in case of BITEXACT flag */
- if ((s->avctx->flags & AV_CODEC_FLAG_BITEXACT) && (c->transform == FF_DWT97))
+ if ((s->avctx->flags & AV_CODEC_FLAG_BITEXACT) && (c->transform == FF_DWT97)) {
c->transform = FF_DWT97_INT;
- else if (c->transform == FF_DWT53) {
+ s->have_dwt97_int = 1;
+ } else if (c->transform == FF_DWT53) {
s->avctx->properties |= FF_CODEC_PROPERTY_LOSSLESS;
}
@@ -1056,7 +1061,7 @@ static int init_tile(Jpeg2000DecoderContext *s, int tileno)
return AVERROR_INVALIDDATA;
if (ret = ff_jpeg2000_init_component(comp, codsty, qntsty,
s->cbps[compno], s->cdx[compno],
- s->cdy[compno], s->avctx))
+ s->cdy[compno], s->avctx, s->slices))
return ret;
}
return 0;
@@ -1997,19 +2002,74 @@ static int jpeg2000_decode_cb(AVCodecContext *avctx, void *td,
return 0;
}
+static int jpeg2000_dwt97_int_preshift(AVCodecContext *avctx, void *td,
+ int jobnr, int threadnr)
+{
+ const Jpeg2000DecoderContext *s = avctx->priv_data;
+ Jpeg2000IdwtThread *idwt = s->idwt + jobnr / s->slices;
+ Jpeg2000Tile *tile = s->tile + jobnr / s->slices / s->ncomponents;
+ int compno = (jobnr / s->slices) % s->ncomponents;
+ int slice = jobnr % s->slices;
+ Jpeg2000Component *comp = tile->comp + compno;
+ Jpeg2000CodingStyle *codsty = tile->codsty + compno;
+ int a = comp->dwt.linelen[comp->dwt.ndeclevels - 1][0] *
+ comp->dwt.linelen[comp->dwt.ndeclevels - 1][1];
+ int as = (a + s->slices - 1)/s->slices;
+
+ for (int i = idwt->cb_start; i < idwt->cb_end; i++) {
+ if (s->cb[i].coded) {
+ if (codsty->transform == FF_DWT97_INT) {
+ for (int i = as*slice; i - as < as*slice; i++)
+ comp->i_data[i] *= 1LL << I_PRESHIFT;
+ }
+ break;
+ }
+ }
+
+ return 0;
+}
+
static int jpeg2000_idwt(AVCodecContext *avctx, void *td,
int jobnr, int threadnr)
{
const Jpeg2000DecoderContext *s = avctx->priv_data;
- Jpeg2000IdwtThread *idwt = s->idwt + jobnr;
- Jpeg2000Tile *tile = s->tile + jobnr / s->ncomponents;
- int compno = jobnr % s->ncomponents;
+ Jpeg2000IdwtThread *idwt = s->idwt + jobnr / s->slices;
+ Jpeg2000Tile *tile = s->tile + jobnr / s->slices / s->ncomponents;
+ int compno = (jobnr / s->slices) % s->ncomponents;
+ int slice = jobnr % s->slices;
Jpeg2000Component *comp = tile->comp + compno;
Jpeg2000CodingStyle *codsty = tile->codsty + compno;
for (int i = idwt->cb_start; i < idwt->cb_end; i++) {
if (s->cb[i].coded) {
- ff_dwt_decode(&comp->dwt, codsty->transform == FF_DWT97 ? (void*)comp->f_data : (void*)comp->i_data);
+ ff_dwt_decode_thread(&comp->dwt, codsty->transform == FF_DWT97 ? (void*)comp->f_data : (void*)comp->i_data, s->reslevel, s->dir, slice, s->slices);
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static int jpeg2000_dwt97_int_postshift(AVCodecContext *avctx, void *td,
+ int jobnr, int threadnr)
+{
+ const Jpeg2000DecoderContext *s = avctx->priv_data;
+ Jpeg2000IdwtThread *idwt = s->idwt + jobnr / s->slices;
+ Jpeg2000Tile *tile = s->tile + jobnr / s->slices / s->ncomponents;
+ int compno = (jobnr / s->slices) % s->ncomponents;
+ int slice = jobnr % s->slices;
+ Jpeg2000Component *comp = tile->comp + compno;
+ Jpeg2000CodingStyle *codsty = tile->codsty + compno;
+ int a = comp->dwt.linelen[comp->dwt.ndeclevels - 1][0] *
+ comp->dwt.linelen[comp->dwt.ndeclevels - 1][1];
+ int as = (a + s->slices - 1)/s->slices;
+
+ for (int i = idwt->cb_start; i < idwt->cb_end; i++) {
+ if (s->cb[i].coded) {
+ if (codsty->transform == FF_DWT97_INT) {
+ for (int i = as*slice; i - as < as*slice; i++)
+ comp->i_data[i] = (comp->i_data[i] + ((1LL<<I_PRESHIFT)>>1)) >> I_PRESHIFT;
+ }
break;
}
}
@@ -2480,7 +2540,7 @@ static av_cold int jpeg2000_decode_init(AVCodecContext *avctx)
return 0;
}
-static int jpeg2000_setup_cbs(Jpeg2000DecoderContext *s, int *cbs_out)
+static int jpeg2000_setup_cbs(Jpeg2000DecoderContext *s, int *cbs_out, int *maxreslevels_out)
{
if (s->numXtiles * s->numYtiles > INT_MAX/s->ncomponents)
return AVERROR(ENOMEM);
@@ -2491,7 +2551,7 @@ static int jpeg2000_setup_cbs(Jpeg2000DecoderContext *s, int *cbs_out)
return AVERROR(ENOMEM);
for (int pass = 0; pass < 2; pass++) {
- int cbs = 0;
+ int cbs = 0, maxreslevels = 0;
for (int tileno = 0; tileno < s->numXtiles * s->numYtiles; tileno++) {
for (int compno = 0; compno < s->ncomponents; compno++) {
Jpeg2000Tile *tile = s->tile + tileno;
@@ -2500,6 +2560,7 @@ static int jpeg2000_setup_cbs(Jpeg2000DecoderContext *s, int *cbs_out)
Jpeg2000IdwtThread *idwt = s->idwt + compno + tileno * s->ncomponents;
idwt->cb_start = cbs;
+ maxreslevels = FFMAX(maxreslevels, codsty->nreslevels2decode);
for (int reslevelno = 0; reslevelno < codsty->nreslevels2decode; reslevelno++) {
Jpeg2000ResLevel *rlevel = comp->reslevel + reslevelno;
@@ -2544,6 +2605,7 @@ static int jpeg2000_setup_cbs(Jpeg2000DecoderContext *s, int *cbs_out)
}
*cbs_out = cbs;
+ *maxreslevels_out = maxreslevels;
}
return 0;
}
@@ -2552,7 +2614,7 @@ static int jpeg2000_decode_frame(AVCodecContext *avctx, AVFrame *picture,
int *got_frame, AVPacket *avpkt)
{
Jpeg2000DecoderContext *s = avctx->priv_data;
- int ret, cbs;
+ int ret, cbs, maxreslevels;
s->avctx = avctx;
bytestream2_init(&s->g, avpkt->data, avpkt->size);
@@ -2604,6 +2666,7 @@ static int jpeg2000_decode_frame(AVCodecContext *avctx, AVFrame *picture,
goto end;
picture->pict_type = AV_PICTURE_TYPE_I;
picture->key_frame = 1;
+ s->slices = avctx->active_thread_type == FF_THREAD_SLICE ? avctx->thread_count : 1;
if (ret = jpeg2000_read_bitstream_packets(s))
goto end;
@@ -2619,12 +2682,30 @@ static int jpeg2000_decode_frame(AVCodecContext *avctx, AVFrame *picture,
}
}
- if ((ret = jpeg2000_setup_cbs(s, &cbs)) < 0 ||
- (ret = avctx->execute2(avctx, jpeg2000_decode_cb, NULL, NULL, cbs)) < 0 ||
- (ret = avctx->execute2(avctx, jpeg2000_idwt, NULL, NULL, s->numXtiles * s->numYtiles * s->ncomponents)) < 0 ||
- (ret = avctx->execute2(avctx, jpeg2000_mct_write_frame, picture, NULL, s->numXtiles * s->numYtiles)) < 0)
+ if ((ret = jpeg2000_setup_cbs(s, &cbs, &maxreslevels)) < 0 ||
+ (ret = avctx->execute2(avctx, jpeg2000_decode_cb, NULL, NULL, cbs)) < 0)
+ goto end;
+
+ if (s->have_dwt97_int &&
+ (ret = avctx->execute2(avctx, jpeg2000_dwt97_int_preshift, NULL, NULL,
+ s->numXtiles * s->numYtiles * s->ncomponents * s->slices)) < 0)
goto end;
+ for (s->reslevel = 0; s->reslevel < maxreslevels; s->reslevel++) {
+ for (s->dir = 0; s->dir < 2; s->dir++) {
+ if ((ret = avctx->execute2(avctx, jpeg2000_idwt, NULL, NULL,
+ s->numXtiles * s->numYtiles * s->ncomponents * s->slices)) < 0)
+ goto end;
+ }
+ }
+
+ if (s->have_dwt97_int &&
+ (ret = avctx->execute2(avctx, jpeg2000_dwt97_int_postshift, NULL, NULL,
+ s->numXtiles * s->numYtiles * s->ncomponents * s->slices)) < 0)
+ goto end;
+
+ if ((ret = avctx->execute2(avctx, jpeg2000_mct_write_frame, picture, NULL, s->numXtiles * s->numYtiles)) < 0)
+ goto end;
jpeg2000_dec_cleanup(s);
diff --git a/libavcodec/jpeg2000dwt.c b/libavcodec/jpeg2000dwt.c
index f2da7307c4..921461b6d7 100644
--- a/libavcodec/jpeg2000dwt.c
+++ b/libavcodec/jpeg2000dwt.c
@@ -45,7 +45,6 @@
#define I_LFTG_DELTA 29066ll
#define I_LFTG_K 80621ll
#define I_LFTG_X 53274ll
-#define I_PRESHIFT 8
static inline void extend53(int *p, int i0, int i1)
{
@@ -322,24 +321,24 @@ static void sr_1d53(unsigned *p, int i0, int i1)
p[2 * i + 1] += (int)(p[2 * i] + p[2 * i + 2]) >> 1;
}
-static void dwt_decode53(DWTContext *s, int *t)
+static void dwt_decode53(DWTContext *s, int *t, int lev, int dir, int slice, int slices)
{
- int lev;
int w = s->linelen[s->ndeclevels - 1][0];
- int32_t *line = s->i_linebuf;
- line += 3;
+ int32_t *line = s->i_linebuf + slice * s->linesize + 3;
- for (lev = 0; lev < s->ndeclevels; lev++) {
int lh = s->linelen[lev][0],
lv = s->linelen[lev][1],
mh = s->mod[lev][0],
mv = s->mod[lev][1],
+ sh = (lh + slices - 1)/slices,
+ sv = (lv + slices - 1)/slices,
lp;
int *l;
+ if (dir == 0) {
// HOR_SD
l = line + mh;
- for (lp = 0; lp < lv; lp++) {
+ for (lp = slice*sv; lp < lv && lp - sv < slice*sv; lp++) {
int i, j = 0;
// copy with interleaving
for (i = mh; i < lh; i += 2, j++)
@@ -352,10 +351,10 @@ static void dwt_decode53(DWTContext *s, int *t)
for (i = 0; i < lh; i++)
t[w * lp + i] = l[i];
}
-
+ } else {
// VER_SD
l = line + mv;
- for (lp = 0; lp < lh; lp++) {
+ for (lp = slice*sh; lp < lh && lp - sh < slice*sh; lp++) {
int i, j = 0;
// copy with interleaving
for (i = mv; i < lv; i += 2, j++)
@@ -398,25 +397,26 @@ static void sr_1d97_float(float *p, int i0, int i1)
p[2 * i + 1] += F_LFTG_ALPHA * (p[2 * i] + p[2 * i + 2]);
}
-static void dwt_decode97_float(DWTContext *s, float *t)
+static void dwt_decode97_float(DWTContext *s, float *t, int lev, int dir, int slice, int slices)
{
- int lev;
int w = s->linelen[s->ndeclevels - 1][0];
- float *line = s->f_linebuf;
- float *data = t;
/* position at index O of line range [0-5,w+5] cf. extend function */
- line += 5;
+ float *line = s->f_linebuf + slice * s->linesize + 5;
+ float *data = t;
- for (lev = 0; lev < s->ndeclevels; lev++) {
int lh = s->linelen[lev][0],
lv = s->linelen[lev][1],
mh = s->mod[lev][0],
mv = s->mod[lev][1],
+ sh = (lh + slices - 1)/slices,
+ sv = (lv + slices - 1)/slices,
lp;
float *l;
+
+ if (dir == 0) {
// HOR_SD
l = line + mh;
- for (lp = 0; lp < lv; lp++) {
+ for (lp = slice*sv; lp < lv && lp - sv < slice*sv; lp++) {
int i, j = 0;
// copy with interleaving
for (i = mh; i < lh; i += 2, j++)
@@ -429,10 +429,10 @@ static void dwt_decode97_float(DWTContext *s, float *t)
for (i = 0; i < lh; i++)
data[w * lp + i] = l[i];
}
-
+ } else {
// VER_SD
l = line + mv;
- for (lp = 0; lp < lh; lp++) {
+ for (lp = slice*sh; lp < lh && lp - sh < slice*sh; lp++) {
int i, j = 0;
// copy with interleaving
for (i = mv; i < lv; i += 2, j++)
@@ -475,30 +475,26 @@ static void sr_1d97_int(int32_t *p, int i0, int i1)
p[2 * i + 1] += (I_LFTG_ALPHA * (p[2 * i] + (int64_t)p[2 * i + 2]) + (1 << 15)) >> 16;
}
-static void dwt_decode97_int(DWTContext *s, int32_t *t)
+static void dwt_decode97_int(DWTContext *s, int32_t *t, int lev, int dir, int slice, int slices)
{
- int lev;
int w = s->linelen[s->ndeclevels - 1][0];
- int h = s->linelen[s->ndeclevels - 1][1];
- int i;
- int32_t *line = s->i_linebuf;
- int32_t *data = t;
/* position at index O of line range [0-5,w+5] cf. extend function */
- line += 5;
-
- for (i = 0; i < w * h; i++)
- data[i] *= 1LL << I_PRESHIFT;
+ int32_t *line = s->i_linebuf + slice * s->linesize + 5;
+ int32_t *data = t;
- for (lev = 0; lev < s->ndeclevels; lev++) {
int lh = s->linelen[lev][0],
lv = s->linelen[lev][1],
mh = s->mod[lev][0],
mv = s->mod[lev][1],
+ sh = (lh + slices - 1)/slices,
+ sv = (lv + slices - 1)/slices,
lp;
int32_t *l;
+
+ if (dir == 0) {
// HOR_SD
l = line + mh;
- for (lp = 0; lp < lv; lp++) {
+ for (lp = slice*sv; lp < lv && lp - sv < slice*sv; lp++) {
int i, j = 0;
// rescale with interleaving
for (i = mh; i < lh; i += 2, j++)
@@ -511,10 +507,10 @@ static void dwt_decode97_int(DWTContext *s, int32_t *t)
for (i = 0; i < lh; i++)
data[w * lp + i] = l[i];
}
-
+ } else {
// VER_SD
l = line + mv;
- for (lp = 0; lp < lh; lp++) {
+ for (lp = slice*sh; lp < lh && lp - sh < slice*sh; lp++) {
int i, j = 0;
// rescale with interleaving
for (i = mv; i < lv; i += 2, j++)
@@ -528,26 +524,29 @@ static void dwt_decode97_int(DWTContext *s, int32_t *t)
data[w * i + lp] = l[i];
}
}
-
- for (i = 0; i < w * h; i++)
- data[i] = (data[i] + ((1LL<<I_PRESHIFT)>>1)) >> I_PRESHIFT;
}
int ff_jpeg2000_dwt_init(DWTContext *s, int border[2][2],
- int decomp_levels, int type)
+ int decomp_levels, int type, int max_slices)
{
- int i, j, lev = decomp_levels, maxlen,
+ int i, j, lev = decomp_levels,
b[2][2];
s->ndeclevels = decomp_levels;
s->type = type;
+ s->max_slices = max_slices;
+
+ if (s->max_slices > INT_MAX/FFMAX(sizeof(*s->f_linebuf),sizeof(*s->i_linebuf)))
+ return AVERROR(ENOMEM);
for (i = 0; i < 2; i++)
for (j = 0; j < 2; j++)
b[i][j] = border[i][j];
- maxlen = FFMAX(b[0][1] - b[0][0],
- b[1][1] - b[1][0]);
+ s->linesize = FFMAX(b[0][1] - b[0][0],
+ b[1][1] - b[1][0]) +
+ (type == FF_DWT53 ? 6 : 12);
+
while (--lev >= 0)
for (i = 0; i < 2; i++) {
s->linelen[lev][i] = b[i][1] - b[i][0];
@@ -555,24 +554,15 @@ int ff_jpeg2000_dwt_init(DWTContext *s, int border[2][2],
for (j = 0; j < 2; j++)
b[i][j] = (b[i][j] + 1) >> 1;
}
- switch (type) {
- case FF_DWT97:
- s->f_linebuf = av_malloc_array((maxlen + 12), sizeof(*s->f_linebuf));
+
+ if (type == FF_DWT97) {
+ s->f_linebuf = av_malloc_array(s->linesize, s->max_slices*sizeof(*s->f_linebuf));
if (!s->f_linebuf)
return AVERROR(ENOMEM);
- break;
- case FF_DWT97_INT:
- s->i_linebuf = av_malloc_array((maxlen + 12), sizeof(*s->i_linebuf));
- if (!s->i_linebuf)
- return AVERROR(ENOMEM);
- break;
- case FF_DWT53:
- s->i_linebuf = av_malloc_array((maxlen + 6), sizeof(*s->i_linebuf));
+ } else {
+ s->i_linebuf = av_malloc_array(s->linesize, s->max_slices*sizeof(*s->i_linebuf));
if (!s->i_linebuf)
return AVERROR(ENOMEM);
- break;
- default:
- return -1;
}
return 0;
}
@@ -597,18 +587,46 @@ int ff_dwt_encode(DWTContext *s, void *t)
int ff_dwt_decode(DWTContext *s, void *t)
{
- if (s->ndeclevels == 0)
+ int w = s->linelen[s->ndeclevels - 1][0];
+ int h = s->linelen[s->ndeclevels - 1][1];
+ int32_t *data = t;
+
+ if (s->type == FF_DWT97_INT)
+ for (int i = 0; i < w * h; i++)
+ data[i] *= 1LL << I_PRESHIFT;
+
+ for (int lev = 0; lev < s->ndeclevels; lev++)
+ for (int dir = 0; dir < 2; dir++)
+ for (int slice = 0; slice < s->max_slices; slice++) {
+ int ret = ff_dwt_decode_thread(s, t, lev, dir, slice, s->max_slices);
+ if (ret)
+ return ret;
+ }
+
+ if (s->type == FF_DWT97_INT)
+ for (int i = 0; i < w * h; i++)
+ data[i] = (data[i] + ((1LL<<I_PRESHIFT)>>1)) >> I_PRESHIFT;
+
+ return 0;
+}
+
+int ff_dwt_decode_thread(DWTContext *s, void *t, int lev, int dir, int slice, int slices)
+{
+ slices = FFMIN(s->max_slices, slices);
+
+ // lev can be >= s->ndeclevels in files with mixed reslevels in tiles/components
+ if (s->ndeclevels == 0 || lev >= s->ndeclevels || slice >= slices)
return 0;
switch (s->type) {
case FF_DWT97:
- dwt_decode97_float(s, t);
+ dwt_decode97_float(s, t, lev, dir, slice, slices);
break;
case FF_DWT97_INT:
- dwt_decode97_int(s, t);
+ dwt_decode97_int(s, t, lev, dir, slice, slices);
break;
case FF_DWT53:
- dwt_decode53(s, t);
+ dwt_decode53(s, t, lev, dir, slice, slices);
break;
default:
return -1;
diff --git a/libavcodec/jpeg2000dwt.h b/libavcodec/jpeg2000dwt.h
index 718d183ac1..d5e94c9916 100644
--- a/libavcodec/jpeg2000dwt.h
+++ b/libavcodec/jpeg2000dwt.h
@@ -32,6 +32,7 @@
#define FF_DWT_MAX_DECLVLS 32 ///< max number of decomposition levels
#define F_LFTG_K 1.230174104914001f
#define F_LFTG_X 0.812893066115961f
+#define I_PRESHIFT 8
enum DWTType {
FF_DWT97,
@@ -48,6 +49,8 @@ typedef struct DWTContext {
uint8_t type; ///< 0 for 9/7; 1 for 5/3
int32_t *i_linebuf; ///< int buffer used by transform
float *f_linebuf; ///< float buffer used by transform
+ int max_slices;
+ int linesize;
} DWTContext;
/**
@@ -58,10 +61,11 @@ typedef struct DWTContext {
* @param type 0 for DWT 9/7; 1 for DWT 5/3
*/
int ff_jpeg2000_dwt_init(DWTContext *s, int border[2][2],
- int decomp_levels, int type);
+ int decomp_levels, int type, int max_slices);
int ff_dwt_encode(DWTContext *s, void *t);
int ff_dwt_decode(DWTContext *s, void *t);
+int ff_dwt_decode_thread(DWTContext *s, void *t, int lev, int dir, int slice, int slices);
void ff_dwt_destroy(DWTContext *s);
diff --git a/libavcodec/tests/jpeg2000dwt.c b/libavcodec/tests/jpeg2000dwt.c
index 0e5a6ed947..d4d9e6d224 100644
--- a/libavcodec/tests/jpeg2000dwt.c
+++ b/libavcodec/tests/jpeg2000dwt.c
@@ -31,12 +31,12 @@
#define MAX_W 256
-static int test_dwt(int *array, int *ref, int border[2][2], int decomp_levels, int type, int max_diff) {
+static int test_dwt(int *array, int *ref, int border[2][2], int decomp_levels, int type, int max_diff, int slices) {
int ret, j;
DWTContext s1={{{0}}}, *s= &s1;
int64_t err2 = 0;
- ret = ff_jpeg2000_dwt_init(s, border, decomp_levels, type);
+ ret = ff_jpeg2000_dwt_init(s, border, decomp_levels, type, slices);
if (ret < 0) {
fprintf(stderr, "ff_jpeg2000_dwt_init failed\n");
return 1;
@@ -70,12 +70,12 @@ static int test_dwt(int *array, int *ref, int border[2][2], int decomp_levels, i
return 0;
}
-static int test_dwtf(float *array, float *ref, int border[2][2], int decomp_levels, float max_diff) {
+static int test_dwtf(float *array, float *ref, int border[2][2], int decomp_levels, float max_diff, int slices) {
int ret, j;
DWTContext s1={{{0}}}, *s= &s1;
double err2 = 0;
- ret = ff_jpeg2000_dwt_init(s, border, decomp_levels, FF_DWT97);
+ ret = ff_jpeg2000_dwt_init(s, border, decomp_levels, FF_DWT97, slices);
if (ret < 0) {
fprintf(stderr, "ff_jpeg2000_dwt_init failed\n");
return 1;
@@ -125,19 +125,20 @@ int main(void) {
arrayf[i] = reff[i] = array[i] = ref[i] = av_lfg_get(&prng) % 2048;
for (i = 0; i < 100; i++) {
+ int slices = 1 + (i % 10);
for (j=0; j<4; j++)
border[j>>1][j&1] = av_lfg_get(&prng) % MAX_W;
if (border[0][0] >= border[0][1] || border[1][0] >= border[1][1])
continue;
decomp_levels = av_lfg_get(&prng) % FF_DWT_MAX_DECLVLS;
- ret = test_dwt(array, ref, border, decomp_levels, FF_DWT53, 0);
+ ret = test_dwt(array, ref, border, decomp_levels, FF_DWT53, 0, slices);
if (ret)
return ret;
- ret = test_dwt(array, ref, border, decomp_levels, FF_DWT97_INT, FFMIN(7+5*decomp_levels, 15+3*decomp_levels));
+ ret = test_dwt(array, ref, border, decomp_levels, FF_DWT97_INT, FFMIN(7+5*decomp_levels, 15+3*decomp_levels), slices);
if (ret)
return ret;
- ret = test_dwtf(arrayf, reff, border, decomp_levels, 0.05);
+ ret = test_dwtf(arrayf, reff, border, decomp_levels, 0.05, slices);
if (ret)
return ret;
}
--
2.30.2
[-- Attachment #3: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 12+ messages in thread
* [FFmpeg-devel] [PATCH 04/11] lavc/jpeg2000dec: Thread init_tile()
2022-09-28 10:04 [FFmpeg-devel] [PATCH 01/11] lavc/jpeg2000dec: Finer granularity threading Tomas Härdin
2022-09-28 10:05 ` [FFmpeg-devel] [PATCH 02/11] lavc/jpeg2000dec: Reindent Tomas Härdin
2022-09-28 10:05 ` [FFmpeg-devel] [PATCH 03/11] lavc/jpeg2000dwt: Implement sliced transforms Tomas Härdin
@ 2022-09-28 10:06 ` Tomas Härdin
2022-09-28 14:14 ` Tomas Härdin
2022-09-28 10:06 ` [FFmpeg-devel] [PATCH 05/11] lavc/jpeg2000*: Use av_realloc_array_reuse() and av_reallocz_array_reuse() to eliminate lots of allocations Tomas Härdin
` (6 subsequent siblings)
9 siblings, 1 reply; 12+ messages in thread
From: Tomas Härdin @ 2022-09-28 10:06 UTC (permalink / raw)
To: FFmpeg development discussions and patches
[-- Attachment #1: Type: text/plain, Size: 46 bytes --]
This is the one that needs the new execute2()
[-- Attachment #2: 0004-lavc-jpeg2000dec-Thread-init_tile.patch --]
[-- Type: text/x-patch, Size: 3415 bytes --]
From 4e7c65a7a3e049396ce5e3c01db335a532889115 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= <git@haerdin.se>
Date: Mon, 13 Jun 2022 15:09:17 +0200
Subject: [PATCH 04/11] lavc/jpeg2000dec: Thread init_tile()
---
libavcodec/jpeg2000dec.c | 31 ++++++++++++++++---------------
1 file changed, 16 insertions(+), 15 deletions(-)
diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c
index 273346538f..00aa73e261 100644
--- a/libavcodec/jpeg2000dec.c
+++ b/libavcodec/jpeg2000dec.c
@@ -1019,12 +1019,19 @@ static int get_ppt(Jpeg2000DecoderContext *s, int n)
return 0;
}
-static int init_tile(Jpeg2000DecoderContext *s, int tileno)
+static int init_tile(AVCodecContext *avctx, void *td,
+ int jobnr, int threadnr)
{
- int compno;
- int tilex = tileno % s->numXtiles;
- int tiley = tileno / s->numXtiles;
- Jpeg2000Tile *tile = s->tile + tileno;
+ const Jpeg2000DecoderContext *s = avctx->priv_data;
+ int tileno = jobnr / s->ncomponents;
+ int tilex = tileno % s->numXtiles;
+ int tiley = tileno / s->numXtiles;
+ int compno = jobnr % s->ncomponents;
+ Jpeg2000Tile *tile = s->tile + tileno;
+ Jpeg2000Component *comp = tile->comp + compno;
+ Jpeg2000CodingStyle *codsty = tile->codsty + compno;
+ Jpeg2000QuantStyle *qntsty = tile->qntsty + compno;
+ int ret; // global bandno
if (!tile->comp)
return AVERROR(ENOMEM);
@@ -1034,12 +1041,6 @@ static int init_tile(Jpeg2000DecoderContext *s, int tileno)
tile->coord[1][0] = av_clip(tiley * (int64_t)s->tile_height + s->tile_offset_y, s->image_offset_y, s->height);
tile->coord[1][1] = av_clip((tiley + 1) * (int64_t)s->tile_height + s->tile_offset_y, s->image_offset_y, s->height);
- for (compno = 0; compno < s->ncomponents; compno++) {
- Jpeg2000Component *comp = tile->comp + compno;
- Jpeg2000CodingStyle *codsty = tile->codsty + compno;
- Jpeg2000QuantStyle *qntsty = tile->qntsty + compno;
- int ret; // global bandno
-
comp->coord_o[0][0] = tile->coord[0][0];
comp->coord_o[0][1] = tile->coord[0][1];
comp->coord_o[1][0] = tile->coord[1][0];
@@ -1063,7 +1064,7 @@ static int init_tile(Jpeg2000DecoderContext *s, int tileno)
s->cbps[compno], s->cdx[compno],
s->cdy[compno], s->avctx, s->slices))
return ret;
- }
+
return 0;
}
@@ -2371,9 +2372,6 @@ static int jpeg2000_read_bitstream_packets(Jpeg2000DecoderContext *s)
for (tileno = 0; tileno < s->numXtiles * s->numYtiles; tileno++) {
Jpeg2000Tile *tile = s->tile + tileno;
- if ((ret = init_tile(s, tileno)) < 0)
- return ret;
-
if ((ret = jpeg2000_decode_packets(s, tile)) < 0)
return ret;
}
@@ -2668,6 +2666,9 @@ static int jpeg2000_decode_frame(AVCodecContext *avctx, AVFrame *picture,
picture->key_frame = 1;
s->slices = avctx->active_thread_type == FF_THREAD_SLICE ? avctx->thread_count : 1;
+ if ((ret = avctx->execute2(avctx, init_tile, NULL, NULL, s->numXtiles * s->numYtiles * s->ncomponents)) < 0)
+ goto end;
+
if (ret = jpeg2000_read_bitstream_packets(s))
goto end;
--
2.30.2
[-- Attachment #3: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [FFmpeg-devel] [PATCH 04/11] lavc/jpeg2000dec: Thread init_tile()
2022-09-28 10:06 ` [FFmpeg-devel] [PATCH 04/11] lavc/jpeg2000dec: Thread init_tile() Tomas Härdin
@ 2022-09-28 14:14 ` Tomas Härdin
0 siblings, 0 replies; 12+ messages in thread
From: Tomas Härdin @ 2022-09-28 14:14 UTC (permalink / raw)
To: FFmpeg development discussions and patches
[-- Attachment #1: Type: text/plain, Size: 170 bytes --]
ons 2022-09-28 klockan 12:06 +0200 skrev Tomas Härdin:
> This is the one that needs the new execute2()
A data race snuck into this one, updated patch attached.
/Tomas
[-- Attachment #2: 0004-lavc-jpeg2000dec-Thread-init_tile.patch --]
[-- Type: text/x-patch, Size: 3559 bytes --]
From 6fc3920731950a1820f88e3ae0cf1258ae17b75d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= <git@haerdin.se>
Date: Mon, 13 Jun 2022 15:09:17 +0200
Subject: [PATCH 04/11] lavc/jpeg2000dec: Thread init_tile()
---
libavcodec/jpeg2000dec.c | 33 ++++++++++++++++++---------------
1 file changed, 18 insertions(+), 15 deletions(-)
diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c
index 273346538f..a680eaa1bd 100644
--- a/libavcodec/jpeg2000dec.c
+++ b/libavcodec/jpeg2000dec.c
@@ -1019,26 +1019,29 @@ static int get_ppt(Jpeg2000DecoderContext *s, int n)
return 0;
}
-static int init_tile(Jpeg2000DecoderContext *s, int tileno)
+static int init_tile(AVCodecContext *avctx, void *td,
+ int jobnr, int threadnr)
{
- int compno;
- int tilex = tileno % s->numXtiles;
- int tiley = tileno / s->numXtiles;
- Jpeg2000Tile *tile = s->tile + tileno;
+ const Jpeg2000DecoderContext *s = avctx->priv_data;
+ int tileno = jobnr / s->ncomponents;
+ int tilex = tileno % s->numXtiles;
+ int tiley = tileno / s->numXtiles;
+ int compno = jobnr % s->ncomponents;
+ Jpeg2000Tile *tile = s->tile + tileno;
+ Jpeg2000Component *comp = tile->comp + compno;
+ Jpeg2000CodingStyle *codsty = tile->codsty + compno;
+ Jpeg2000QuantStyle *qntsty = tile->qntsty + compno;
+ int ret; // global bandno
if (!tile->comp)
return AVERROR(ENOMEM);
+ if (compno == 0) {
tile->coord[0][0] = av_clip(tilex * (int64_t)s->tile_width + s->tile_offset_x, s->image_offset_x, s->width);
tile->coord[0][1] = av_clip((tilex + 1) * (int64_t)s->tile_width + s->tile_offset_x, s->image_offset_x, s->width);
tile->coord[1][0] = av_clip(tiley * (int64_t)s->tile_height + s->tile_offset_y, s->image_offset_y, s->height);
tile->coord[1][1] = av_clip((tiley + 1) * (int64_t)s->tile_height + s->tile_offset_y, s->image_offset_y, s->height);
-
- for (compno = 0; compno < s->ncomponents; compno++) {
- Jpeg2000Component *comp = tile->comp + compno;
- Jpeg2000CodingStyle *codsty = tile->codsty + compno;
- Jpeg2000QuantStyle *qntsty = tile->qntsty + compno;
- int ret; // global bandno
+ }
comp->coord_o[0][0] = tile->coord[0][0];
comp->coord_o[0][1] = tile->coord[0][1];
@@ -1063,7 +1066,7 @@ static int init_tile(Jpeg2000DecoderContext *s, int tileno)
s->cbps[compno], s->cdx[compno],
s->cdy[compno], s->avctx, s->slices))
return ret;
- }
+
return 0;
}
@@ -2371,9 +2374,6 @@ static int jpeg2000_read_bitstream_packets(Jpeg2000DecoderContext *s)
for (tileno = 0; tileno < s->numXtiles * s->numYtiles; tileno++) {
Jpeg2000Tile *tile = s->tile + tileno;
- if ((ret = init_tile(s, tileno)) < 0)
- return ret;
-
if ((ret = jpeg2000_decode_packets(s, tile)) < 0)
return ret;
}
@@ -2668,6 +2668,9 @@ static int jpeg2000_decode_frame(AVCodecContext *avctx, AVFrame *picture,
picture->key_frame = 1;
s->slices = avctx->active_thread_type == FF_THREAD_SLICE ? avctx->thread_count : 1;
+ if ((ret = avctx->execute2(avctx, init_tile, NULL, NULL, s->numXtiles * s->numYtiles * s->ncomponents)) < 0)
+ goto end;
+
if (ret = jpeg2000_read_bitstream_packets(s))
goto end;
--
2.30.2
[-- Attachment #3: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 12+ messages in thread
* [FFmpeg-devel] [PATCH 05/11] lavc/jpeg2000*: Use av_realloc_array_reuse() and av_reallocz_array_reuse() to eliminate lots of allocations
2022-09-28 10:04 [FFmpeg-devel] [PATCH 01/11] lavc/jpeg2000dec: Finer granularity threading Tomas Härdin
` (2 preceding siblings ...)
2022-09-28 10:06 ` [FFmpeg-devel] [PATCH 04/11] lavc/jpeg2000dec: Thread init_tile() Tomas Härdin
@ 2022-09-28 10:06 ` Tomas Härdin
2022-09-28 10:07 ` [FFmpeg-devel] [PATCH 06/11] lavc/jpeg2000: Switch Jpeg2000TgtNode to int32_t parent Tomas Härdin
` (5 subsequent siblings)
9 siblings, 0 replies; 12+ messages in thread
From: Tomas Härdin @ 2022-09-28 10:06 UTC (permalink / raw)
To: FFmpeg development discussions and patches
[-- Attachment #1: Type: text/plain, Size: 1 bytes --]
[-- Attachment #2: 0005-lavc-jpeg2000-Use-av_realloc_array_reuse-and-av_real.patch --]
[-- Type: text/x-patch, Size: 17753 bytes --]
From 5a5986c29d62933f3f2cd2259becb763f3719eaa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= <git@haerdin.se>
Date: Mon, 13 Jun 2022 17:04:10 +0200
Subject: [PATCH 05/11] lavc/jpeg2000*: Use av_realloc_array_reuse() and
av_reallocz_array_reuse() to eliminate lots of allocations
---
libavcodec/jpeg2000.c | 79 +++++++++++++++++++++-------------------
libavcodec/jpeg2000.h | 10 +++++
libavcodec/jpeg2000dec.c | 48 ++++++++++--------------
libavcodec/jpeg2000dwt.c | 9 ++++-
libavcodec/jpeg2000dwt.h | 2 +
5 files changed, 80 insertions(+), 68 deletions(-)
diff --git a/libavcodec/jpeg2000.c b/libavcodec/jpeg2000.c
index 945b787565..afff9809e4 100644
--- a/libavcodec/jpeg2000.c
+++ b/libavcodec/jpeg2000.c
@@ -52,17 +52,23 @@ static int32_t tag_tree_size(int w, int h)
}
/* allocate the memory for tag tree */
-static Jpeg2000TgtNode *ff_jpeg2000_tag_tree_init(int w, int h)
+static int ff_jpeg2000_tag_tree_init(Jpeg2000TgtNode **old, unsigned int *size, int w, int h)
{
int pw = w, ph = h;
- Jpeg2000TgtNode *res, *t, *t2;
+ Jpeg2000TgtNode *t, *t2;
int32_t tt_size;
+ size_t prod;
tt_size = tag_tree_size(w, h);
- t = res = av_calloc(tt_size, sizeof(*t));
- if (!res)
- return NULL;
+ if (av_size_mult(tt_size, sizeof(*t), &prod))
+ return AVERROR(ENOMEM);
+
+ av_fast_malloc(old, size, prod);
+ if (!*old)
+ return AVERROR(ENOMEM);
+ t = *old;
+ memset(*old, 0, prod);
while (w > 1 || h > 1) {
int i, j;
@@ -80,7 +86,7 @@ static Jpeg2000TgtNode *ff_jpeg2000_tag_tree_init(int w, int h)
t = t2;
}
t[0].parent = NULL;
- return res;
+ return 0;
}
void ff_tag_tree_zero(Jpeg2000TgtNode *t, int w, int h, int val)
@@ -278,7 +284,7 @@ static int init_prec(AVCodecContext *avctx,
int log2_band_prec_height)
{
Jpeg2000Prec *prec = band->prec + precno;
- int nb_codeblocks, cblkno;
+ int nb_codeblocks, cblkno, ret;
prec->decoded_layers = 0;
@@ -316,25 +322,22 @@ static int init_prec(AVCodecContext *avctx,
/* Tag trees initialization */
- prec->cblkincl =
- ff_jpeg2000_tag_tree_init(prec->nb_codeblocks_width,
- prec->nb_codeblocks_height);
- if (!prec->cblkincl)
- return AVERROR(ENOMEM);
-
- prec->zerobits =
- ff_jpeg2000_tag_tree_init(prec->nb_codeblocks_width,
- prec->nb_codeblocks_height);
- if (!prec->zerobits)
- return AVERROR(ENOMEM);
+ if ((ret = ff_jpeg2000_tag_tree_init(&prec->cblkincl,
+ &prec->cblkincl_size,
+ prec->nb_codeblocks_width,
+ prec->nb_codeblocks_height)) < 0 ||
+ (ret = ff_jpeg2000_tag_tree_init(&prec->zerobits,
+ &prec->zerobits_size,
+ prec->nb_codeblocks_width,
+ prec->nb_codeblocks_height)) < 0)
+ return ret;
if (prec->nb_codeblocks_width * (uint64_t)prec->nb_codeblocks_height > INT_MAX) {
prec->cblk = NULL;
return AVERROR(ENOMEM);
}
nb_codeblocks = prec->nb_codeblocks_width * prec->nb_codeblocks_height;
- prec->cblk = av_calloc(nb_codeblocks, sizeof(*prec->cblk));
- if (!prec->cblk)
+ if (av_reallocz_array_reuse(&prec->cblk, &prec->cblk_allocated, nb_codeblocks, INT_MAX, sizeof(*prec->cblk)))
return AVERROR(ENOMEM);
for (cblkno = 0; cblkno < nb_codeblocks; cblkno++) {
Jpeg2000Cblk *cblk = prec->cblk + cblkno;
@@ -376,6 +379,7 @@ static int init_prec(AVCodecContext *avctx,
cblk->length = 0;
cblk->npasses = 0;
if (av_codec_is_encoder(avctx->codec)) {
+ av_freep(&cblk->layers);
cblk->layers = av_calloc(codsty->nlayers, sizeof(*cblk->layers));
if (!cblk->layers)
return AVERROR(ENOMEM);
@@ -448,8 +452,7 @@ static int init_band(AVCodecContext *avctx,
return AVERROR(ENOMEM);
}
nb_precincts = reslevel->num_precincts_x * reslevel->num_precincts_y;
- band->prec = av_calloc(nb_precincts, sizeof(*band->prec));
- if (!band->prec)
+ if (av_reallocz_array_reuse(&band->prec, &band->prec_allocated, nb_precincts, INT_MAX, sizeof(*band->prec)))
return AVERROR(ENOMEM);
for (precno = 0; precno < nb_precincts; precno++) {
@@ -471,6 +474,7 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp,
{
int reslevelno, bandno, gbandno = 0, ret, i, j;
uint32_t csize;
+ size_t prod;
if (codsty->nreslevels2decode <= 0) {
av_log(avctx, AV_LOG_ERROR, "nreslevels2decode %d invalid or uninitialized\n", codsty->nreslevels2decode);
@@ -496,19 +500,22 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp,
if (codsty->transform == FF_DWT97) {
csize += AV_INPUT_BUFFER_PADDING_SIZE / sizeof(*comp->f_data);
- comp->i_data = NULL;
- comp->f_data = av_calloc(csize, sizeof(*comp->f_data));
+ if (av_size_mult(csize, sizeof(*comp->f_data), &prod))
+ return AVERROR(ENOMEM);
+ av_fast_malloc(&comp->f_data, &comp->f_data_size, prod);
if (!comp->f_data)
return AVERROR(ENOMEM);
+ memset(comp->f_data, 0, prod);
} else {
csize += AV_INPUT_BUFFER_PADDING_SIZE / sizeof(*comp->i_data);
- comp->f_data = NULL;
- comp->i_data = av_calloc(csize, sizeof(*comp->i_data));
+ if (av_size_mult(csize, sizeof(*comp->i_data), &prod))
+ return AVERROR(ENOMEM);
+ av_fast_malloc(&comp->i_data, &comp->i_data_size, prod);
if (!comp->i_data)
return AVERROR(ENOMEM);
+ memset(comp->i_data, 0, prod);
}
- comp->reslevel = av_calloc(codsty->nreslevels, sizeof(*comp->reslevel));
- if (!comp->reslevel)
+ if (av_reallocz_array_reuse(&comp->reslevel, &comp->reslevel_allocated, codsty->nreslevels, INT_MAX, sizeof(*comp->reslevel)))
return AVERROR(ENOMEM);
/* LOOP on resolution levels */
for (reslevelno = 0; reslevelno < codsty->nreslevels; reslevelno++) {
@@ -555,8 +562,7 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp,
reslevel->log2_prec_height) -
(reslevel->coord[1][0] >> reslevel->log2_prec_height);
- reslevel->band = av_calloc(reslevel->nbands, sizeof(*reslevel->band));
- if (!reslevel->band)
+ if (av_reallocz_array_reuse(&reslevel->band, &reslevel->band_allocated, reslevel->nbands, INT_MAX, sizeof(*reslevel->band)))
return AVERROR(ENOMEM);
if (reslevel->num_precincts_x * (uint64_t)reslevel->num_precincts_y * reslevel->nbands > avctx->max_pixels / sizeof(*reslevel->band->prec))
@@ -597,9 +603,9 @@ void ff_jpeg2000_reinit(Jpeg2000Component *comp, Jpeg2000CodingStyle *codsty)
void ff_jpeg2000_cleanup(Jpeg2000Component *comp, Jpeg2000CodingStyle *codsty)
{
- int reslevelno, bandno, precno;
+ size_t reslevelno, bandno, precno;
for (reslevelno = 0;
- comp->reslevel && reslevelno < codsty->nreslevels;
+ comp->reslevel && reslevelno < comp->reslevel_allocated;
reslevelno++) {
Jpeg2000ResLevel *reslevel;
@@ -607,23 +613,20 @@ void ff_jpeg2000_cleanup(Jpeg2000Component *comp, Jpeg2000CodingStyle *codsty)
continue;
reslevel = comp->reslevel + reslevelno;
- for (bandno = 0; bandno < reslevel->nbands; bandno++) {
+ for (bandno = 0; bandno < reslevel->band_allocated; bandno++) {
Jpeg2000Band *band;
if (!reslevel->band)
continue;
band = reslevel->band + bandno;
- for (precno = 0; precno < reslevel->num_precincts_x * reslevel->num_precincts_y; precno++) {
+ for (precno = 0; precno < band->prec_allocated; precno++) {
if (band->prec) {
Jpeg2000Prec *prec = band->prec + precno;
- int nb_code_blocks = prec->nb_codeblocks_height * prec->nb_codeblocks_width;
-
av_freep(&prec->zerobits);
av_freep(&prec->cblkincl);
if (prec->cblk) {
- int cblkno;
- for (cblkno = 0; cblkno < nb_code_blocks; cblkno ++) {
+ for (size_t cblkno = 0; cblkno < prec->cblk_allocated; cblkno ++) {
Jpeg2000Cblk *cblk = &prec->cblk[cblkno];
av_freep(&cblk->data);
av_freep(&cblk->passes);
diff --git a/libavcodec/jpeg2000.h b/libavcodec/jpeg2000.h
index 389813a9b9..6594d8e5cb 100644
--- a/libavcodec/jpeg2000.h
+++ b/libavcodec/jpeg2000.h
@@ -179,6 +179,7 @@ typedef struct Jpeg2000Cblk {
uint8_t incl;
uint16_t length;
uint16_t *lengthinc;
+ size_t lengthinc_allocated;
uint8_t nb_lengthinc;
uint8_t lblock;
uint8_t *data;
@@ -186,6 +187,7 @@ typedef struct Jpeg2000Cblk {
int nb_terminations;
int nb_terminationsinc;
int *data_start;
+ size_t data_start_allocated;
Jpeg2000Pass *passes;
Jpeg2000Layer *layers;
int coord[2][2]; // border coordinates {{x0, x1}, {y0, y1}}
@@ -195,8 +197,11 @@ typedef struct Jpeg2000Prec {
int nb_codeblocks_width;
int nb_codeblocks_height;
Jpeg2000TgtNode *zerobits;
+ unsigned int zerobits_size;
Jpeg2000TgtNode *cblkincl;
+ unsigned int cblkincl_size;
Jpeg2000Cblk *cblk;
+ size_t cblk_allocated;
int decoded_layers;
int coord[2][2]; // border coordinates {{x0, x1}, {y0, y1}}
} Jpeg2000Prec; // precinct
@@ -207,6 +212,7 @@ typedef struct Jpeg2000Band {
int i_stepsize; // quantization stepsize
float f_stepsize; // quantization stepsize
Jpeg2000Prec *prec;
+ size_t prec_allocated;
} Jpeg2000Band; // subband
typedef struct Jpeg2000ResLevel {
@@ -215,13 +221,17 @@ typedef struct Jpeg2000ResLevel {
int num_precincts_x, num_precincts_y; // number of precincts in x/y direction
uint8_t log2_prec_width, log2_prec_height; // exponent of precinct size
Jpeg2000Band *band;
+ size_t band_allocated;
} Jpeg2000ResLevel; // resolution level
typedef struct Jpeg2000Component {
Jpeg2000ResLevel *reslevel;
+ size_t reslevel_allocated;
DWTContext dwt;
float *f_data;
+ unsigned int f_data_size;
int *i_data;
+ unsigned int i_data_size;
int coord[2][2]; // border coordinates {{x0, x1}, {y0, y1}} -- can be reduced with lowres option
int coord_o[2][2]; // border coordinates {{x0, x1}, {y0, y1}} -- original values from jpeg2000 headers
uint8_t roi_shift; // ROI scaling value for the component
diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c
index 00aa73e261..49a815a9b0 100644
--- a/libavcodec/jpeg2000dec.c
+++ b/libavcodec/jpeg2000dec.c
@@ -79,6 +79,7 @@ typedef struct Jpeg2000TilePart {
* one per component, so tile_part elements have a size of 3 */
typedef struct Jpeg2000Tile {
Jpeg2000Component *comp;
+ size_t comp_allocated;
uint8_t properties[4];
Jpeg2000CodingStyle codsty[4];
Jpeg2000QuantStyle qntsty[4];
@@ -141,6 +142,7 @@ typedef struct Jpeg2000DecoderContext {
int curtileno;
Jpeg2000Tile *tile;
+ size_t tile_allocated;
Jpeg2000DSPContext dsp;
/*options parameters*/
@@ -380,8 +382,7 @@ static int get_siz(Jpeg2000DecoderContext *s)
return AVERROR(EINVAL);
}
- s->tile = av_calloc(s->numXtiles * s->numYtiles, sizeof(*s->tile));
- if (!s->tile) {
+ if (av_reallocz_array_reuse(&s->tile, &s->tile_allocated, s->numXtiles * s->numYtiles, INT_MAX, sizeof(*s->tile))) {
s->numXtiles = s->numYtiles = 0;
return AVERROR(ENOMEM);
}
@@ -389,8 +390,7 @@ static int get_siz(Jpeg2000DecoderContext *s)
for (i = 0; i < s->numXtiles * s->numYtiles; i++) {
Jpeg2000Tile *tile = s->tile + i;
- tile->comp = av_mallocz(s->ncomponents * sizeof(*tile->comp));
- if (!tile->comp)
+ if (av_reallocz_array_reuse(&tile->comp, &tile->comp_allocated, s->ncomponents, INT_MAX, sizeof(*tile->comp)))
return AVERROR(ENOMEM);
}
@@ -1160,7 +1160,6 @@ static int jpeg2000_decode_packet(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile,
for (cblkno = 0; cblkno < nb_code_blocks; cblkno++) {
Jpeg2000Cblk *cblk = prec->cblk + cblkno;
int incl, newpasses, llen;
- void *tmp;
if (cblk->npasses)
incl = get_bits(s, 1);
@@ -1200,14 +1199,10 @@ static int jpeg2000_decode_packet(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile,
cblk->nb_lengthinc = 0;
cblk->nb_terminationsinc = 0;
- av_free(cblk->lengthinc);
- cblk->lengthinc = av_calloc(newpasses, sizeof(*cblk->lengthinc));
- if (!cblk->lengthinc)
+ if (av_realloc_array_reuse(&cblk->lengthinc, &cblk->lengthinc_allocated, newpasses, INT_MAX, sizeof(*cblk->lengthinc)) ||
+ av_realloc_array_reuse(&cblk->data_start, &cblk->data_start_allocated, cblk->nb_terminations + newpasses + 1, INT_MAX, sizeof(*cblk->data_start)))
return AVERROR(ENOMEM);
- tmp = av_realloc_array(cblk->data_start, cblk->nb_terminations + newpasses + 1, sizeof(*cblk->data_start));
- if (!tmp)
- return AVERROR(ENOMEM);
- cblk->data_start = tmp;
+
do {
int newpasses1 = 0;
@@ -1296,7 +1291,6 @@ static int jpeg2000_decode_packet(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile,
cblk->data_start[cblk->nb_terminations] = cblk->length;
}
}
- av_freep(&cblk->lengthinc);
}
}
// Save state of stream
@@ -2172,24 +2166,9 @@ static int jpeg2000_mct_write_frame(AVCodecContext *avctx, void *td,
static void jpeg2000_dec_cleanup(Jpeg2000DecoderContext *s)
{
- int tileno, compno;
- for (tileno = 0; tileno < s->numXtiles * s->numYtiles; tileno++) {
- if (s->tile[tileno].comp) {
- for (compno = 0; compno < s->ncomponents; compno++) {
- Jpeg2000Component *comp = s->tile[tileno].comp + compno;
- Jpeg2000CodingStyle *codsty = s->tile[tileno].codsty + compno;
-
- ff_jpeg2000_cleanup(comp, codsty);
- }
- av_freep(&s->tile[tileno].comp);
- av_freep(&s->tile[tileno].packed_headers);
- s->tile[tileno].packed_headers_size = 0;
- }
- }
av_freep(&s->packed_headers);
s->packed_headers_size = 0;
memset(&s->packed_headers_stream, 0, sizeof(s->packed_headers_stream));
- av_freep(&s->tile);
memset(s->codsty, 0, sizeof(s->codsty));
memset(s->qntsty, 0, sizeof(s->qntsty));
memset(s->properties, 0, sizeof(s->properties));
@@ -2726,6 +2705,19 @@ static av_cold int jpeg2000_decode_close(AVCodecContext *avctx)
{
Jpeg2000DecoderContext *s = avctx->priv_data;
+ for (size_t tileno = 0; tileno < s->tile_allocated; tileno++) {
+ if (s->tile[tileno].comp) {
+ for (size_t compno = 0; compno < s->tile[tileno].comp_allocated; compno++) {
+ Jpeg2000Component *comp = s->tile[tileno].comp + compno;
+ Jpeg2000CodingStyle *codsty = s->tile[tileno].codsty + compno;
+
+ ff_jpeg2000_cleanup(comp, codsty);
+ }
+ av_freep(&s->tile[tileno].comp);
+ av_freep(&s->tile[tileno].packed_headers);
+ }
+ }
+ av_freep(&s->tile);
av_freep(&s->idwt);
av_freep(&s->cb);
diff --git a/libavcodec/jpeg2000dwt.c b/libavcodec/jpeg2000dwt.c
index 921461b6d7..f3ddefe48f 100644
--- a/libavcodec/jpeg2000dwt.c
+++ b/libavcodec/jpeg2000dwt.c
@@ -531,6 +531,7 @@ int ff_jpeg2000_dwt_init(DWTContext *s, int border[2][2],
{
int i, j, lev = decomp_levels,
b[2][2];
+ size_t prod;
s->ndeclevels = decomp_levels;
s->type = type;
@@ -556,11 +557,15 @@ int ff_jpeg2000_dwt_init(DWTContext *s, int border[2][2],
}
if (type == FF_DWT97) {
- s->f_linebuf = av_malloc_array(s->linesize, s->max_slices*sizeof(*s->f_linebuf));
+ if (av_size_mult(s->linesize, s->max_slices*sizeof(*s->f_linebuf), &prod))
+ return AVERROR(ENOMEM);
+ av_fast_malloc(&s->f_linebuf, &s->f_linebuf_size, prod);
if (!s->f_linebuf)
return AVERROR(ENOMEM);
} else {
- s->i_linebuf = av_malloc_array(s->linesize, s->max_slices*sizeof(*s->i_linebuf));
+ if (av_size_mult(s->linesize, s->max_slices*sizeof(*s->i_linebuf), &prod))
+ return AVERROR(ENOMEM);
+ av_fast_malloc(&s->i_linebuf, &s->i_linebuf_size, prod);
if (!s->i_linebuf)
return AVERROR(ENOMEM);
}
diff --git a/libavcodec/jpeg2000dwt.h b/libavcodec/jpeg2000dwt.h
index d5e94c9916..fb6fc8f121 100644
--- a/libavcodec/jpeg2000dwt.h
+++ b/libavcodec/jpeg2000dwt.h
@@ -48,7 +48,9 @@ typedef struct DWTContext {
uint8_t ndeclevels; ///< number of decomposition levels
uint8_t type; ///< 0 for 9/7; 1 for 5/3
int32_t *i_linebuf; ///< int buffer used by transform
+ unsigned int i_linebuf_size;
float *f_linebuf; ///< float buffer used by transform
+ unsigned int f_linebuf_size;
int max_slices;
int linesize;
} DWTContext;
--
2.30.2
[-- Attachment #3: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 12+ messages in thread
* [FFmpeg-devel] [PATCH 06/11] lavc/jpeg2000: Switch Jpeg2000TgtNode to int32_t parent
2022-09-28 10:04 [FFmpeg-devel] [PATCH 01/11] lavc/jpeg2000dec: Finer granularity threading Tomas Härdin
` (3 preceding siblings ...)
2022-09-28 10:06 ` [FFmpeg-devel] [PATCH 05/11] lavc/jpeg2000*: Use av_realloc_array_reuse() and av_reallocz_array_reuse() to eliminate lots of allocations Tomas Härdin
@ 2022-09-28 10:07 ` Tomas Härdin
2022-09-28 10:07 ` [FFmpeg-devel] [PATCH 07/11] lavc/jpeg2000: Speed up ff_jpeg2000_tag_tree_init() using stereotypes for sizes <= 4x4 Tomas Härdin
` (4 subsequent siblings)
9 siblings, 0 replies; 12+ messages in thread
From: Tomas Härdin @ 2022-09-28 10:07 UTC (permalink / raw)
To: FFmpeg development discussions and patches
[-- Attachment #1: Type: text/plain, Size: 1 bytes --]
[-- Attachment #2: 0006-lavc-jpeg2000-Switch-Jpeg2000TgtNode-to-int32_t-pare.patch --]
[-- Type: text/x-patch, Size: 9160 bytes --]
From 807d7d315269126e7eccd0c36d7c29615cb98676 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= <git@haerdin.se>
Date: Tue, 7 Jun 2022 16:43:40 +0200
Subject: [PATCH 06/11] lavc/jpeg2000: Switch Jpeg2000TgtNode to int32_t parent
---
libavcodec/j2kenc.c | 44 ++++++++++++++++++++--------------------
libavcodec/jpeg2000.c | 20 +++++++++---------
libavcodec/jpeg2000.h | 2 +-
libavcodec/jpeg2000dec.c | 18 ++++++++--------
4 files changed, 42 insertions(+), 42 deletions(-)
diff --git a/libavcodec/j2kenc.c b/libavcodec/j2kenc.c
index cd325e94e0..8f23ddbcf6 100644
--- a/libavcodec/j2kenc.c
+++ b/libavcodec/j2kenc.c
@@ -249,36 +249,36 @@ static void j2k_flush(Jpeg2000EncoderContext *s)
/* tag tree routines */
/** code the value stored in node */
-static void tag_tree_code(Jpeg2000EncoderContext *s, Jpeg2000TgtNode *node, int threshold)
+static void tag_tree_code(Jpeg2000EncoderContext *s, Jpeg2000TgtNode *nodes, int32_t node, int threshold)
{
- Jpeg2000TgtNode *stack[30];
+ int32_t stack[30];
int sp = -1, curval = 0;
- while(node->parent){
+ while(nodes[node].parent >= 0){
stack[++sp] = node;
- node = node->parent;
+ node = nodes[node].parent;
}
while (1) {
- if (curval > node->temp_val)
- node->temp_val = curval;
+ if (curval > nodes[node].temp_val)
+ nodes[node].temp_val = curval;
else {
- curval = node->temp_val;
+ curval = nodes[node].temp_val;
}
- if (node->val >= threshold) {
+ if (nodes[node].val >= threshold) {
put_bits(s, 0, threshold - curval);
curval = threshold;
} else {
- put_bits(s, 0, node->val - curval);
- curval = node->val;
- if (!node->vis) {
+ put_bits(s, 0, nodes[node].val - curval);
+ curval = nodes[node].val;
+ if (!nodes[node].vis) {
put_bits(s, 1, 1);
- node->vis = 1;
+ nodes[node].vis = 1;
}
}
- node->temp_val = curval;
+ nodes[node].temp_val = curval;
if (sp < 0)
break;
node = stack[sp--];
@@ -286,13 +286,13 @@ static void tag_tree_code(Jpeg2000EncoderContext *s, Jpeg2000TgtNode *node, int
}
/** update the value in node */
-static void tag_tree_update(Jpeg2000TgtNode *node)
+static void tag_tree_update(Jpeg2000TgtNode *nodes, int node)
{
- while (node->parent){
- if (node->parent->val <= node->val)
+ while (nodes[node].parent >= 0){
+ if (nodes[nodes[node].parent].val <= nodes[node].val)
break;
- node->parent->val = node->val;
- node = node->parent;
+ nodes[nodes[node].parent].val = nodes[node].val;
+ node = nodes[node].parent;
}
}
@@ -812,7 +812,7 @@ static int encode_packet(Jpeg2000EncoderContext *s, Jpeg2000ResLevel *rlevel, in
prec->zerobits[pos].val = expn[bandno] + numgbits - 1 - cblk->nonzerobits;
cblk->incl = 0;
cblk->lblock = 3;
- tag_tree_update(prec->zerobits + pos);
+ tag_tree_update(prec->zerobits, pos);
for (i = 0; i < nlayers; i++) {
if (cblk->layers[i].npasses > 0) {
prec->cblkincl[pos].val = i;
@@ -821,7 +821,7 @@ static int encode_packet(Jpeg2000EncoderContext *s, Jpeg2000ResLevel *rlevel, in
}
if (i == nlayers)
prec->cblkincl[pos].val = i;
- tag_tree_update(prec->cblkincl + pos);
+ tag_tree_update(prec->cblkincl, pos);
}
}
}
@@ -875,7 +875,7 @@ static int encode_packet(Jpeg2000EncoderContext *s, Jpeg2000ResLevel *rlevel, in
// inclusion information
if (!cblk->incl)
- tag_tree_code(s, prec->cblkincl + pos, layno + 1);
+ tag_tree_code(s, prec->cblkincl, pos, layno + 1);
else {
put_bits(s, cblk->layers[layno].npasses > 0, 1);
}
@@ -885,7 +885,7 @@ static int encode_packet(Jpeg2000EncoderContext *s, Jpeg2000ResLevel *rlevel, in
// zerobits information
if (!cblk->incl) {
- tag_tree_code(s, prec->zerobits + pos, 100);
+ tag_tree_code(s, prec->zerobits, pos, 100);
cblk->incl = 1;
}
diff --git a/libavcodec/jpeg2000.c b/libavcodec/jpeg2000.c
index afff9809e4..4ddb45bf33 100644
--- a/libavcodec/jpeg2000.c
+++ b/libavcodec/jpeg2000.c
@@ -55,8 +55,8 @@ static int32_t tag_tree_size(int w, int h)
static int ff_jpeg2000_tag_tree_init(Jpeg2000TgtNode **old, unsigned int *size, int w, int h)
{
int pw = w, ph = h;
- Jpeg2000TgtNode *t, *t2;
- int32_t tt_size;
+ Jpeg2000TgtNode *t;
+ int32_t tt_size, ofs = 0;
size_t prod;
tt_size = tag_tree_size(w, h);
@@ -77,15 +77,15 @@ static int ff_jpeg2000_tag_tree_init(Jpeg2000TgtNode **old, unsigned int *size,
w = (w + 1) >> 1;
h = (h + 1) >> 1;
- t2 = t + pw * ph;
+ ofs += pw * ph;
for (i = 0; i < ph; i++)
for (j = 0; j < pw; j++)
- t[i * pw + j].parent = &t2[(i >> 1) * w + (j >> 1)];
+ t[i * pw + j].parent = (i >> 1) * w + (j >> 1) + ofs;
- t = t2;
+ t += pw * ph;
}
- t[0].parent = NULL;
+ t[0].parent = -1;
return 0;
}
@@ -320,6 +320,10 @@ static int init_prec(AVCodecContext *avctx,
band->log2_cblk_height)
- (prec->coord[1][0] >> band->log2_cblk_height);
+ /* \sum_{i=0}^{\infty} 4^{-i} = 4/3 */
+ if (prec->nb_codeblocks_width * (uint64_t)prec->nb_codeblocks_height > INT32_MAX / 4 * 3) {
+ return AVERROR(ENOMEM);
+ }
/* Tag trees initialization */
if ((ret = ff_jpeg2000_tag_tree_init(&prec->cblkincl,
@@ -332,10 +336,6 @@ static int init_prec(AVCodecContext *avctx,
prec->nb_codeblocks_height)) < 0)
return ret;
- if (prec->nb_codeblocks_width * (uint64_t)prec->nb_codeblocks_height > INT_MAX) {
- prec->cblk = NULL;
- return AVERROR(ENOMEM);
- }
nb_codeblocks = prec->nb_codeblocks_width * prec->nb_codeblocks_height;
if (av_reallocz_array_reuse(&prec->cblk, &prec->cblk_allocated, nb_codeblocks, INT_MAX, sizeof(*prec->cblk)))
return AVERROR(ENOMEM);
diff --git a/libavcodec/jpeg2000.h b/libavcodec/jpeg2000.h
index 6594d8e5cb..c9a2e55efa 100644
--- a/libavcodec/jpeg2000.h
+++ b/libavcodec/jpeg2000.h
@@ -128,10 +128,10 @@ typedef struct Jpeg2000T1Context {
} Jpeg2000T1Context;
typedef struct Jpeg2000TgtNode {
+ int32_t parent;
uint8_t val;
uint8_t temp_val;
uint8_t vis;
- struct Jpeg2000TgtNode *parent;
} Jpeg2000TgtNode;
typedef struct Jpeg2000CodingStyle {
diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c
index 49a815a9b0..46f7d841b5 100644
--- a/libavcodec/jpeg2000dec.c
+++ b/libavcodec/jpeg2000dec.c
@@ -185,24 +185,24 @@ static void jpeg2000_flush(Jpeg2000DecoderContext *s)
}
/* decode the value stored in node */
-static int tag_tree_decode(Jpeg2000DecoderContext *s, Jpeg2000TgtNode *node,
+static int tag_tree_decode(Jpeg2000DecoderContext *s, Jpeg2000TgtNode *nodes, int32_t node,
int threshold)
{
Jpeg2000TgtNode *stack[30];
int sp = -1, curval = 0;
- if (!node) {
+ if (node < 0) {
av_log(s->avctx, AV_LOG_ERROR, "missing node\n");
return AVERROR_INVALIDDATA;
}
- while (node && !node->vis) {
- stack[++sp] = node;
- node = node->parent;
+ while (node >= 0 && !nodes[node].vis) {
+ stack[++sp] = &nodes[node];
+ node = nodes[node].parent;
}
- if (node)
- curval = node->val;
+ if (node >= 0)
+ curval = nodes[node].val;
else
curval = stack[sp]->val;
@@ -1164,7 +1164,7 @@ static int jpeg2000_decode_packet(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile,
if (cblk->npasses)
incl = get_bits(s, 1);
else
- incl = tag_tree_decode(s, prec->cblkincl + cblkno, layno + 1) == layno;
+ incl = tag_tree_decode(s, prec->cblkincl, cblkno, layno + 1) == layno;
if (!incl)
continue;
else if (incl < 0)
@@ -1172,7 +1172,7 @@ static int jpeg2000_decode_packet(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile,
if (!cblk->npasses) {
int v = expn[bandno] + numgbits - 1 -
- tag_tree_decode(s, prec->zerobits + cblkno, 100);
+ tag_tree_decode(s, prec->zerobits, cblkno, 100);
if (v < 0 || v > 30) {
av_log(s->avctx, AV_LOG_ERROR,
"nonzerobits %d invalid or unsupported\n", v);
--
2.30.2
[-- Attachment #3: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 12+ messages in thread
* [FFmpeg-devel] [PATCH 07/11] lavc/jpeg2000: Speed up ff_jpeg2000_tag_tree_init() using stereotypes for sizes <= 4x4
2022-09-28 10:04 [FFmpeg-devel] [PATCH 01/11] lavc/jpeg2000dec: Finer granularity threading Tomas Härdin
` (4 preceding siblings ...)
2022-09-28 10:07 ` [FFmpeg-devel] [PATCH 06/11] lavc/jpeg2000: Switch Jpeg2000TgtNode to int32_t parent Tomas Härdin
@ 2022-09-28 10:07 ` Tomas Härdin
2022-09-28 10:07 ` [FFmpeg-devel] [PATCH 08/11] lavc/jpeg2000: Reindent Tomas Härdin
` (3 subsequent siblings)
9 siblings, 0 replies; 12+ messages in thread
From: Tomas Härdin @ 2022-09-28 10:07 UTC (permalink / raw)
To: FFmpeg development discussions and patches
[-- Attachment #1: Type: text/plain, Size: 1 bytes --]
[-- Attachment #2: 0007-lavc-jpeg2000-Speed-up-ff_jpeg2000_tag_tree_init-usi.patch --]
[-- Type: text/x-patch, Size: 2598 bytes --]
From a31a6d82b257f51618389a67af18d49cc78ac240 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= <git@haerdin.se>
Date: Wed, 8 Jun 2022 10:08:15 +0200
Subject: [PATCH 07/11] lavc/jpeg2000: Speed up ff_jpeg2000_tag_tree_init()
using stereotypes for sizes <= 4x4
---
libavcodec/jpeg2000.c | 36 ++++++++++++++++++++++++++++++++++++
1 file changed, 36 insertions(+)
diff --git a/libavcodec/jpeg2000.c b/libavcodec/jpeg2000.c
index 4ddb45bf33..203782502c 100644
--- a/libavcodec/jpeg2000.c
+++ b/libavcodec/jpeg2000.c
@@ -51,6 +51,31 @@ static int32_t tag_tree_size(int w, int h)
return (int32_t)(res + 1);
}
+#define T(x) (x*sizeof(Jpeg2000TgtNode))
+
+static const size_t tt_sizes[16] = {
+ T(1),T(3),T(6),T(7),T(3),T(5),T(9),T(11),T(6),T(9),T(14),T(17),T(7),T(11),T(17),T(21),
+};
+
+static const Jpeg2000TgtNode tt_stereotypes[16][21] = {
+ {{-1},},
+ {{2},{2},{-1},},
+ {{3},{3},{4},{5},{5},{-1},},
+ {{4},{4},{5},{5},{6},{6},{-1},},
+ {{2},{2},{-1},},
+ {{4},{4},{4},{4},{-1},},
+ {{6},{6},{7},{6},{6},{7},{8},{8},{-1},},
+ {{8},{8},{9},{9},{8},{8},{9},{9},{10},{10},{-1},},
+ {{3},{3},{4},{5},{5},{-1},},
+ {{6},{6},{6},{6},{7},{7},{8},{8},{-1},},
+ {{9},{9},{10},{9},{9},{10},{11},{11},{12},{13},{13},{13},{13},{-1},},
+ {{12},{12},{13},{13},{12},{12},{13},{13},{14},{14},{15},{15},{16},{16},{16},{16},{-1},},
+ {{4},{4},{5},{5},{6},{6},{-1},},
+ {{8},{8},{8},{8},{9},{9},{9},{9},{10},{10},{-1},},
+ {{12},{12},{13},{12},{12},{13},{14},{14},{15},{14},{14},{15},{16},{16},{16},{16},{-1},},
+ {{16},{16},{17},{17},{16},{16},{17},{17},{18},{18},{19},{19},{18},{18},{19},{19},{20},{20},{20},{20},{-1},},
+};
+
/* allocate the memory for tag tree */
static int ff_jpeg2000_tag_tree_init(Jpeg2000TgtNode **old, unsigned int *size, int w, int h)
{
@@ -59,6 +84,16 @@ static int ff_jpeg2000_tag_tree_init(Jpeg2000TgtNode **old, unsigned int *size,
int32_t tt_size, ofs = 0;
size_t prod;
+ if (w <= 4 && h <= 4) {
+ int idx = w-1 + (h-1)*4;
+ size_t sz = tt_sizes[idx];
+ av_fast_malloc(old, size, sz);
+ if (*old) {
+ memcpy(*old, tt_stereotypes[idx], sz);
+ return 0;
+ } else
+ return AVERROR(ENOMEM);
+ } else {
tt_size = tag_tree_size(w, h);
if (av_size_mult(tt_size, sizeof(*t), &prod))
@@ -87,6 +122,7 @@ static int ff_jpeg2000_tag_tree_init(Jpeg2000TgtNode **old, unsigned int *size,
}
t[0].parent = -1;
return 0;
+ }
}
void ff_tag_tree_zero(Jpeg2000TgtNode *t, int w, int h, int val)
--
2.30.2
[-- Attachment #3: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 12+ messages in thread
* [FFmpeg-devel] [PATCH 08/11] lavc/jpeg2000: Reindent
2022-09-28 10:04 [FFmpeg-devel] [PATCH 01/11] lavc/jpeg2000dec: Finer granularity threading Tomas Härdin
` (5 preceding siblings ...)
2022-09-28 10:07 ` [FFmpeg-devel] [PATCH 07/11] lavc/jpeg2000: Speed up ff_jpeg2000_tag_tree_init() using stereotypes for sizes <= 4x4 Tomas Härdin
@ 2022-09-28 10:07 ` Tomas Härdin
2022-09-28 10:08 ` [FFmpeg-devel] [PATCH 09/11] lavc/jpeg2000: Minimize calls to av_codec_is_encoder() Tomas Härdin
` (2 subsequent siblings)
9 siblings, 0 replies; 12+ messages in thread
From: Tomas Härdin @ 2022-09-28 10:07 UTC (permalink / raw)
To: FFmpeg development discussions and patches
[-- Attachment #1: Type: text/plain, Size: 1 bytes --]
[-- Attachment #2: 0008-lavc-jpeg2000-Reindent.patch --]
[-- Type: text/x-patch, Size: 2032 bytes --]
From 2e915fbbd481c1d202c0ec058842cfc9f9593871 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= <git@haerdin.se>
Date: Tue, 14 Jun 2022 11:23:08 +0200
Subject: [PATCH 08/11] lavc/jpeg2000: Reindent
---
libavcodec/jpeg2000.c | 44 +++++++++++++++++++++----------------------
1 file changed, 22 insertions(+), 22 deletions(-)
diff --git a/libavcodec/jpeg2000.c b/libavcodec/jpeg2000.c
index 203782502c..2574c2b97e 100644
--- a/libavcodec/jpeg2000.c
+++ b/libavcodec/jpeg2000.c
@@ -94,34 +94,34 @@ static int ff_jpeg2000_tag_tree_init(Jpeg2000TgtNode **old, unsigned int *size,
} else
return AVERROR(ENOMEM);
} else {
- tt_size = tag_tree_size(w, h);
+ tt_size = tag_tree_size(w, h);
- if (av_size_mult(tt_size, sizeof(*t), &prod))
- return AVERROR(ENOMEM);
+ if (av_size_mult(tt_size, sizeof(*t), &prod))
+ return AVERROR(ENOMEM);
- av_fast_malloc(old, size, prod);
- if (!*old)
- return AVERROR(ENOMEM);
- t = *old;
- memset(*old, 0, prod);
+ av_fast_malloc(old, size, prod);
+ if (!*old)
+ return AVERROR(ENOMEM);
+ t = *old;
+ memset(*old, 0, prod);
- while (w > 1 || h > 1) {
- int i, j;
- pw = w;
- ph = h;
+ while (w > 1 || h > 1) {
+ int i, j;
+ pw = w;
+ ph = h;
- w = (w + 1) >> 1;
- h = (h + 1) >> 1;
- ofs += pw * ph;
+ w = (w + 1) >> 1;
+ h = (h + 1) >> 1;
+ ofs += pw * ph;
- for (i = 0; i < ph; i++)
- for (j = 0; j < pw; j++)
- t[i * pw + j].parent = (i >> 1) * w + (j >> 1) + ofs;
+ for (i = 0; i < ph; i++)
+ for (j = 0; j < pw; j++)
+ t[i * pw + j].parent = (i >> 1) * w + (j >> 1) + ofs;
- t += pw * ph;
- }
- t[0].parent = -1;
- return 0;
+ t += pw * ph;
+ }
+ t[0].parent = -1;
+ return 0;
}
}
--
2.30.2
[-- Attachment #3: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 12+ messages in thread
* [FFmpeg-devel] [PATCH 09/11] lavc/jpeg2000: Minimize calls to av_codec_is_encoder()
2022-09-28 10:04 [FFmpeg-devel] [PATCH 01/11] lavc/jpeg2000dec: Finer granularity threading Tomas Härdin
` (6 preceding siblings ...)
2022-09-28 10:07 ` [FFmpeg-devel] [PATCH 08/11] lavc/jpeg2000: Reindent Tomas Härdin
@ 2022-09-28 10:08 ` Tomas Härdin
2022-09-28 10:09 ` [FFmpeg-devel] [PATCH 10/11] lavc/jpeg2000dec: Use coarser slicing for initial reslevels Tomas Härdin
2022-09-28 10:10 ` [FFmpeg-devel] [PATCH 11/11] lavc/jpeg2000dec: Component-level threading of write_frame() Tomas Härdin
9 siblings, 0 replies; 12+ messages in thread
From: Tomas Härdin @ 2022-09-28 10:08 UTC (permalink / raw)
To: FFmpeg development discussions and patches
[-- Attachment #1: Type: text/plain, Size: 1 bytes --]
[-- Attachment #2: 0009-lavc-jpeg2000-Minimize-calls-to-av_codec_is_encoder.patch --]
[-- Type: text/x-patch, Size: 4211 bytes --]
From ecb1d9ff671b83bddb0d1c7d31d60ade4b5cdead Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= <git@haerdin.se>
Date: Tue, 14 Jun 2022 10:57:45 +0200
Subject: [PATCH 09/11] lavc/jpeg2000: Minimize calls to av_codec_is_encoder()
---
libavcodec/jpeg2000.c | 19 +++++++++++--------
1 file changed, 11 insertions(+), 8 deletions(-)
diff --git a/libavcodec/jpeg2000.c b/libavcodec/jpeg2000.c
index 2574c2b97e..bb6efab72e 100644
--- a/libavcodec/jpeg2000.c
+++ b/libavcodec/jpeg2000.c
@@ -248,7 +248,7 @@ static void init_band_stepsize(AVCodecContext *avctx,
Jpeg2000CodingStyle *codsty,
Jpeg2000QuantStyle *qntsty,
int bandno, int gbandno, int reslevelno,
- int cbps)
+ int cbps, int is_enc)
{
/* TODO: Implementation of quantization step not finished,
* see ISO/IEC 15444-1:2002 E.1 and A.6.4. */
@@ -306,7 +306,7 @@ static void init_band_stepsize(AVCodecContext *avctx,
/* FIXME: In OpenJPEG code stepsize = stepsize * 0.5. Why?
* If not set output of entropic decoder is not correct. */
- if (!av_codec_is_encoder(avctx->codec))
+ if (!is_enc)
band->f_stepsize *= 0.5;
}
@@ -317,7 +317,8 @@ static int init_prec(AVCodecContext *avctx,
Jpeg2000CodingStyle *codsty,
int precno, int bandno, int reslevelno,
int log2_band_prec_width,
- int log2_band_prec_height)
+ int log2_band_prec_height,
+ int is_enc)
{
Jpeg2000Prec *prec = band->prec + precno;
int nb_codeblocks, cblkno, ret;
@@ -414,7 +415,7 @@ static int init_prec(AVCodecContext *avctx,
cblk->lblock = 3;
cblk->length = 0;
cblk->npasses = 0;
- if (av_codec_is_encoder(avctx->codec)) {
+ if (is_enc) {
av_freep(&cblk->layers);
cblk->layers = av_calloc(codsty->nlayers, sizeof(*cblk->layers));
if (!cblk->layers)
@@ -431,7 +432,7 @@ static int init_band(AVCodecContext *avctx,
Jpeg2000CodingStyle *codsty,
Jpeg2000QuantStyle *qntsty,
int bandno, int gbandno, int reslevelno,
- int cbps, int dx, int dy)
+ int cbps, int dx, int dy, int is_enc)
{
Jpeg2000Band *band = reslevel->band + bandno;
uint8_t log2_band_prec_width, log2_band_prec_height;
@@ -440,7 +441,7 @@ static int init_band(AVCodecContext *avctx,
int nb_precincts;
int i, j, ret;
- init_band_stepsize(avctx, band, codsty, qntsty, bandno, gbandno, reslevelno, cbps);
+ init_band_stepsize(avctx, band, codsty, qntsty, bandno, gbandno, reslevelno, cbps, is_enc);
/* computation of tbx_0, tbx_1, tby_0, tby_1
* see ISO/IEC 15444-1:2002 B.5 eq. B-15 and tbl B.1
@@ -494,7 +495,8 @@ static int init_band(AVCodecContext *avctx,
for (precno = 0; precno < nb_precincts; precno++) {
ret = init_prec(avctx, band, reslevel, comp, codsty,
precno, bandno, reslevelno,
- log2_band_prec_width, log2_band_prec_height);
+ log2_band_prec_width, log2_band_prec_height,
+ is_enc);
if (ret < 0)
return ret;
}
@@ -511,6 +513,7 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp,
int reslevelno, bandno, gbandno = 0, ret, i, j;
uint32_t csize;
size_t prod;
+ int is_enc = av_codec_is_encoder(avctx->codec);
if (codsty->nreslevels2decode <= 0) {
av_log(avctx, AV_LOG_ERROR, "nreslevels2decode %d invalid or uninitialized\n", codsty->nreslevels2decode);
@@ -608,7 +611,7 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp,
ret = init_band(avctx, reslevel,
comp, codsty, qntsty,
bandno, gbandno, reslevelno,
- cbps, dx, dy);
+ cbps, dx, dy, is_enc);
if (ret < 0)
return ret;
}
--
2.30.2
[-- Attachment #3: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 12+ messages in thread
* [FFmpeg-devel] [PATCH 10/11] lavc/jpeg2000dec: Use coarser slicing for initial reslevels
2022-09-28 10:04 [FFmpeg-devel] [PATCH 01/11] lavc/jpeg2000dec: Finer granularity threading Tomas Härdin
` (7 preceding siblings ...)
2022-09-28 10:08 ` [FFmpeg-devel] [PATCH 09/11] lavc/jpeg2000: Minimize calls to av_codec_is_encoder() Tomas Härdin
@ 2022-09-28 10:09 ` Tomas Härdin
2022-09-28 10:10 ` [FFmpeg-devel] [PATCH 11/11] lavc/jpeg2000dec: Component-level threading of write_frame() Tomas Härdin
9 siblings, 0 replies; 12+ messages in thread
From: Tomas Härdin @ 2022-09-28 10:09 UTC (permalink / raw)
To: FFmpeg development discussions and patches
[-- Attachment #1: Type: text/plain, Size: 156 bytes --]
This patch is more informal; there is probably a better way to push
performance above 50 fps. This is for the smoke_sauna sample on
SVT's FTP, by the way.
[-- Attachment #2: 0010-lavc-jpeg2000dec-Use-coarser-slicing-for-initial-res.patch --]
[-- Type: text/x-patch, Size: 1338 bytes --]
From 103c38adabae39a607049ef517de43f4d2f9d406 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= <git@haerdin.se>
Date: Tue, 14 Jun 2022 11:19:06 +0200
Subject: [PATCH 10/11] lavc/jpeg2000dec: Use coarser slicing for initial
reslevels
This brings -lowres 2 lossless 4K J2K on an AMD EPYC 7R32 to 52 fps (2080% CPU).
---
libavcodec/jpeg2000dec.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c
index 46f7d841b5..71176d944d 100644
--- a/libavcodec/jpeg2000dec.c
+++ b/libavcodec/jpeg2000dec.c
@@ -2673,9 +2673,19 @@ static int jpeg2000_decode_frame(AVCodecContext *avctx, AVFrame *picture,
for (s->reslevel = 0; s->reslevel < maxreslevels; s->reslevel++) {
for (s->dir = 0; s->dir < 2; s->dir++) {
+ int before = s->slices;
+ int div = s->slices >= 96 ? 7 : 5;
+
+ if (s->reslevel < div) {
+ int halve = 1<<(div - s->reslevel + (s->slices >= 96 ? 0 : 1 - s->dir));
+ s->slices = (s->slices + halve-1)/halve;
+ }
+
if ((ret = avctx->execute2(avctx, jpeg2000_idwt, NULL, NULL,
s->numXtiles * s->numYtiles * s->ncomponents * s->slices)) < 0)
goto end;
+
+ s->slices = before;
}
}
--
2.30.2
[-- Attachment #3: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 12+ messages in thread
* [FFmpeg-devel] [PATCH 11/11] lavc/jpeg2000dec: Component-level threading of write_frame()
2022-09-28 10:04 [FFmpeg-devel] [PATCH 01/11] lavc/jpeg2000dec: Finer granularity threading Tomas Härdin
` (8 preceding siblings ...)
2022-09-28 10:09 ` [FFmpeg-devel] [PATCH 10/11] lavc/jpeg2000dec: Use coarser slicing for initial reslevels Tomas Härdin
@ 2022-09-28 10:10 ` Tomas Härdin
9 siblings, 0 replies; 12+ messages in thread
From: Tomas Härdin @ 2022-09-28 10:10 UTC (permalink / raw)
To: FFmpeg development discussions and patches
[-- Attachment #1: Type: text/plain, Size: 99 bytes --]
Ideally, the clipping done by write_frame() and the MCT step would be
done at the final IDWT stage.
[-- Attachment #2: 0011-lavc-jpeg2000dec-Component-level-threading-of-write_.patch --]
[-- Type: text/x-patch, Size: 5610 bytes --]
From 34f055bb0732085d6f97d8f27890b47afb8ca868 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= <git@haerdin.se>
Date: Tue, 14 Jun 2022 15:45:32 +0200
Subject: [PATCH 11/11] lavc/jpeg2000dec: Component-level threading of
write_frame()
Split off MCT and don't bother with it unless the picture actually uses MCT.
---
libavcodec/jpeg2000dec.c | 37 ++++++++++++++++++++++++++-----------
1 file changed, 26 insertions(+), 11 deletions(-)
diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c
index 71176d944d..8b984cfc27 100644
--- a/libavcodec/jpeg2000dec.c
+++ b/libavcodec/jpeg2000dec.c
@@ -156,6 +156,7 @@ typedef struct Jpeg2000DecoderContext {
// used for idwt slicing
int reslevel, dir, slices;
int have_dwt97_int; // 1 if any coding style is FF_DWT97_INT
+ int have_mct;
} Jpeg2000DecoderContext;
/* get_bits functions for JPEG2000 packet bitstream
@@ -604,6 +605,9 @@ static int get_cod(Jpeg2000DecoderContext *s, Jpeg2000CodingStyle *c,
return AVERROR_INVALIDDATA;
}
+ if (tmp.mct)
+ s->have_mct = 1;
+
if ((ret = get_cox(s, &tmp)) < 0)
return ret;
tmp.init = 1;
@@ -2074,16 +2078,14 @@ static int jpeg2000_dwt97_int_postshift(AVCodecContext *avctx, void *td,
#define WRITE_FRAME(D, PIXEL) \
static inline void write_frame_ ## D(const Jpeg2000DecoderContext * s, Jpeg2000Tile * tile, \
- AVFrame * picture, int precision) \
+ AVFrame * picture, int precision, int compno) \
{ \
const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(s->avctx->pix_fmt); \
int planar = !!(pixdesc->flags & AV_PIX_FMT_FLAG_PLANAR); \
int pixelsize = planar ? 1 : pixdesc->nb_components; \
\
- int compno; \
int x, y; \
\
- for (compno = 0; compno < s->ncomponents; compno++) { \
Jpeg2000Component *comp = tile->comp + compno; \
Jpeg2000CodingStyle *codsty = tile->codsty + compno; \
PIXEL *line; \
@@ -2130,8 +2132,6 @@ static int jpeg2000_dwt97_int_postshift(AVCodecContext *avctx, void *td,
} \
line += picture->linesize[plane] / sizeof(PIXEL); \
} \
- } \
- \
}
WRITE_FRAME(8, uint8_t)
@@ -2139,26 +2139,36 @@ WRITE_FRAME(16, uint16_t)
#undef WRITE_FRAME
-static int jpeg2000_mct_write_frame(AVCodecContext *avctx, void *td,
- int jobnr, int threadnr)
+static int jpeg2000_mct(AVCodecContext *avctx, void *td,
+ int jobnr, int threadnr)
{
const Jpeg2000DecoderContext *s = avctx->priv_data;
- AVFrame *picture = td;
Jpeg2000Tile *tile = s->tile + jobnr;
/* inverse MCT transformation */
if (tile->codsty[0].mct)
mct_decode(s, tile);
+ return 0;
+}
+
+static int jpeg2000_write_frame(AVCodecContext *avctx, void *td,
+ int jobnr, int threadnr)
+{
+ Jpeg2000DecoderContext *s = avctx->priv_data;
+ AVFrame *picture = td;
+ Jpeg2000Tile *tile = s->tile + jobnr / s->ncomponents;
+ int compno = jobnr % s->ncomponents;
+
if (s->precision <= 8) {
- write_frame_8(s, tile, picture, 8);
+ write_frame_8(s, tile, picture, 8, compno);
} else {
int precision = picture->format == AV_PIX_FMT_XYZ12 ||
picture->format == AV_PIX_FMT_RGB48 ||
picture->format == AV_PIX_FMT_RGBA64 ||
picture->format == AV_PIX_FMT_GRAY16 ? 16 : s->precision;
- write_frame_16(s, tile, picture, precision);
+ write_frame_16(s, tile, picture, precision, compno);
}
return 0;
@@ -2694,7 +2704,12 @@ static int jpeg2000_decode_frame(AVCodecContext *avctx, AVFrame *picture,
s->numXtiles * s->numYtiles * s->ncomponents * s->slices)) < 0)
goto end;
- if ((ret = avctx->execute2(avctx, jpeg2000_mct_write_frame, picture, NULL, s->numXtiles * s->numYtiles)) < 0)
+ if (s->have_mct &&
+ (ret = avctx->execute2(avctx, jpeg2000_mct, NULL, NULL, s->numXtiles * s->numYtiles)) < 0)
+ goto end;
+
+ if ((ret = avctx->execute2(avctx, jpeg2000_write_frame, picture, NULL,
+ s->numXtiles * s->numYtiles * s->ncomponents)) < 0)
goto end;
jpeg2000_dec_cleanup(s);
--
2.30.2
[-- Attachment #3: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 12+ messages in thread