From: "Tomas Härdin" <git@haerdin.se> To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> Subject: [FFmpeg-devel] [PATCH 05/11] lavc/jpeg2000*: Use av_realloc_array_reuse() and av_reallocz_array_reuse() to eliminate lots of allocations Date: Wed, 28 Sep 2022 12:06:38 +0200 Message-ID: <9e78ef32ebe31cebd72f0ef31a42892ccb9017ab.camel@haerdin.se> (raw) In-Reply-To: <65e79fe701374868bb2f4b70ce8fd220938e2e86.camel@haerdin.se> [-- Attachment #1: Type: text/plain, Size: 1 bytes --] [-- Attachment #2: 0005-lavc-jpeg2000-Use-av_realloc_array_reuse-and-av_real.patch --] [-- Type: text/x-patch, Size: 17753 bytes --] From 5a5986c29d62933f3f2cd2259becb763f3719eaa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= <git@haerdin.se> Date: Mon, 13 Jun 2022 17:04:10 +0200 Subject: [PATCH 05/11] lavc/jpeg2000*: Use av_realloc_array_reuse() and av_reallocz_array_reuse() to eliminate lots of allocations --- libavcodec/jpeg2000.c | 79 +++++++++++++++++++++------------------- libavcodec/jpeg2000.h | 10 +++++ libavcodec/jpeg2000dec.c | 48 ++++++++++-------------- libavcodec/jpeg2000dwt.c | 9 ++++- libavcodec/jpeg2000dwt.h | 2 + 5 files changed, 80 insertions(+), 68 deletions(-) diff --git a/libavcodec/jpeg2000.c b/libavcodec/jpeg2000.c index 945b787565..afff9809e4 100644 --- a/libavcodec/jpeg2000.c +++ b/libavcodec/jpeg2000.c @@ -52,17 +52,23 @@ static int32_t tag_tree_size(int w, int h) } /* allocate the memory for tag tree */ -static Jpeg2000TgtNode *ff_jpeg2000_tag_tree_init(int w, int h) +static int ff_jpeg2000_tag_tree_init(Jpeg2000TgtNode **old, unsigned int *size, int w, int h) { int pw = w, ph = h; - Jpeg2000TgtNode *res, *t, *t2; + Jpeg2000TgtNode *t, *t2; int32_t tt_size; + size_t prod; tt_size = tag_tree_size(w, h); - t = res = av_calloc(tt_size, sizeof(*t)); - if (!res) - return NULL; + if (av_size_mult(tt_size, sizeof(*t), &prod)) + return AVERROR(ENOMEM); + + av_fast_malloc(old, size, prod); + if (!*old) + return AVERROR(ENOMEM); + t = *old; 
+ memset(*old, 0, prod); while (w > 1 || h > 1) { int i, j; @@ -80,7 +86,7 @@ static Jpeg2000TgtNode *ff_jpeg2000_tag_tree_init(int w, int h) t = t2; } t[0].parent = NULL; - return res; + return 0; } void ff_tag_tree_zero(Jpeg2000TgtNode *t, int w, int h, int val) @@ -278,7 +284,7 @@ static int init_prec(AVCodecContext *avctx, int log2_band_prec_height) { Jpeg2000Prec *prec = band->prec + precno; - int nb_codeblocks, cblkno; + int nb_codeblocks, cblkno, ret; prec->decoded_layers = 0; @@ -316,25 +322,22 @@ static int init_prec(AVCodecContext *avctx, /* Tag trees initialization */ - prec->cblkincl = - ff_jpeg2000_tag_tree_init(prec->nb_codeblocks_width, - prec->nb_codeblocks_height); - if (!prec->cblkincl) - return AVERROR(ENOMEM); - - prec->zerobits = - ff_jpeg2000_tag_tree_init(prec->nb_codeblocks_width, - prec->nb_codeblocks_height); - if (!prec->zerobits) - return AVERROR(ENOMEM); + if ((ret = ff_jpeg2000_tag_tree_init(&prec->cblkincl, + &prec->cblkincl_size, + prec->nb_codeblocks_width, + prec->nb_codeblocks_height)) < 0 || + (ret = ff_jpeg2000_tag_tree_init(&prec->zerobits, + &prec->zerobits_size, + prec->nb_codeblocks_width, + prec->nb_codeblocks_height)) < 0) + return ret; if (prec->nb_codeblocks_width * (uint64_t)prec->nb_codeblocks_height > INT_MAX) { prec->cblk = NULL; return AVERROR(ENOMEM); } nb_codeblocks = prec->nb_codeblocks_width * prec->nb_codeblocks_height; - prec->cblk = av_calloc(nb_codeblocks, sizeof(*prec->cblk)); - if (!prec->cblk) + if (av_reallocz_array_reuse(&prec->cblk, &prec->cblk_allocated, nb_codeblocks, INT_MAX, sizeof(*prec->cblk))) return AVERROR(ENOMEM); for (cblkno = 0; cblkno < nb_codeblocks; cblkno++) { Jpeg2000Cblk *cblk = prec->cblk + cblkno; @@ -376,6 +379,7 @@ static int init_prec(AVCodecContext *avctx, cblk->length = 0; cblk->npasses = 0; if (av_codec_is_encoder(avctx->codec)) { + av_freep(&cblk->layers); cblk->layers = av_calloc(codsty->nlayers, sizeof(*cblk->layers)); if (!cblk->layers) return AVERROR(ENOMEM); @@ -448,8 
+452,7 @@ static int init_band(AVCodecContext *avctx, return AVERROR(ENOMEM); } nb_precincts = reslevel->num_precincts_x * reslevel->num_precincts_y; - band->prec = av_calloc(nb_precincts, sizeof(*band->prec)); - if (!band->prec) + if (av_reallocz_array_reuse(&band->prec, &band->prec_allocated, nb_precincts, INT_MAX, sizeof(*band->prec))) return AVERROR(ENOMEM); for (precno = 0; precno < nb_precincts; precno++) { @@ -471,6 +474,7 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp, { int reslevelno, bandno, gbandno = 0, ret, i, j; uint32_t csize; + size_t prod; if (codsty->nreslevels2decode <= 0) { av_log(avctx, AV_LOG_ERROR, "nreslevels2decode %d invalid or uninitialized\n", codsty->nreslevels2decode); @@ -496,19 +500,22 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp, if (codsty->transform == FF_DWT97) { csize += AV_INPUT_BUFFER_PADDING_SIZE / sizeof(*comp->f_data); - comp->i_data = NULL; - comp->f_data = av_calloc(csize, sizeof(*comp->f_data)); + if (av_size_mult(csize, sizeof(*comp->f_data), &prod)) + return AVERROR(ENOMEM); + av_fast_malloc(&comp->f_data, &comp->f_data_size, prod); if (!comp->f_data) return AVERROR(ENOMEM); + memset(comp->f_data, 0, prod); } else { csize += AV_INPUT_BUFFER_PADDING_SIZE / sizeof(*comp->i_data); - comp->f_data = NULL; - comp->i_data = av_calloc(csize, sizeof(*comp->i_data)); + if (av_size_mult(csize, sizeof(*comp->i_data), &prod)) + return AVERROR(ENOMEM); + av_fast_malloc(&comp->i_data, &comp->i_data_size, prod); if (!comp->i_data) return AVERROR(ENOMEM); + memset(comp->i_data, 0, prod); } - comp->reslevel = av_calloc(codsty->nreslevels, sizeof(*comp->reslevel)); - if (!comp->reslevel) + if (av_reallocz_array_reuse(&comp->reslevel, &comp->reslevel_allocated, codsty->nreslevels, INT_MAX, sizeof(*comp->reslevel))) return AVERROR(ENOMEM); /* LOOP on resolution levels */ for (reslevelno = 0; reslevelno < codsty->nreslevels; reslevelno++) { @@ -555,8 +562,7 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp, 
reslevel->log2_prec_height) - (reslevel->coord[1][0] >> reslevel->log2_prec_height); - reslevel->band = av_calloc(reslevel->nbands, sizeof(*reslevel->band)); - if (!reslevel->band) + if (av_reallocz_array_reuse(&reslevel->band, &reslevel->band_allocated, reslevel->nbands, INT_MAX, sizeof(*reslevel->band))) return AVERROR(ENOMEM); if (reslevel->num_precincts_x * (uint64_t)reslevel->num_precincts_y * reslevel->nbands > avctx->max_pixels / sizeof(*reslevel->band->prec)) @@ -597,9 +603,9 @@ void ff_jpeg2000_reinit(Jpeg2000Component *comp, Jpeg2000CodingStyle *codsty) void ff_jpeg2000_cleanup(Jpeg2000Component *comp, Jpeg2000CodingStyle *codsty) { - int reslevelno, bandno, precno; + size_t reslevelno, bandno, precno; for (reslevelno = 0; - comp->reslevel && reslevelno < codsty->nreslevels; + comp->reslevel && reslevelno < comp->reslevel_allocated; reslevelno++) { Jpeg2000ResLevel *reslevel; @@ -607,23 +613,20 @@ void ff_jpeg2000_cleanup(Jpeg2000Component *comp, Jpeg2000CodingStyle *codsty) continue; reslevel = comp->reslevel + reslevelno; - for (bandno = 0; bandno < reslevel->nbands; bandno++) { + for (bandno = 0; bandno < reslevel->band_allocated; bandno++) { Jpeg2000Band *band; if (!reslevel->band) continue; band = reslevel->band + bandno; - for (precno = 0; precno < reslevel->num_precincts_x * reslevel->num_precincts_y; precno++) { + for (precno = 0; precno < band->prec_allocated; precno++) { if (band->prec) { Jpeg2000Prec *prec = band->prec + precno; - int nb_code_blocks = prec->nb_codeblocks_height * prec->nb_codeblocks_width; - av_freep(&prec->zerobits); av_freep(&prec->cblkincl); if (prec->cblk) { - int cblkno; - for (cblkno = 0; cblkno < nb_code_blocks; cblkno ++) { + for (size_t cblkno = 0; cblkno < prec->cblk_allocated; cblkno ++) { Jpeg2000Cblk *cblk = &prec->cblk[cblkno]; av_freep(&cblk->data); av_freep(&cblk->passes); diff --git a/libavcodec/jpeg2000.h b/libavcodec/jpeg2000.h index 389813a9b9..6594d8e5cb 100644 --- a/libavcodec/jpeg2000.h +++ 
b/libavcodec/jpeg2000.h @@ -179,6 +179,7 @@ typedef struct Jpeg2000Cblk { uint8_t incl; uint16_t length; uint16_t *lengthinc; + size_t lengthinc_allocated; uint8_t nb_lengthinc; uint8_t lblock; uint8_t *data; @@ -186,6 +187,7 @@ typedef struct Jpeg2000Cblk { int nb_terminations; int nb_terminationsinc; int *data_start; + size_t data_start_allocated; Jpeg2000Pass *passes; Jpeg2000Layer *layers; int coord[2][2]; // border coordinates {{x0, x1}, {y0, y1}} @@ -195,8 +197,11 @@ typedef struct Jpeg2000Prec { int nb_codeblocks_width; int nb_codeblocks_height; Jpeg2000TgtNode *zerobits; + unsigned int zerobits_size; Jpeg2000TgtNode *cblkincl; + unsigned int cblkincl_size; Jpeg2000Cblk *cblk; + size_t cblk_allocated; int decoded_layers; int coord[2][2]; // border coordinates {{x0, x1}, {y0, y1}} } Jpeg2000Prec; // precinct @@ -207,6 +212,7 @@ typedef struct Jpeg2000Band { int i_stepsize; // quantization stepsize float f_stepsize; // quantization stepsize Jpeg2000Prec *prec; + size_t prec_allocated; } Jpeg2000Band; // subband typedef struct Jpeg2000ResLevel { @@ -215,13 +221,17 @@ typedef struct Jpeg2000ResLevel { int num_precincts_x, num_precincts_y; // number of precincts in x/y direction uint8_t log2_prec_width, log2_prec_height; // exponent of precinct size Jpeg2000Band *band; + size_t band_allocated; } Jpeg2000ResLevel; // resolution level typedef struct Jpeg2000Component { Jpeg2000ResLevel *reslevel; + size_t reslevel_allocated; DWTContext dwt; float *f_data; + unsigned int f_data_size; int *i_data; + unsigned int i_data_size; int coord[2][2]; // border coordinates {{x0, x1}, {y0, y1}} -- can be reduced with lowres option int coord_o[2][2]; // border coordinates {{x0, x1}, {y0, y1}} -- original values from jpeg2000 headers uint8_t roi_shift; // ROI scaling value for the component diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c index 00aa73e261..49a815a9b0 100644 --- a/libavcodec/jpeg2000dec.c +++ b/libavcodec/jpeg2000dec.c @@ -79,6 +79,7 @@ typedef 
struct Jpeg2000TilePart { * one per component, so tile_part elements have a size of 3 */ typedef struct Jpeg2000Tile { Jpeg2000Component *comp; + size_t comp_allocated; uint8_t properties[4]; Jpeg2000CodingStyle codsty[4]; Jpeg2000QuantStyle qntsty[4]; @@ -141,6 +142,7 @@ typedef struct Jpeg2000DecoderContext { int curtileno; Jpeg2000Tile *tile; + size_t tile_allocated; Jpeg2000DSPContext dsp; /*options parameters*/ @@ -380,8 +382,7 @@ static int get_siz(Jpeg2000DecoderContext *s) return AVERROR(EINVAL); } - s->tile = av_calloc(s->numXtiles * s->numYtiles, sizeof(*s->tile)); - if (!s->tile) { + if (av_reallocz_array_reuse(&s->tile, &s->tile_allocated, s->numXtiles * s->numYtiles, INT_MAX, sizeof(*s->tile))) { s->numXtiles = s->numYtiles = 0; return AVERROR(ENOMEM); } @@ -389,8 +390,7 @@ static int get_siz(Jpeg2000DecoderContext *s) for (i = 0; i < s->numXtiles * s->numYtiles; i++) { Jpeg2000Tile *tile = s->tile + i; - tile->comp = av_mallocz(s->ncomponents * sizeof(*tile->comp)); - if (!tile->comp) + if (av_reallocz_array_reuse(&tile->comp, &tile->comp_allocated, s->ncomponents, INT_MAX, sizeof(*tile->comp))) return AVERROR(ENOMEM); } @@ -1160,7 +1160,6 @@ static int jpeg2000_decode_packet(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile, for (cblkno = 0; cblkno < nb_code_blocks; cblkno++) { Jpeg2000Cblk *cblk = prec->cblk + cblkno; int incl, newpasses, llen; - void *tmp; if (cblk->npasses) incl = get_bits(s, 1); @@ -1200,14 +1199,10 @@ static int jpeg2000_decode_packet(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile, cblk->nb_lengthinc = 0; cblk->nb_terminationsinc = 0; - av_free(cblk->lengthinc); - cblk->lengthinc = av_calloc(newpasses, sizeof(*cblk->lengthinc)); - if (!cblk->lengthinc) + if (av_realloc_array_reuse(&cblk->lengthinc, &cblk->lengthinc_allocated, newpasses, INT_MAX, sizeof(*cblk->lengthinc)) || + av_realloc_array_reuse(&cblk->data_start, &cblk->data_start_allocated, cblk->nb_terminations + newpasses + 1, INT_MAX, sizeof(*cblk->data_start))) return 
AVERROR(ENOMEM); - tmp = av_realloc_array(cblk->data_start, cblk->nb_terminations + newpasses + 1, sizeof(*cblk->data_start)); - if (!tmp) - return AVERROR(ENOMEM); - cblk->data_start = tmp; + do { int newpasses1 = 0; @@ -1296,7 +1291,6 @@ static int jpeg2000_decode_packet(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile, cblk->data_start[cblk->nb_terminations] = cblk->length; } } - av_freep(&cblk->lengthinc); } } // Save state of stream @@ -2172,24 +2166,9 @@ static int jpeg2000_mct_write_frame(AVCodecContext *avctx, void *td, static void jpeg2000_dec_cleanup(Jpeg2000DecoderContext *s) { - int tileno, compno; - for (tileno = 0; tileno < s->numXtiles * s->numYtiles; tileno++) { - if (s->tile[tileno].comp) { - for (compno = 0; compno < s->ncomponents; compno++) { - Jpeg2000Component *comp = s->tile[tileno].comp + compno; - Jpeg2000CodingStyle *codsty = s->tile[tileno].codsty + compno; - - ff_jpeg2000_cleanup(comp, codsty); - } - av_freep(&s->tile[tileno].comp); - av_freep(&s->tile[tileno].packed_headers); - s->tile[tileno].packed_headers_size = 0; - } - } av_freep(&s->packed_headers); s->packed_headers_size = 0; memset(&s->packed_headers_stream, 0, sizeof(s->packed_headers_stream)); - av_freep(&s->tile); memset(s->codsty, 0, sizeof(s->codsty)); memset(s->qntsty, 0, sizeof(s->qntsty)); memset(s->properties, 0, sizeof(s->properties)); @@ -2726,6 +2705,19 @@ static av_cold int jpeg2000_decode_close(AVCodecContext *avctx) { Jpeg2000DecoderContext *s = avctx->priv_data; + for (size_t tileno = 0; tileno < s->tile_allocated; tileno++) { + if (s->tile[tileno].comp) { + for (size_t compno = 0; compno < s->tile[tileno].comp_allocated; compno++) { + Jpeg2000Component *comp = s->tile[tileno].comp + compno; + Jpeg2000CodingStyle *codsty = s->tile[tileno].codsty + compno; + + ff_jpeg2000_cleanup(comp, codsty); + } + av_freep(&s->tile[tileno].comp); + av_freep(&s->tile[tileno].packed_headers); + } + } + av_freep(&s->tile); av_freep(&s->idwt); av_freep(&s->cb); diff --git 
a/libavcodec/jpeg2000dwt.c b/libavcodec/jpeg2000dwt.c index 921461b6d7..f3ddefe48f 100644 --- a/libavcodec/jpeg2000dwt.c +++ b/libavcodec/jpeg2000dwt.c @@ -531,6 +531,7 @@ int ff_jpeg2000_dwt_init(DWTContext *s, int border[2][2], { int i, j, lev = decomp_levels, b[2][2]; + size_t prod; s->ndeclevels = decomp_levels; s->type = type; @@ -556,11 +557,15 @@ int ff_jpeg2000_dwt_init(DWTContext *s, int border[2][2], } if (type == FF_DWT97) { - s->f_linebuf = av_malloc_array(s->linesize, s->max_slices*sizeof(*s->f_linebuf)); + if (av_size_mult(s->linesize, s->max_slices*sizeof(*s->f_linebuf), &prod)) + return AVERROR(ENOMEM); + av_fast_malloc(&s->f_linebuf, &s->f_linebuf_size, prod); if (!s->f_linebuf) return AVERROR(ENOMEM); } else { - s->i_linebuf = av_malloc_array(s->linesize, s->max_slices*sizeof(*s->i_linebuf)); + if (av_size_mult(s->linesize, s->max_slices*sizeof(*s->i_linebuf), &prod)) + return AVERROR(ENOMEM); + av_fast_malloc(&s->i_linebuf, &s->i_linebuf_size, prod); if (!s->i_linebuf) return AVERROR(ENOMEM); } diff --git a/libavcodec/jpeg2000dwt.h b/libavcodec/jpeg2000dwt.h index d5e94c9916..fb6fc8f121 100644 --- a/libavcodec/jpeg2000dwt.h +++ b/libavcodec/jpeg2000dwt.h @@ -48,7 +48,9 @@ typedef struct DWTContext { uint8_t ndeclevels; ///< number of decomposition levels uint8_t type; ///< 0 for 9/7; 1 for 5/3 int32_t *i_linebuf; ///< int buffer used by transform + unsigned int i_linebuf_size; float *f_linebuf; ///< float buffer used by transform + unsigned int f_linebuf_size; int max_slices; int linesize; } DWTContext; -- 2.30.2 [-- Attachment #3: Type: text/plain, Size: 251 bytes --] _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next prev parent reply other threads:[~2022-09-28 10:06 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-09-28 10:04 [FFmpeg-devel] [PATCH 01/11] lavc/jpeg2000dec: Finer granularity threading Tomas Härdin
2022-09-28 10:05 ` [FFmpeg-devel] [PATCH 02/11] lavc/jpeg2000dec: Reindent Tomas Härdin
2022-09-28 10:05 ` [FFmpeg-devel] [PATCH 03/11] lavc/jpeg2000dwt: Implement sliced transforms Tomas Härdin
2022-09-28 10:06 ` [FFmpeg-devel] [PATCH 04/11] lavc/jpeg2000dec: Thread init_tile() Tomas Härdin
2022-09-28 14:14 ` Tomas Härdin
2022-09-28 10:06 ` Tomas Härdin [this message]
2022-09-28 10:07 ` [FFmpeg-devel] [PATCH 06/11] lavc/jpeg2000: Switch Jpeg2000TgtNode to int32_t parent Tomas Härdin
2022-09-28 10:07 ` [FFmpeg-devel] [PATCH 07/11] lavc/jpeg2000: Speed up ff_jpeg2000_tag_tree_init() using stereotypes for sizes <= 4x4 Tomas Härdin
2022-09-28 10:07 ` [FFmpeg-devel] [PATCH 08/11] lavc/jpeg2000: Reindent Tomas Härdin
2022-09-28 10:08 ` [FFmpeg-devel] [PATCH 09/11] lavc/jpeg2000: Minimize calls to av_codec_is_encoder() Tomas Härdin
2022-09-28 10:09 ` [FFmpeg-devel] [PATCH 10/11] lavc/jpeg2000dec: Use coarser slicing for initial reslevels Tomas Härdin
2022-09-28 10:10 ` [FFmpeg-devel] [PATCH 11/11] lavc/jpeg2000dec: Component-level threading of write_frame() Tomas Härdin
Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=9e78ef32ebe31cebd72f0ef31a42892ccb9017ab.camel@haerdin.se \
    --to=git@haerdin.se \
    --cc=ffmpeg-devel@ffmpeg.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git