From: "Tomas Härdin" <tjoppen@acc.umu.se> To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> Subject: [FFmpeg-devel] [PATCH 07/13] lavc/jpeg2000*: Use ff_fast_recalloc() to eliminate lots of allocations Date: Tue, 14 Jun 2022 16:42:43 +0200 Message-ID: <f3c27ac8051c11b12726a2791eeae13887973c2e.camel@acc.umu.se> (raw) In-Reply-To: <10ec51ef44325c2de6d5de7b994a9b6c8eb5e3a2.camel@acc.umu.se> [-- Attachment #1: Type: text/plain, Size: 1 bytes --] [-- Attachment #2: 0007-lavc-jpeg2000-Use-ff_fast_recalloc-to-eliminate-lots.patch --] [-- Type: text/x-patch, Size: 16413 bytes --] From 72a5f47503338a4fff816440ad64bc62cc23a738 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= <git@haerdin.se> Date: Mon, 13 Jun 2022 17:04:10 +0200 Subject: [PATCH 07/13] lavc/jpeg2000*: Use ff_fast_recalloc() to eliminate lots of allocations --- libavcodec/jpeg2000.c | 72 +++++++++++++++++++++------------------- libavcodec/jpeg2000.h | 9 +++++ libavcodec/jpeg2000dec.c | 28 ++++++++-------- libavcodec/jpeg2000dwt.c | 9 +++-- libavcodec/jpeg2000dwt.h | 2 ++ 5 files changed, 70 insertions(+), 50 deletions(-) diff --git a/libavcodec/jpeg2000.c b/libavcodec/jpeg2000.c index 945b787565..7ec5986875 100644 --- a/libavcodec/jpeg2000.c +++ b/libavcodec/jpeg2000.c @@ -52,17 +52,23 @@ static int32_t tag_tree_size(int w, int h) } /* allocate the memory for tag tree */ -static Jpeg2000TgtNode *ff_jpeg2000_tag_tree_init(int w, int h) +static int ff_jpeg2000_tag_tree_init(Jpeg2000TgtNode **old, unsigned int *size, int w, int h) { int pw = w, ph = h; - Jpeg2000TgtNode *res, *t, *t2; + Jpeg2000TgtNode *t, *t2; int32_t tt_size; + size_t prod; tt_size = tag_tree_size(w, h); - t = res = av_calloc(tt_size, sizeof(*t)); - if (!res) - return NULL; + if (av_size_mult(tt_size, sizeof(*t), &prod)) + return AVERROR(ENOMEM); + + av_fast_malloc(old, size, prod); + if (!*old) + return AVERROR(ENOMEM); + t = *old; + memset(*old, 0, prod); while (w > 1 || h > 1) { int i, j; @@ -80,7 +86,7 
@@ static Jpeg2000TgtNode *ff_jpeg2000_tag_tree_init(int w, int h) t = t2; } t[0].parent = NULL; - return res; + return 0; } void ff_tag_tree_zero(Jpeg2000TgtNode *t, int w, int h, int val) @@ -316,16 +322,14 @@ static int init_prec(AVCodecContext *avctx, /* Tag trees initialization */ - prec->cblkincl = - ff_jpeg2000_tag_tree_init(prec->nb_codeblocks_width, - prec->nb_codeblocks_height); - if (!prec->cblkincl) - return AVERROR(ENOMEM); - - prec->zerobits = - ff_jpeg2000_tag_tree_init(prec->nb_codeblocks_width, - prec->nb_codeblocks_height); - if (!prec->zerobits) + if (ff_jpeg2000_tag_tree_init(&prec->cblkincl, + &prec->cblkincl_size, + prec->nb_codeblocks_width, + prec->nb_codeblocks_height) || + ff_jpeg2000_tag_tree_init(&prec->zerobits, + &prec->zerobits_size, + prec->nb_codeblocks_width, + prec->nb_codeblocks_height)) return AVERROR(ENOMEM); if (prec->nb_codeblocks_width * (uint64_t)prec->nb_codeblocks_height > INT_MAX) { @@ -333,8 +337,7 @@ static int init_prec(AVCodecContext *avctx, return AVERROR(ENOMEM); } nb_codeblocks = prec->nb_codeblocks_width * prec->nb_codeblocks_height; - prec->cblk = av_calloc(nb_codeblocks, sizeof(*prec->cblk)); - if (!prec->cblk) + if (ff_fast_recalloc(&prec->cblk, &prec->cblk_size, nb_codeblocks, sizeof(*prec->cblk))) return AVERROR(ENOMEM); for (cblkno = 0; cblkno < nb_codeblocks; cblkno++) { Jpeg2000Cblk *cblk = prec->cblk + cblkno; @@ -376,6 +379,7 @@ static int init_prec(AVCodecContext *avctx, cblk->length = 0; cblk->npasses = 0; if (av_codec_is_encoder(avctx->codec)) { + av_freep(&cblk->layers); cblk->layers = av_calloc(codsty->nlayers, sizeof(*cblk->layers)); if (!cblk->layers) return AVERROR(ENOMEM); @@ -448,8 +452,7 @@ static int init_band(AVCodecContext *avctx, return AVERROR(ENOMEM); } nb_precincts = reslevel->num_precincts_x * reslevel->num_precincts_y; - band->prec = av_calloc(nb_precincts, sizeof(*band->prec)); - if (!band->prec) + if (ff_fast_recalloc(&band->prec, &band->prec_size, nb_precincts, 
sizeof(*band->prec))) return AVERROR(ENOMEM); for (precno = 0; precno < nb_precincts; precno++) { @@ -471,6 +474,7 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp, { int reslevelno, bandno, gbandno = 0, ret, i, j; uint32_t csize; + size_t prod; if (codsty->nreslevels2decode <= 0) { av_log(avctx, AV_LOG_ERROR, "nreslevels2decode %d invalid or uninitialized\n", codsty->nreslevels2decode); @@ -496,19 +500,22 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp, if (codsty->transform == FF_DWT97) { csize += AV_INPUT_BUFFER_PADDING_SIZE / sizeof(*comp->f_data); - comp->i_data = NULL; - comp->f_data = av_calloc(csize, sizeof(*comp->f_data)); + if (av_size_mult(csize, sizeof(*comp->f_data), &prod)) + return AVERROR(ENOMEM); + av_fast_malloc(&comp->f_data, &comp->f_data_size, prod); if (!comp->f_data) return AVERROR(ENOMEM); + memset(comp->f_data, 0, prod); } else { csize += AV_INPUT_BUFFER_PADDING_SIZE / sizeof(*comp->i_data); - comp->f_data = NULL; - comp->i_data = av_calloc(csize, sizeof(*comp->i_data)); + if (av_size_mult(csize, sizeof(*comp->i_data), &prod)) + return AVERROR(ENOMEM); + av_fast_malloc(&comp->i_data, &comp->i_data_size, prod); if (!comp->i_data) return AVERROR(ENOMEM); + memset(comp->i_data, 0, prod); } - comp->reslevel = av_calloc(codsty->nreslevels, sizeof(*comp->reslevel)); - if (!comp->reslevel) + if (ff_fast_recalloc(&comp->reslevel, &comp->reslevel_size, codsty->nreslevels, sizeof(*comp->reslevel))) return AVERROR(ENOMEM); /* LOOP on resolution levels */ for (reslevelno = 0; reslevelno < codsty->nreslevels; reslevelno++) { @@ -555,8 +562,7 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp, reslevel->log2_prec_height) - (reslevel->coord[1][0] >> reslevel->log2_prec_height); - reslevel->band = av_calloc(reslevel->nbands, sizeof(*reslevel->band)); - if (!reslevel->band) + if (ff_fast_recalloc(&reslevel->band, &reslevel->band_size, reslevel->nbands, sizeof(*reslevel->band))) return AVERROR(ENOMEM); if (reslevel->num_precincts_x 
* (uint64_t)reslevel->num_precincts_y * reslevel->nbands > avctx->max_pixels / sizeof(*reslevel->band->prec)) @@ -599,7 +605,7 @@ void ff_jpeg2000_cleanup(Jpeg2000Component *comp, Jpeg2000CodingStyle *codsty) { int reslevelno, bandno, precno; for (reslevelno = 0; - comp->reslevel && reslevelno < codsty->nreslevels; + comp->reslevel && reslevelno < comp->reslevel_size/sizeof(*comp->reslevel); reslevelno++) { Jpeg2000ResLevel *reslevel; @@ -607,23 +613,21 @@ void ff_jpeg2000_cleanup(Jpeg2000Component *comp, Jpeg2000CodingStyle *codsty) continue; reslevel = comp->reslevel + reslevelno; - for (bandno = 0; bandno < reslevel->nbands; bandno++) { + for (bandno = 0; bandno < reslevel->band_size/sizeof(*reslevel->band); bandno++) { Jpeg2000Band *band; if (!reslevel->band) continue; band = reslevel->band + bandno; - for (precno = 0; precno < reslevel->num_precincts_x * reslevel->num_precincts_y; precno++) { + for (precno = 0; precno < band->prec_size/sizeof(*band->prec); precno++) { if (band->prec) { Jpeg2000Prec *prec = band->prec + precno; - int nb_code_blocks = prec->nb_codeblocks_height * prec->nb_codeblocks_width; - av_freep(&prec->zerobits); av_freep(&prec->cblkincl); if (prec->cblk) { int cblkno; - for (cblkno = 0; cblkno < nb_code_blocks; cblkno ++) { + for (cblkno = 0; cblkno < prec->cblk_size/sizeof(*prec->cblk); cblkno ++) { Jpeg2000Cblk *cblk = &prec->cblk[cblkno]; av_freep(&cblk->data); av_freep(&cblk->passes); diff --git a/libavcodec/jpeg2000.h b/libavcodec/jpeg2000.h index cbb8e0d951..3bf85a6669 100644 --- a/libavcodec/jpeg2000.h +++ b/libavcodec/jpeg2000.h @@ -177,6 +177,7 @@ typedef struct Jpeg2000Cblk { uint8_t incl; uint16_t length; uint16_t *lengthinc; + unsigned int lengthinc_size; uint8_t nb_lengthinc; uint8_t lblock; uint8_t *data; @@ -193,8 +194,11 @@ typedef struct Jpeg2000Prec { int nb_codeblocks_width; int nb_codeblocks_height; Jpeg2000TgtNode *zerobits; + unsigned int zerobits_size; Jpeg2000TgtNode *cblkincl; + unsigned int cblkincl_size; 
Jpeg2000Cblk *cblk; + unsigned int cblk_size; int decoded_layers; int coord[2][2]; // border coordinates {{x0, x1}, {y0, y1}} } Jpeg2000Prec; // precinct @@ -205,6 +209,7 @@ typedef struct Jpeg2000Band { int i_stepsize; // quantization stepsize float f_stepsize; // quantization stepsize Jpeg2000Prec *prec; + unsigned int prec_size; } Jpeg2000Band; // subband typedef struct Jpeg2000ResLevel { @@ -213,13 +218,17 @@ typedef struct Jpeg2000ResLevel { int num_precincts_x, num_precincts_y; // number of precincts in x/y direction uint8_t log2_prec_width, log2_prec_height; // exponent of precinct size Jpeg2000Band *band; + unsigned int band_size; } Jpeg2000ResLevel; // resolution level typedef struct Jpeg2000Component { Jpeg2000ResLevel *reslevel; + unsigned int reslevel_size; DWTContext dwt; float *f_data; + unsigned int f_data_size; int *i_data; + unsigned int i_data_size; int coord[2][2]; // border coordinates {{x0, x1}, {y0, y1}} -- can be reduced with lowres option int coord_o[2][2]; // border coordinates {{x0, x1}, {y0, y1}} -- original values from jpeg2000 headers uint8_t roi_shift; // ROI scaling value for the component diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c index ef5167c29e..a3fc05ea97 100644 --- a/libavcodec/jpeg2000dec.c +++ b/libavcodec/jpeg2000dec.c @@ -79,6 +79,7 @@ typedef struct Jpeg2000TilePart { * one per component, so tile_part elements have a size of 3 */ typedef struct Jpeg2000Tile { Jpeg2000Component *comp; + unsigned int comp_size; uint8_t properties[4]; Jpeg2000CodingStyle codsty[4]; Jpeg2000QuantStyle qntsty[4]; @@ -141,6 +142,7 @@ typedef struct Jpeg2000DecoderContext { int curtileno; Jpeg2000Tile *tile; + unsigned int tile_size; Jpeg2000DSPContext dsp; /*options parameters*/ @@ -380,8 +382,7 @@ static int get_siz(Jpeg2000DecoderContext *s) return AVERROR(EINVAL); } - s->tile = av_calloc(s->numXtiles * s->numYtiles, sizeof(*s->tile)); - if (!s->tile) { + if (ff_fast_recalloc(&s->tile, &s->tile_size, s->numXtiles * 
s->numYtiles, sizeof(*s->tile))) { s->numXtiles = s->numYtiles = 0; return AVERROR(ENOMEM); } @@ -389,8 +390,7 @@ static int get_siz(Jpeg2000DecoderContext *s) for (i = 0; i < s->numXtiles * s->numYtiles; i++) { Jpeg2000Tile *tile = s->tile + i; - tile->comp = av_mallocz(s->ncomponents * sizeof(*tile->comp)); - if (!tile->comp) + if (ff_fast_recalloc(&tile->comp, &tile->comp_size, s->ncomponents, sizeof(*tile->comp))) return AVERROR(ENOMEM); } @@ -1196,9 +1196,7 @@ static int jpeg2000_decode_packet(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile, cblk->nb_lengthinc = 0; cblk->nb_terminationsinc = 0; - av_free(cblk->lengthinc); - cblk->lengthinc = av_calloc(newpasses, sizeof(*cblk->lengthinc)); - if (!cblk->lengthinc) + if (ff_fast_recalloc(&cblk->lengthinc, &cblk->lengthinc_size, newpasses, sizeof(*cblk->lengthinc))) return AVERROR(ENOMEM); tmp = av_realloc_array(cblk->data_start, cblk->nb_terminations + newpasses + 1, sizeof(*cblk->data_start)); if (!tmp) @@ -1292,7 +1290,6 @@ static int jpeg2000_decode_packet(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile, cblk->data_start[cblk->nb_terminations] = cblk->length; } } - av_freep(&cblk->lengthinc); } } // Save state of stream @@ -2166,12 +2163,13 @@ static int jpeg2000_mct_write_frame(AVCodecContext *avctx, void *td, return 0; } -static void jpeg2000_dec_cleanup(Jpeg2000DecoderContext *s) +static void jpeg2000_dec_cleanup(Jpeg2000DecoderContext *s, int close) { int tileno, compno; - for (tileno = 0; tileno < s->numXtiles * s->numYtiles; tileno++) { + if (close) { + for (tileno = 0; tileno < s->tile_size/sizeof(*s->tile); tileno++) { if (s->tile[tileno].comp) { - for (compno = 0; compno < s->ncomponents; compno++) { + for (compno = 0; compno < s->tile[tileno].comp_size/sizeof(*s->tile[tileno].comp); compno++) { Jpeg2000Component *comp = s->tile[tileno].comp + compno; Jpeg2000CodingStyle *codsty = s->tile[tileno].codsty + compno; @@ -2182,10 +2180,11 @@ static void jpeg2000_dec_cleanup(Jpeg2000DecoderContext *s) 
s->tile[tileno].packed_headers_size = 0; } } + av_freep(&s->tile); + } av_freep(&s->packed_headers); s->packed_headers_size = 0; memset(&s->packed_headers_stream, 0, sizeof(s->packed_headers_stream)); - av_freep(&s->tile); memset(s->codsty, 0, sizeof(s->codsty)); memset(s->qntsty, 0, sizeof(s->qntsty)); memset(s->properties, 0, sizeof(s->properties)); @@ -2689,7 +2688,7 @@ static int jpeg2000_decode_frame(AVCodecContext *avctx, AVFrame *picture, avctx->execute2(avctx, jpeg2000_mct_write_frame, picture, NULL, s->numXtiles * s->numYtiles); - jpeg2000_dec_cleanup(s); + jpeg2000_dec_cleanup(s, 0); *got_frame = 1; @@ -2702,7 +2701,7 @@ static int jpeg2000_decode_frame(AVCodecContext *avctx, AVFrame *picture, return bytestream2_tell(&s->g); end: - jpeg2000_dec_cleanup(s); + jpeg2000_dec_cleanup(s, 0); return ret; } @@ -2712,6 +2711,7 @@ static av_cold int jpeg2000_decode_close(AVCodecContext *avctx) av_freep(&s->idwt); av_freep(&s->cb); + jpeg2000_dec_cleanup(s, 1); return 0; } diff --git a/libavcodec/jpeg2000dwt.c b/libavcodec/jpeg2000dwt.c index 921461b6d7..f3ddefe48f 100644 --- a/libavcodec/jpeg2000dwt.c +++ b/libavcodec/jpeg2000dwt.c @@ -531,6 +531,7 @@ int ff_jpeg2000_dwt_init(DWTContext *s, int border[2][2], { int i, j, lev = decomp_levels, b[2][2]; + size_t prod; s->ndeclevels = decomp_levels; s->type = type; @@ -556,11 +557,15 @@ int ff_jpeg2000_dwt_init(DWTContext *s, int border[2][2], } if (type == FF_DWT97) { - s->f_linebuf = av_malloc_array(s->linesize, s->max_slices*sizeof(*s->f_linebuf)); + if (av_size_mult(s->linesize, s->max_slices*sizeof(*s->f_linebuf), &prod)) + return AVERROR(ENOMEM); + av_fast_malloc(&s->f_linebuf, &s->f_linebuf_size, prod); if (!s->f_linebuf) return AVERROR(ENOMEM); } else { - s->i_linebuf = av_malloc_array(s->linesize, s->max_slices*sizeof(*s->i_linebuf)); + if (av_size_mult(s->linesize, s->max_slices*sizeof(*s->i_linebuf), &prod)) + return AVERROR(ENOMEM); + av_fast_malloc(&s->i_linebuf, &s->i_linebuf_size, prod); if 
(!s->i_linebuf) return AVERROR(ENOMEM); } diff --git a/libavcodec/jpeg2000dwt.h b/libavcodec/jpeg2000dwt.h index d5e94c9916..fb6fc8f121 100644 --- a/libavcodec/jpeg2000dwt.h +++ b/libavcodec/jpeg2000dwt.h @@ -48,7 +48,9 @@ typedef struct DWTContext { uint8_t ndeclevels; ///< number of decomposition levels uint8_t type; ///< 0 for 9/7; 1 for 5/3 int32_t *i_linebuf; ///< int buffer used by transform + unsigned int i_linebuf_size; float *f_linebuf; ///< float buffer used by transform + unsigned int f_linebuf_size; int max_slices; int linesize; } DWTContext; -- 2.30.2 [-- Attachment #3: Type: text/plain, Size: 251 bytes --] _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next prev parent reply other threads:[~2022-06-14 14:42 UTC|newest] Thread overview: 31+ messages / expand[flat|nested] mbox.gz Atom feed top 2022-06-14 14:39 [FFmpeg-devel] [PATCH 01/13] lavc/jpeg2000dec: Finer granularity threading Tomas Härdin 2022-06-14 14:39 ` [FFmpeg-devel] [PATCH 02/13] lavc/jpeg2000dec: Reindent Tomas Härdin 2022-06-14 14:40 ` [FFmpeg-devel] [PATCH 03/13] lavc/jpeg2000dwt: Implement sliced transforms Tomas Härdin 2022-06-14 14:40 ` [FFmpeg-devel] [PATCH 04/13] lavc/jpeg2000dec: Implement IDWT slicing Tomas Härdin 2022-06-14 14:41 ` [FFmpeg-devel] [PATCH 05/13] lavc/jpeg2000dec: Thread init_tile() Tomas Härdin 2022-06-14 21:11 ` Michael Niedermayer 2022-06-15 13:11 ` Tomas Härdin 2022-06-15 21:05 ` Michael Niedermayer 2022-06-16 9:28 ` Tomas Härdin 2022-06-14 14:42 ` [FFmpeg-devel] [PATCH 06/13] lavu/mem: Add ff_fast_recalloc() Tomas Härdin 2022-06-14 20:26 ` Michael Niedermayer 2022-06-15 9:59 ` Tomas Härdin 2022-06-15 12:15 ` James Almer 2022-06-16 12:44 ` Tomas Härdin 2022-06-18 14:57 ` Anton Khirnov 2022-06-21 8:04 ` Tomas Härdin 2022-06-14 14:42 ` Tomas Härdin [this message] 2022-06-14 15:23 ` [FFmpeg-devel] [PATCH 07/13] lavc/jpeg2000*: Use ff_fast_recalloc() to eliminate lots of allocations Andreas Rheinhardt 2022-06-15 10:03 ` Tomas Härdin 2022-06-14 14:43 ` [FFmpeg-devel] [PATCH 08/13] lavc/jpeg2000: Switch Jpeg2000TgtNode to int32_t parent Tomas Härdin 2022-06-14 14:43 ` [FFmpeg-devel] [PATCH 09/13] lavc/jpeg2000: Speed up ff_jpeg2000_tag_tree_init() using stereotypes for sizes <= 4x4 Tomas Härdin 2022-06-18 15:00 ` Anton Khirnov 2022-06-21 7:57 ` Tomas Härdin 2022-06-14 14:43 ` [FFmpeg-devel] [PATCH 10/13] lavc/jpeg2000: Reindent Tomas Härdin 2022-06-14 14:44 ` [FFmpeg-devel] [PATCH 11/13] lavc/jpeg2000: Minimize calls to av_codec_is_encoder() Tomas Härdin 2022-06-14 15:04 ` Andreas Rheinhardt 2022-06-15 10:20 ` Tomas Härdin 2022-06-14 14:44 ` [FFmpeg-devel] [PATCH 12/13] lavc/jpeg2000dec: Use coarser slicing for initial reslevels 
Tomas Härdin 2022-06-14 14:47 ` [FFmpeg-devel] [PATCH 13/13] lavc/jpeg2000dec: Component-level threading of write_frame() Tomas Härdin 2022-06-18 14:50 ` [FFmpeg-devel] [PATCH 01/13] lavc/jpeg2000dec: Finer granularity threading Anton Khirnov 2022-06-24 8:19 ` Tomas Härdin
Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=f3c27ac8051c11b12726a2791eeae13887973c2e.camel@acc.umu.se \
    --to=tjoppen@acc.umu.se \
    --cc=ffmpeg-devel@ffmpeg.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git