From: "Tomas Härdin" <git@haerdin.se> To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> Subject: [FFmpeg-devel] [PATCH 1/2] lavc/speedhqdec: Add AV_CODEC_CAP_SLICE_THREADS Date: Mon, 13 May 2024 17:42:54 +0200 Message-ID: <ce5bb50c2351c52662798f30d5aa601b4d7e002a.camel@haerdin.se> (raw) [-- Attachment #1: Type: text/plain, Size: 1432 bytes --] On a 36 core Intel(R) Xeon(R) Platinum 8124M CPU @ 3.00GHz command: /usr/bin/time ./ffmpeg -t 30 -thread_type slice -threads $THREADS -i $INPUT.mov -vcodec rawvideo -f null - before: frame= 1500 fps=160 q=-0.0 Lsize=N/A time=00:00:30.00 bitrate=N/A speed= 3.2x 10.54user 0.37system 0:09.40elapsed 116%CPU (0avgtext+0avgdata 175300maxresident)k -thread_type slice -threads 1 frame= 1500 fps=161 q=-0.0 Lsize=N/A time=00:00:30.00 bitrate=N/A speed=3.22x 10.57user 0.29system 0:09.34elapsed 116%CPU (0avgtext+0avgdata 175580maxresident)k -thread_type slice -threads 2 frame= 1500 fps=318 q=-0.0 Lsize=N/A time=00:00:30.00 bitrate=N/A speed=6.36x 10.53user 0.39system 0:04.73elapsed 230%CPU (0avgtext+0avgdata 175632maxresident)k -thread_type slice -threads 4 frame= 1500 fps=615 q=-0.0 Lsize=N/A time=00:00:30.00 bitrate=N/A speed=12.3x 10.58user 0.34system 0:02.46elapsed 444%CPU (0avgtext+0avgdata 175452maxresident)k -thread_type slice -threads 8 frame= 1500 fps=613 q=-0.0 Lsize=N/A time=00:00:30.00 bitrate=N/A speed=12.3x 10.60user 0.33system 0:02.46elapsed 443%CPU (0avgtext+0avgdata 180532maxresident)k ^ same as -threads 4, as we'd expect for progressive essence, since a progressive frame is split into only four slices (one job each). I don't have any interlaced samples at the moment, and speedhqenc can't make any. I also noticed speedhqenc produces broken output when width % 16 == 8. Will file a ticket on that tomorrow. 
/Tomas [-- Attachment #2: 0001-lavc-speedhqdec-Add-AV_CODEC_CAP_SLICE_THREADS.patch --] [-- Type: text/x-patch, Size: 6394 bytes --] From 29a0380a1537ba205ec91399512f676301d5e930 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= <git@haerdin.se> Date: Mon, 13 May 2024 16:36:31 +0200 Subject: [PATCH 1/2] lavc/speedhqdec: Add AV_CODEC_CAP_SLICE_THREADS Each field slice is assigned to one thread. Serial performance is unaffected. --- libavcodec/speedhqdec.c | 59 ++++++++++++++++++++++++++--------------- 1 file changed, 38 insertions(+), 21 deletions(-) diff --git a/libavcodec/speedhqdec.c b/libavcodec/speedhqdec.c index d6b1fff7a5..77a159f7e5 100644 --- a/libavcodec/speedhqdec.c +++ b/libavcodec/speedhqdec.c @@ -58,6 +58,8 @@ typedef struct SHQContext { enum { SHQ_SUBSAMPLING_420, SHQ_SUBSAMPLING_422, SHQ_SUBSAMPLING_444 } subsampling; enum { SHQ_NO_ALPHA, SHQ_RLE_ALPHA, SHQ_DCT_ALPHA } alpha_type; + AVPacket *avpkt; + uint32_t second_field_offset; } SHQContext; /* NOTE: The first element is always 16, unscaled. 
*/ @@ -266,9 +268,10 @@ static int decode_speedhq_border(const SHQContext *s, GetBitContext *gb, AVFrame return 0; } -static int decode_speedhq_field(const SHQContext *s, const uint8_t *buf, int buf_size, AVFrame *frame, int field_number, int start, int end, int line_stride) +static int decode_speedhq_field(const SHQContext *s, const uint8_t *buf, int buf_size, AVFrame *frame, int field_number, int start, int end, int line_stride, int slice_number) { - int ret, slice_number, slice_offsets[5]; + int ret, x, y, slice_offsets[5]; + uint32_t slice_begin, slice_end; int linesize_y = frame->linesize[0] * line_stride; int linesize_cb = frame->linesize[1] * line_stride; int linesize_cr = frame->linesize[2] * line_stride; @@ -283,21 +286,17 @@ static int decode_speedhq_field(const SHQContext *s, const uint8_t *buf, int buf slice_offsets[0] = start; slice_offsets[4] = end; - for (slice_number = 1; slice_number < 4; slice_number++) { + for (x = 1; x < 4; x++) { uint32_t last_offset, slice_len; - last_offset = slice_offsets[slice_number - 1]; + last_offset = slice_offsets[x - 1]; slice_len = AV_RL24(buf + last_offset); - slice_offsets[slice_number] = last_offset + slice_len; + slice_offsets[x] = last_offset + slice_len; - if (slice_len < 3 || slice_offsets[slice_number] > end - 3) + if (slice_len < 3 || slice_offsets[x] > end - 3) return AVERROR_INVALIDDATA; } - for (slice_number = 0; slice_number < 4; slice_number++) { - uint32_t slice_begin, slice_end; - int x, y; - slice_begin = slice_offsets[slice_number]; slice_end = slice_offsets[slice_number + 1]; @@ -390,14 +389,34 @@ static int decode_speedhq_field(const SHQContext *s, const uint8_t *buf, int buf } } } - } - if (s->subsampling != SHQ_SUBSAMPLING_444 && (frame->width & 15)) + if (s->subsampling != SHQ_SUBSAMPLING_444 && (frame->width & 15) && slice_number == 3) return decode_speedhq_border(s, &gb, frame, field_number, line_stride); return 0; } +static int decode_slice_progressive(AVCodecContext *avctx, void *arg, int 
jobnr, int threadnr) +{ + SHQContext *s = avctx->priv_data; + (void)threadnr; + + return decode_speedhq_field(avctx->priv_data, s->avpkt->data, s->avpkt->size, arg, 0, 4, s->avpkt->size, 1, jobnr); +} + +static int decode_slice_interlaced(AVCodecContext *avctx, void *arg, int jobnr, int threadnr) +{ + SHQContext *s = avctx->priv_data; + int field_number = jobnr / 4; + int slice_number = jobnr % 4; + (void)threadnr; + + if (field_number == 0) + return decode_speedhq_field(avctx->priv_data, s->avpkt->data, s->avpkt->size, arg, 0, 4, s->second_field_offset, 2, slice_number); + else + return decode_speedhq_field(avctx->priv_data, s->avpkt->data, s->avpkt->size, arg, 1, s->second_field_offset, s->avpkt->size, 2, slice_number); +} + static void compute_quant_matrix(int *output, int qscale) { int i; @@ -411,7 +430,6 @@ static int speedhq_decode_frame(AVCodecContext *avctx, AVFrame *frame, const uint8_t *buf = avpkt->data; int buf_size = avpkt->size; uint8_t quality; - uint32_t second_field_offset; int ret; if (buf_size < 4 || avctx->width < 8 || avctx->width % 8 != 0) @@ -429,8 +447,8 @@ static int speedhq_decode_frame(AVCodecContext *avctx, AVFrame *frame, compute_quant_matrix(s->quant_matrix, 100 - quality); - second_field_offset = AV_RL24(buf + 1); - if (second_field_offset >= buf_size - 3) { + s->second_field_offset = AV_RL24(buf + 1); + if (s->second_field_offset >= buf_size - 3) { return AVERROR_INVALIDDATA; } @@ -441,8 +459,9 @@ static int speedhq_decode_frame(AVCodecContext *avctx, AVFrame *frame, return ret; } frame->flags |= AV_FRAME_FLAG_KEY; + s->avpkt = avpkt; - if (second_field_offset == 4 || second_field_offset == (buf_size-4)) { + if (s->second_field_offset == 4 || s->second_field_offset == (buf_size-4)) { /* * Overlapping first and second fields is used to signal * encoding only a single field. 
In this case, "height" @@ -452,12 +471,10 @@ static int speedhq_decode_frame(AVCodecContext *avctx, AVFrame *frame, * but this matches the convention used in NDI, which is * the primary user of this trick. */ - if ((ret = decode_speedhq_field(s, buf, buf_size, frame, 0, 4, buf_size, 1)) < 0) + if ((ret = avctx->execute2(avctx, decode_slice_progressive, frame, NULL, 4)) < 0) return ret; } else { - if ((ret = decode_speedhq_field(s, buf, buf_size, frame, 0, 4, second_field_offset, 2)) < 0) - return ret; - if ((ret = decode_speedhq_field(s, buf, buf_size, frame, 1, second_field_offset, buf_size, 2)) < 0) + if ((ret = avctx->execute2(avctx, decode_slice_interlaced, frame, NULL, 8)) < 0) return ret; } @@ -653,5 +670,5 @@ const FFCodec ff_speedhq_decoder = { .priv_data_size = sizeof(SHQContext), .init = speedhq_decode_init, FF_CODEC_DECODE_CB(speedhq_decode_frame), - .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS, + .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_SLICE_THREADS, }; -- 2.39.2 [-- Attachment #3: Type: text/plain, Size: 251 bytes --] _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next reply other threads:[~2024-05-13 15:43 UTC|newest] Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top 2024-05-13 15:42 Tomas Härdin [this message] 2024-05-13 15:43 ` [FFmpeg-devel] [PATCH 2/2] lavc/speedhqdec: Reindent Tomas Härdin 2024-05-14 8:20 ` [FFmpeg-devel] [PATCH 1/2] lavc/speedhqdec: Add AV_CODEC_CAP_SLICE_THREADS Tomas Härdin 2024-05-30 14:23 ` Tomas Härdin 2024-06-03 12:54 ` Tomas Härdin
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=ce5bb50c2351c52662798f30d5aa601b4d7e002a.camel@haerdin.se \ --to=git@haerdin.se \ --cc=ffmpeg-devel@ffmpeg.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link.
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel This inbox may be cloned and mirrored by anyone: git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git # If you have public-inbox 1.1+ installed, you may # initialize and index your mirror using the following commands: public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \ ffmpegdev@gitmailbox.com public-inbox-index ffmpegdev Example config snippet for mirrors. AGPL code for this site: git clone https://public-inbox.org/public-inbox.git