From: averne <averne381@gmail.com>
To: ffmpeg-devel@ffmpeg.org
Cc: averne <averne381@gmail.com>
Subject: [FFmpeg-devel] [PATCH 07/16] hwcontext_nvtegra: add dynamic frequency scaling routines
Date: Thu, 30 May 2024 21:43:09 +0200
Message-ID: <cfdb0e3af24a59b7a3f2656fb7ce92bc0d3150a6.1717083800.git.averne381@gmail.com> (raw)
In-Reply-To: <cover.1717083799.git.averne381@gmail.com>

To save on energy, the clock speed of multimedia engines should be adapted to their workload.

Signed-off-by: averne <averne381@gmail.com>
---
 libavutil/hwcontext_nvtegra.c | 212 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 libavutil/hwcontext_nvtegra.h |   7 ++
 2 files changed, 219 insertions(+)

diff --git a/libavutil/hwcontext_nvtegra.c b/libavutil/hwcontext_nvtegra.c
index 0f4d5a323b..6b72348082 100644
--- a/libavutil/hwcontext_nvtegra.c
+++ b/libavutil/hwcontext_nvtegra.c
@@ -46,6 +46,14 @@ typedef struct NVTegraDevicePriv {
     AVNVTegraJobPool job_pool;
 
     uint32_t vic_setup_off, vic_cmdbuf_off;
+
+    double framerate;                 /* framerate passed to av_nvtegra_dfs_init() */
+    uint32_t dfs_lowcorner;           /* minimum clock rate requested by DFS, in Hz */
+    double dfs_decode_cycles_ema;     /* moving average of decode cycles per bitstream bit */
+    double dfs_ema_damping;           /* weight of the newest sample in the moving average */
+    int64_t dfs_bitrate_sum;          /* bits submitted during the current sample set */
+    int dfs_cur_sample, dfs_num_samples;
+    int64_t dfs_sampling_start_ts, dfs_last_ts_delta;
 } NVTegraDevicePriv;
 
 static const enum AVPixelFormat supported_sw_formats[] = {
@@ -108,6 +116,33 @@ static inline uint32_t nvtegra_surface_get_height_align(enum AVPixelFormat fmt,
     return 32;
 }
 
+/*
+ * Request a clock rate for the engine behind the channel, then read the current
+ * rate back into channel->clock.
+ * Returns 0 on success, a negative error code otherwise.
+ */
+static int nvtegra_channel_set_freq(AVNVTegraChannel *channel, uint32_t freq) {
+    int err;
+#ifndef __SWITCH__
+    err = av_nvtegra_channel_set_clock_rate(channel, channel->module_id, freq);
+    if (err < 0)
+        return err;
+
+    err = av_nvtegra_channel_get_clock_rate(channel, channel->module_id, &channel->clock);
+    if (err < 0)
+        return err;
+#else
+    err = AVERROR(mmuRequestSetAndWait(&channel->mmu_request, freq, -1));
+    if (err < 0)
+        return err;
+
+    err = AVERROR(mmuRequestGet(&channel->mmu_request, &channel->clock));
+    if (err < 0)
+        return err;
+#endif
+    return 0;
+}
+
 static void nvtegra_device_uninit(AVHWDeviceContext *ctx) {
     NVTegraDevicePriv       *priv = ctx->hwctx;
     AVNVTegraDeviceContext *hwctx = &priv->p;
@@ -386,6 +421,183 @@ static int nvtegra_get_buffer(AVHWFramesContext *ctx, AVFrame *frame) {
     return 0;
 }
 
+/*
+ * Possible frequencies on Icosa and Mariko+, in MHz
+ * (see tegra210-core-dvfs.c and tegra210b01-core-dvfs.c in l4t kernel sources, respectively):
+ * for NVDEC:
+ *   268.8, 384.0, 448.0, 486.4, 550.4, 576.0, 614.4, 652.8, 678.4, 691.2, 716.8
+ *   460.8, 499.2, 556.8, 633.6, 652.8, 710.4, 748.8, 787.2, 825.6, 844.8, 883.2, 902.4, 921.6, 940.8, 960.0, 979.2
+ * for NVJPG:
+ *   192.0, 307.2, 345.6, 409.6, 486.4, 524.8, 550.4, 576.0, 588.8, 614.4, 627.2
+ *   422.4, 441.6, 499.2, 518.4, 537.6, 556.8, 576.0, 595.2, 614.4, 633.6, 652.8
+ */
+
+/*
+ * Set up the dynamic frequency scaling (DFS) state kept in the device context
+ * and request the maximum clock rate for the channel.
+ *
+ * width, height  picture dimensions, reduced to a 16x16 macroblock count to
+ *                select the low-corner (minimum) frequency
+ * framerate_hz   nominal framerate; a value below 0.1 or a non-finite one is
+ *                treated as unknown
+ *
+ * Returns 0 on success, or the negative error code of the clock request.
+ */
+int av_nvtegra_dfs_init(AVHWDeviceContext *ctx, AVNVTegraChannel *channel, int width, int height,
+                        double framerate_hz)
+{
+    NVTegraDevicePriv *priv = ctx->hwctx;
+
+    uint32_t max_freq, lowcorner;
+    int num_mbs, err;
+
+    priv->dfs_num_samples = 20;
+    priv->dfs_ema_damping = 0.1;
+
+    /*
+     * Initialize low-corner frequency (reproduces official code)
+     * Framerate might be unavailable (or variable), but this is official logic
+     */
+    num_mbs = width / 16 * height / 16;
+    if (num_mbs <= 3600)
+        lowcorner = 100000000; /* 480p */
+    else if (num_mbs <= 8160)
+        lowcorner = 180000000; /* 720p */
+    else if (num_mbs <= 32400)
+        lowcorner = 345000000; /* 1080p */
+    else
+        lowcorner = 576000000; /* 4k */
+
+    /* Scale the low corner down for streams slower than 30 fps, never up */
+    if (framerate_hz >= 0.1 && isfinite(framerate_hz))
+        lowcorner = FFMIN(lowcorner, lowcorner * framerate_hz / 30.0);
+
+    priv->framerate     = framerate_hz;
+    priv->dfs_lowcorner = lowcorner;
+
+    av_log(ctx, AV_LOG_DEBUG, "DFS: Initializing lowcorner to %u Hz, using %d samples\n",
+           priv->dfs_lowcorner, priv->dfs_num_samples);
+
+    /*
+     * Initialize channel to the max possible frequency (the kernel driver will clamp to an allowed value)
+     * Note: Official code passes INT_MAX kHz then multiplies by 1000 (to Hz) and converts to u32,
+     * resulting in this value.
+     */
+    max_freq = ((UINT64_C(1) << 32) - 1000) & UINT32_MAX;
+
+    err = nvtegra_channel_set_freq(channel, max_freq);
+    if (err < 0)
+        return err;
+
+    priv->dfs_decode_cycles_ema = 0.0;
+    priv->dfs_bitrate_sum       = 0;
+    priv->dfs_cur_sample        = 0;
+    priv->dfs_sampling_start_ts = av_gettime_relative();
+    priv->dfs_last_ts_delta     = 0;
+
+    return 0;
+}
+
+/*
+ * Account for one decoded frame and, every dfs_num_samples accounted frames,
+ * derive and request a new clock rate for the channel.
+ *
+ * bitstream_len  size in bytes of the bitstream fed to the hardware for this frame;
+ *                frames with no payload (<= 0) are ignored
+ * decode_cycles  cycles spent on this frame
+ *
+ * Returns 0 if no reclock was attempted, otherwise the result of the clock request.
+ */
+int av_nvtegra_dfs_update(AVHWDeviceContext *ctx, AVNVTegraChannel *channel, int bitstream_len, int decode_cycles) {
+    NVTegraDevicePriv *priv = ctx->hwctx;
+
+    double frame_time, avg, bits, freq;
+    int64_t now, wl_dt;
+    uint32_t clock;
+    int err;
+
+    /*
+     * Official software implements DFS using a flat average of the decoder pool occupancy.
+     * We instead use the decode cycles as reported by NVDEC microcode, and the "bitrate"
+     * (bitstream bits fed to the hardware in a given clock time interval, NOT video time),
+     * to calculate a suitable frequency, and multiply it by 1.2 for good measure:
+     *   Freq = decode_cycles_per_bit * bits_per_second * 1.2
+     */
+
+    /*
+     * A frame without payload carries no cycles-per-bit information. Skipping it also
+     * avoids a division by zero that would leave inf/NaN in the moving average for good.
+     */
+    if (bitstream_len <= 0)
+        return 0;
+
+    /* Convert to bits, in floating point so that large buffers cannot overflow an int */
+    bits = 8.0 * bitstream_len;
+
+    /* Exponential moving average of decode cycles per bit */
+    priv->dfs_decode_cycles_ema = priv->dfs_ema_damping * decode_cycles / bits +
+        (1.0 - priv->dfs_ema_damping) * priv->dfs_decode_cycles_ema;
+
+    priv->dfs_bitrate_sum += INT64_C(8) * bitstream_len;
+    priv->dfs_cur_sample = (priv->dfs_cur_sample + 1) % priv->dfs_num_samples;
+
+    err = 0;
+
+    /* Reclock if we collected enough samples */
+    if (priv->dfs_cur_sample == 0) {
+        now = av_gettime_relative();
+        wl_dt = now - priv->dfs_sampling_start_ts;
+
+        /*
+         * Try to filter bad sample sets caused by eg. pausing the video playback.
+         * A set is used if its wall time is under 1.5x either of:
+         * - the nominal duration of the set, derived from the framerate
+         *   (10Hz is used as fallback if no framerate information is available)
+         * - the wall time of the previously used set, if there is one
+         * NOTE(review): an "either limit exceeded rejects the set" policy would need
+         * && instead of || below - confirm which one is intended.
+         */
+
+        if (priv->framerate >= 0.1 && isfinite(priv->framerate))
+            frame_time = 1.0e6 / priv->framerate;
+        else
+            frame_time = 0.1e6;
+
+        /* wl_dt > 0 keeps the bits-per-second division below well defined */
+        if (wl_dt > 0 &&
+            ((wl_dt < 1.5 * priv->dfs_num_samples * frame_time) ||
+             ((priv->dfs_last_ts_delta) && (wl_dt < 1.5 * priv->dfs_last_ts_delta)))) {
+            avg  = priv->dfs_bitrate_sum * 1e6 / wl_dt;
+            freq = priv->dfs_decode_cycles_ema * avg * 1.2;
+
+            /* Clamp as a double: converting an out-of-range value to uint32_t is undefined */
+            freq  = FFMIN(FFMAX(freq, priv->dfs_lowcorner), UINT32_MAX);
+            clock = (uint32_t)freq;
+
+            av_log(ctx, AV_LOG_DEBUG, "DFS: %.0f cycles/b (ema), %.0f b/s -> clock %u Hz (lowcorner %u Hz)\n",
+                   priv->dfs_decode_cycles_ema, avg, clock, priv->dfs_lowcorner);
+
+            err = nvtegra_channel_set_freq(channel, clock);
+
+            priv->dfs_last_ts_delta = wl_dt;
+        }
+
+        priv->dfs_bitrate_sum = 0;
+        priv->dfs_sampling_start_ts = now;
+    }
+
+    return err;
+}
+
+/*
+ * Counterpart of av_nvtegra_dfs_init(): request a clock rate of 0 for the channel.
+ * NOTE(review): presumably a rate of 0 makes the driver fall back to its default
+ * clock - confirm. ctx is currently unused.
+ */
+int av_nvtegra_dfs_uninit(AVHWDeviceContext *ctx, AVNVTegraChannel *channel) {
+    return nvtegra_channel_set_freq(channel, 0);
+}
+
 static int nvtegra_transfer_get_formats(AVHWFramesContext *ctx,
                                         enum AVHWFrameTransferDirection dir,
                                         enum AVPixelFormat **formats)
diff --git a/libavutil/hwcontext_nvtegra.h b/libavutil/hwcontext_nvtegra.h
index 8a2383d304..7c845951d9 100644
--- a/libavutil/hwcontext_nvtegra.h
+++ b/libavutil/hwcontext_nvtegra.h
@@ -82,4 +82,11 @@ static inline AVNVTegraMap *av_nvtegra_frame_get_fbuf_map(const AVFrame *frame)
  */
 int av_nvtegra_pixfmt_to_vic(enum AVPixelFormat fmt);
 
+/*
+ * Dynamic frequency scaling routines
+ */
+int av_nvtegra_dfs_init(AVHWDeviceContext *ctx, AVNVTegraChannel *channel, int width, int height, double framerate_hz);
+int av_nvtegra_dfs_update(AVHWDeviceContext *ctx, AVNVTegraChannel *channel, int bitstream_len, int decode_cycles);
+int av_nvtegra_dfs_uninit(AVHWDeviceContext *ctx, AVNVTegraChannel *channel);
+
 #endif /* AVUTIL_HWCONTEXT_NVTEGRA_H */
-- 
2.45.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next prev parent reply other threads:[~2024-05-30 19:45 UTC|newest] Thread overview: 37+ messages / expand[flat|nested] mbox.gz Atom feed top 2024-05-30 19:43 [FFmpeg-devel] [PATCH 00/16] NVidia Tegra hardware decoding backend averne 2024-05-30 19:43 ` [FFmpeg-devel] [PATCH 01/16] avutil/buffer: add helper to allocate aligned memory averne 2024-05-30 20:38 ` Rémi Denis-Courmont 2024-05-31 21:06 ` averne 2024-05-31 21:44 ` Michael Niedermayer 2024-06-02 18:37 ` averne 2024-06-01 6:59 ` Rémi Denis-Courmont 2024-05-30 19:43 ` [FFmpeg-devel] [PATCH 02/16] configure, avutil: add support for HorizonOS averne 2024-05-30 20:37 ` Rémi Denis-Courmont 2024-05-31 21:06 ` averne 2024-05-30 19:43 ` [FFmpeg-devel] [PATCH 03/16] avutil: add ioctl definitions for tegra devices averne 2024-05-30 20:42 ` Rémi Denis-Courmont 2024-05-31 21:06 ` averne 2024-05-31 21:16 ` Timo Rothenpieler 2024-06-02 18:37 ` averne 2024-05-30 19:43 ` [FFmpeg-devel] [PATCH 04/16] avutil: add hardware definitions for NVDEC, NVJPG and VIC averne 2024-05-30 19:43 ` [FFmpeg-devel] [PATCH 05/16] avutil: add common code for nvtegra averne 2024-05-31 8:32 ` Rémi Denis-Courmont 2024-05-31 21:06 ` averne 2024-06-01 7:29 ` Rémi Denis-Courmont 2024-06-05 20:29 ` Mark Thompson 2024-06-29 19:35 ` averne 2024-05-30 19:43 ` [FFmpeg-devel] [PATCH 06/16] avutil: add nvtegra hwcontext averne 2024-06-05 20:47 ` Mark Thompson 2024-06-29 19:35 ` averne 2024-05-30 19:43 ` averne [this message] 2024-06-05 20:50 ` [FFmpeg-devel] [PATCH 07/16] hwcontext_nvtegra: add dynamic frequency scaling routines Mark Thompson 2024-06-29 19:35 ` averne 2024-05-30 19:43 ` [FFmpeg-devel] [PATCH 08/16] nvtegra: add common hardware decoding code averne 2024-05-30 19:43 ` [FFmpeg-devel] [PATCH 09/16] nvtegra: add mpeg1/2 hardware decoding averne 2024-05-30 19:43 ` [FFmpeg-devel] [PATCH 10/16] nvtegra: add mpeg4 " averne 2024-05-30 19:43 ` [FFmpeg-devel] [PATCH 11/16] nvtegra: add vc1 " averne 2024-05-30 19:43 ` [FFmpeg-devel] [PATCH 12/16] 
nvtegra: add h264 " averne 2024-05-30 19:43 ` [FFmpeg-devel] [PATCH 13/16] nvtegra: add hevc " averne 2024-05-30 19:43 ` [FFmpeg-devel] [PATCH 14/16] nvtegra: add vp8 " averne 2024-05-30 19:43 ` [FFmpeg-devel] [PATCH 15/16] nvtegra: add vp9 " averne 2024-05-30 19:43 ` [FFmpeg-devel] [PATCH 16/16] nvtegra: add mjpeg " averne
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=cfdb0e3af24a59b7a3f2656fb7ce92bc0d3150a6.1717083800.git.averne381@gmail.com \ --to=averne381@gmail.com \ --cc=ffmpeg-devel@ffmpeg.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel This inbox may be cloned and mirrored by anyone: git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git # If you have public-inbox 1.1+ installed, you may # initialize and index your mirror using the following commands: public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \ ffmpegdev@gitmailbox.com public-inbox-index ffmpegdev Example config snippet for mirrors. AGPL code for this site: git clone https://public-inbox.org/public-inbox.git