From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> To: ffmpeg-devel@ffmpeg.org Cc: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> Subject: [FFmpeg-devel] [PATCH 26/28] avcodec/mpegvideoencdsp: Factor draw_edges out in its own context Date: Mon, 6 May 2024 23:52:25 +0200 Message-ID: <GV1P250MB07379CD2C3C128004AAFF4258F1C2@GV1P250MB0737.EURP250.PROD.OUTLOOK.COM> (raw) In-Reply-To: <AS8P250MB0744D14E6CEB1A5AD911A3398F1D2@AS8P250MB0744.EURP250.PROD.OUTLOOK.COM> This allows to remove a dependency of the dirac decoder (!) on mpegvideoenc. Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com> --- configure | 7 +- libavcodec/Makefile | 1 + libavcodec/diracdec.c | 14 +-- libavcodec/drawedgesdsp.c | 61 ++++++++++ libavcodec/drawedgesdsp.h | 35 ++++++ libavcodec/mpegvideo.h | 2 + libavcodec/mpegvideo_enc.c | 9 +- libavcodec/mpegvideoencdsp.c | 32 ------ libavcodec/mpegvideoencdsp.h | 6 - libavcodec/snowenc.c | 13 ++- libavcodec/x86/Makefile | 1 + libavcodec/x86/drawedgesdsp.c | 157 ++++++++++++++++++++++++++ libavcodec/x86/mpegvideo.c | 2 +- libavcodec/x86/mpegvideoencdsp_init.c | 118 ------------------- 14 files changed, 281 insertions(+), 177 deletions(-) create mode 100644 libavcodec/drawedgesdsp.c create mode 100644 libavcodec/drawedgesdsp.h create mode 100644 libavcodec/x86/drawedgesdsp.c diff --git a/configure b/configure index bea4547e20..edb1ddca33 100755 --- a/configure +++ b/configure @@ -2552,6 +2552,7 @@ CONFIG_EXTRA=" dnn dovi_rpudec dovi_rpuenc + drawedgesdsp dvprofile evcparse exif @@ -2868,7 +2869,7 @@ mpeg_er_select="error_resilience" mpegaudio_select="mpegaudiodsp mpegaudioheader" mpegvideo_select="blockdsp hpeldsp idctdsp videodsp" mpegvideodec_select="h264chroma mpegvideo mpeg_er" -mpegvideoenc_select="aandcttables fdctdsp me_cmp mpegvideo pixblockdsp" +mpegvideoenc_select="aandcttables drawedgesdsp fdctdsp me_cmp mpegvideo pixblockdsp" msmpeg4dec_select="h263_decoder" msmpeg4enc_select="h263_encoder" vc1dsp_select="h264chroma qpeldsp startcode" @@ -2918,7 +2919,7 @@ cook_decoder_select="audiodsp sinewin" cri_decoder_select="mjpeg_decoder" cscd_decoder_suggest="zlib" dds_decoder_select="texturedsp" -dirac_decoder_select="dirac_parse dwt golomb mpegvideoenc qpeldsp videodsp" +dirac_decoder_select="dirac_parse drawedgesdsp dwt golomb qpeldsp videodsp" dnxhd_decoder_select="blockdsp idctdsp" dnxhd_encoder_select="blockdsp fdctdsp idctdsp mpegvideoenc pixblockdsp videodsp" dvvideo_decoder_select="dvprofile idctdsp" @@ -3060,7 +3061,7 @@ shorten_decoder_select="bswapdsp" sipr_decoder_select="lsp" smvjpeg_decoder_select="mjpeg_decoder" snow_decoder_select="dwt h264qpel rangecoder videodsp" -snow_encoder_select="dwt h264qpel hpeldsp me_cmp mpegvideoenc rangecoder videodsp" +snow_encoder_select="drawedgesdsp dwt h264qpel hpeldsp me_cmp mpegvideoenc rangecoder videodsp" sonic_decoder_select="golomb rangecoder" sonic_encoder_select="golomb rangecoder" sonic_ls_encoder_select="golomb rangecoder" diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 3e8a44e89c..dff6193bc2 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -89,6 +89,7 @@ OBJS-$(CONFIG_CBS_VP9) += cbs_vp9.o OBJS-$(CONFIG_DEFLATE_WRAPPER) += zlib_wrapper.o OBJS-$(CONFIG_DOVI_RPUDEC) += dovi_rpu.o dovi_rpudec.o OBJS-$(CONFIG_DOVI_RPUENC) += dovi_rpu.o dovi_rpuenc.o +OBJS-$(CONFIG_DRAWEDGESDSP) += drawedgesdsp.o OBJS-$(CONFIG_ERROR_RESILIENCE) += error_resilience.o OBJS-$(CONFIG_EVCPARSE) += evc_parse.o evc_ps.o OBJS-$(CONFIG_EXIF) += exif.o tiff_common.o diff --git a/libavcodec/diracdec.c b/libavcodec/diracdec.c index f1fde0b339..f0df74e131 100644 --- a/libavcodec/diracdec.c +++ b/libavcodec/diracdec.c @@ -34,11 +34,11 @@ #include "get_bits.h" #include "codec_internal.h" #include "decode.h" +#include "drawedgesdsp.h" #include "golomb.h" #include "dirac_arith.h" #include "dirac_vlc.h" #include "mpegpicture.h" -#include "mpegvideoencdsp.h" #include "dirac_dwt.h" #include "dirac.h" #include "diractab.h" @@ -135,7 +135,7 @@ typedef struct DiracSlice { typedef struct DiracContext { AVCodecContext *avctx; - MpegvideoEncDSPContext mpvencdsp; + DrawEdgesDSPContext drawedges; VideoDSPContext vdsp; DiracDSPContext diracdsp; DiracVersionInfo version; @@ -397,7 +397,7 @@ static av_cold int dirac_decode_init(AVCodecContext *avctx) s->thread_buf_size = -1; ff_diracdsp_init(&s->diracdsp); - ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx); + ff_drawedgesdsp_init(&s->drawedges); ff_videodsp_init(&s->vdsp, 8); for (i = 0; i < MAX_FRAMES; i++) { @@ -1836,7 +1836,7 @@ static int interpolate_refplane(DiracContext *s, DiracFrame *ref, int plane, int int i, edge = EDGE_WIDTH/2; ref->hpel[plane][0] = ref->avframe->data[plane]; - s->mpvencdsp.draw_edges(ref->hpel[plane][0], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); /* EDGE_TOP | EDGE_BOTTOM values just copied to make it build, this needs to be ensured */ + s->drawedges.draw_edges(ref->hpel[plane][0], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); /* EDGE_TOP | EDGE_BOTTOM values just copied to make it build, this needs to be ensured */ /* no need for hpel if we only have fpel vectors */ if (!s->mv_precision) @@ -1856,9 +1856,9 @@ static int interpolate_refplane(DiracContext *s, DiracFrame *ref, int plane, int s->diracdsp.dirac_hpel_filter(ref->hpel[plane][1], ref->hpel[plane][2], ref->hpel[plane][3], ref->hpel[plane][0], ref->avframe->linesize[plane], width, height); - s->mpvencdsp.draw_edges(ref->hpel[plane][1], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); - s->mpvencdsp.draw_edges(ref->hpel[plane][2], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); - s->mpvencdsp.draw_edges(ref->hpel[plane][3], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); + s->drawedges.draw_edges(ref->hpel[plane][1], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); + s->drawedges.draw_edges(ref->hpel[plane][2], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); + s->drawedges.draw_edges(ref->hpel[plane][3], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); } ref->interpolated[plane] = 1; diff --git a/libavcodec/drawedgesdsp.c b/libavcodec/drawedgesdsp.c new file mode 100644 index 0000000000..3306bb9f6d --- /dev/null +++ b/libavcodec/drawedgesdsp.c @@ -0,0 +1,61 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <stdint.h> +#include <string.h> + +#include "config.h" + +#include "drawedgesdsp.h" +#include "libavutil/attributes.h" + +/* draw the edges of width 'w' of an image of size width, height */ +// FIXME: Check that this is OK for MPEG-4 interlaced. +static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, + int w, int h, int sides) +{ + uint8_t *ptr = buf, *last_line; + + /* left and right */ + for (int i = 0; i < height; i++) { + memset(ptr - w, ptr[0], w); + memset(ptr + width, ptr[width - 1], w); + ptr += wrap; + } + + /* top and bottom + corners */ + buf -= w; + last_line = buf + (height - 1) * wrap; + if (sides & EDGE_TOP) + for (int i = 0; i < h; i++) + // top + memcpy(buf - (i + 1) * wrap, buf, width + w + w); + if (sides & EDGE_BOTTOM) + for (int i = 0; i < h; i++) + // bottom + memcpy(last_line + (i + 1) * wrap, last_line, width + w + w); +} + +av_cold void ff_drawedgesdsp_init(DrawEdgesDSPContext *c) +{ + c->draw_edges = draw_edges_c; + +#if ARCH_X86 + ff_drawedgesdsp_init_x86(c); +#endif +} diff --git a/libavcodec/drawedgesdsp.h b/libavcodec/drawedgesdsp.h new file mode 100644 index 0000000000..a57275809c --- /dev/null +++ b/libavcodec/drawedgesdsp.h @@ -0,0 +1,35 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_DRAWEDGESDSP_H +#define AVCODEC_DRAWEDGESDSP_H + +#include <stdint.h> + +#define EDGE_TOP 1 +#define EDGE_BOTTOM 2 + +typedef struct DrawEdgesDSPContext { + void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, + int w, int h, int sides); +} DrawEdgesDSPContext; + +void ff_drawedgesdsp_init(DrawEdgesDSPContext *c); +void ff_drawedgesdsp_init_x86(DrawEdgesDSPContext *c); + +#endif /* AVCODEC_DRAWEDGESDSP_H */ diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h index 215df0fd5b..4635172732 100644 --- a/libavcodec/mpegvideo.h +++ b/libavcodec/mpegvideo.h @@ -29,6 +29,7 @@ #define AVCODEC_MPEGVIDEO_H #include "blockdsp.h" +#include "drawedgesdsp.h" #include "error_resilience.h" #include "fdctdsp.h" #include "get_bits.h" @@ -222,6 +223,7 @@ typedef struct MpegEncContext { HpelDSPContext hdsp; IDCTDSPContext idsp; MECmpContext mecc; + DrawEdgesDSPContext drawedges; MpegvideoEncDSPContext mpvencdsp; PixblockDSPContext pdsp; QpelDSPContext qdsp; diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c index 2a75973ac4..ab14538b33 100644 --- a/libavcodec/mpegvideo_enc.c +++ b/libavcodec/mpegvideo_enc.c @@ -810,6 +810,7 @@ av_cold int ff_mpv_encode_init(AVCodecContext *avctx) ff_fdctdsp_init(&s->fdsp, avctx); ff_me_cmp_init(&s->mecc, avctx); ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx); + ff_drawedgesdsp_init(&s->drawedges); ff_pixblockdsp_init(&s->pdsp, avctx); if (!(avctx->stats_out = av_mallocz(256)) || @@ -1224,7 +1225,7 @@ static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg) } } if ((s->width & 15) || (s->height & (vpad-1))) { - s->mpvencdsp.draw_edges(dst, dst_stride, + s->drawedges.draw_edges(dst, dst_stride, w, h, 16 >> h_shift, vpad >> v_shift, @@ -1655,19 +1656,19 @@ static void frame_end(MpegEncContext *s) !s->intra_only) { int hshift = s->chroma_x_shift; int vshift = s->chroma_y_shift; - s->mpvencdsp.draw_edges(s->current_picture.f->data[0], + s->drawedges.draw_edges(s->current_picture.f->data[0], s->current_picture.f->linesize[0], s->h_edge_pos, s->v_edge_pos, EDGE_WIDTH, EDGE_WIDTH, EDGE_TOP | EDGE_BOTTOM); - s->mpvencdsp.draw_edges(s->current_picture.f->data[1], + s->drawedges.draw_edges(s->current_picture.f->data[1], s->current_picture.f->linesize[1], s->h_edge_pos >> hshift, s->v_edge_pos >> vshift, EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift, EDGE_TOP | EDGE_BOTTOM); - s->mpvencdsp.draw_edges(s->current_picture.f->data[2], + s->drawedges.draw_edges(s->current_picture.f->data[2], s->current_picture.f->linesize[2], s->h_edge_pos >> hshift, s->v_edge_pos >> vshift, diff --git a/libavcodec/mpegvideoencdsp.c b/libavcodec/mpegvideoencdsp.c index 997d048663..a6de93456d 100644 --- a/libavcodec/mpegvideoencdsp.c +++ b/libavcodec/mpegvideoencdsp.c @@ -16,9 +16,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include <assert.h> #include <stdint.h> -#include <string.h> #include "config.h" #include "libavutil/avassert.h" @@ -114,34 +112,6 @@ static int pix_norm1_c(const uint8_t *pix, int line_size) return s; } -/* draw the edges of width 'w' of an image of size width, height */ -// FIXME: Check that this is OK for MPEG-4 interlaced. -static void draw_edges_8_c(uint8_t *buf, int wrap, int width, int height, - int w, int h, int sides) -{ - uint8_t *ptr = buf, *last_line; - int i; - - /* left and right */ - for (i = 0; i < height; i++) { - memset(ptr - w, ptr[0], w); - memset(ptr + width, ptr[width - 1], w); - ptr += wrap; - } - - /* top and bottom + corners */ - buf -= w; - last_line = buf + (height - 1) * wrap; - if (sides & EDGE_TOP) - for (i = 0; i < h; i++) - // top - memcpy(buf - (i + 1) * wrap, buf, width + w + w); - if (sides & EDGE_BOTTOM) - for (i = 0; i < h; i++) - // bottom - memcpy(last_line + (i + 1) * wrap, last_line, width + w + w); -} - /* 2x2 -> 1x1 */ static void shrink22(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, @@ -243,8 +213,6 @@ av_cold void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c, c->pix_sum = pix_sum_c; c->pix_norm1 = pix_norm1_c; - c->draw_edges = draw_edges_8_c; - #if ARCH_ARM ff_mpegvideoencdsp_init_arm(c, avctx); #elif ARCH_PPC diff --git a/libavcodec/mpegvideoencdsp.h b/libavcodec/mpegvideoencdsp.h index 95084679d9..46b40cf30e 100644 --- a/libavcodec/mpegvideoencdsp.h +++ b/libavcodec/mpegvideoencdsp.h @@ -26,9 +26,6 @@ #define BASIS_SHIFT 16 #define RECON_SHIFT 6 -#define EDGE_TOP 1 -#define EDGE_BOTTOM 2 - typedef struct MpegvideoEncDSPContext { int (*try_8x8basis)(const int16_t rem[64], const int16_t weight[64], const int16_t basis[64], int scale); @@ -39,9 +36,6 @@ typedef struct MpegvideoEncDSPContext { void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height); - - void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, - int w, int h, int sides); } MpegvideoEncDSPContext; void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c, diff --git a/libavcodec/snowenc.c b/libavcodec/snowenc.c index 43ca602762..ec708cebcc 100644 --- a/libavcodec/snowenc.c +++ b/libavcodec/snowenc.c @@ -27,6 +27,7 @@ #include "libavutil/pixdesc.h" #include "avcodec.h" #include "codec_internal.h" +#include "drawedgesdsp.h" #include "encode.h" #include "internal.h" //For AVCodecInternal.recon_frame #include "me_cmp.h" @@ -46,7 +47,7 @@ typedef struct SnowEncContext { SnowContext com; QpelDSPContext qdsp; - MpegvideoEncDSPContext mpvencdsp; + DrawEdgesDSPContext drawedges; int lambda; int lambda2; @@ -216,7 +217,7 @@ static av_cold int encode_init(AVCodecContext *avctx) mcf(12,12) ff_me_cmp_init(&enc->mecc, avctx); - ff_mpegvideoencdsp_init(&enc->mpvencdsp, avctx); + ff_drawedgesdsp_init(&enc->drawedges); ff_snow_alloc_blocks(s); @@ -1775,7 +1776,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, memcpy(&s->input_picture->data[i][y * s->input_picture->linesize[i]], &pict->data[i][y * pict->linesize[i]], AV_CEIL_RSHIFT(width, hshift)); - enc->mpvencdsp.draw_edges(s->input_picture->data[i], s->input_picture->linesize[i], + enc->drawedges.draw_edges(s->input_picture->data[i], s->input_picture->linesize[i], AV_CEIL_RSHIFT(width, hshift), AV_CEIL_RSHIFT(height, vshift), EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift, EDGE_TOP | EDGE_BOTTOM); @@ -1815,14 +1816,14 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, int w = s->avctx->width; int h = s->avctx->height; - enc->mpvencdsp.draw_edges(s->current_picture->data[0], + enc->drawedges.draw_edges(s->current_picture->data[0], s->current_picture->linesize[0], w , h , EDGE_WIDTH , EDGE_WIDTH , EDGE_TOP | EDGE_BOTTOM); if (s->current_picture->data[2]) { - enc->mpvencdsp.draw_edges(s->current_picture->data[1], + enc->drawedges.draw_edges(s->current_picture->data[1], s->current_picture->linesize[1], w>>s->chroma_h_shift, h>>s->chroma_v_shift, EDGE_WIDTH>>s->chroma_h_shift, EDGE_WIDTH>>s->chroma_v_shift, EDGE_TOP | EDGE_BOTTOM); - enc->mpvencdsp.draw_edges(s->current_picture->data[2], + enc->drawedges.draw_edges(s->current_picture->data[2], s->current_picture->linesize[2], w>>s->chroma_h_shift, h>>s->chroma_v_shift, EDGE_WIDTH>>s->chroma_h_shift, EDGE_WIDTH>>s->chroma_v_shift, EDGE_TOP | EDGE_BOTTOM); } diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile index 331183f450..2b6c6659fd 100644 --- a/libavcodec/x86/Makefile +++ b/libavcodec/x86/Makefile @@ -7,6 +7,7 @@ OBJS-$(CONFIG_BLOCKDSP) += x86/blockdsp_init.o OBJS-$(CONFIG_BSWAPDSP) += x86/bswapdsp_init.o OBJS-$(CONFIG_DIRAC_DECODER) += x86/diracdsp_init.o \ x86/dirac_dwt_init.o +OBJS-$(CONFIG_DRAWEDGESDSP) += x86/drawedgesdsp.o OBJS-$(CONFIG_FDCTDSP) += x86/fdctdsp_init.o OBJS-$(CONFIG_FMTCONVERT) += x86/fmtconvert_init.o OBJS-$(CONFIG_H263DSP) += x86/h263dsp_init.o diff --git a/libavcodec/x86/drawedgesdsp.c b/libavcodec/x86/drawedgesdsp.c new file mode 100644 index 0000000000..1d059b3806 --- /dev/null +++ b/libavcodec/x86/drawedgesdsp.c @@ -0,0 +1,157 @@ +/* + * draw_edges by Michael Niedermayer <michaelni@gmx.at> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <stdint.h> + +#include "config.h" + +#include "libavcodec/drawedgesdsp.h" +#include "libavutil/attributes.h" +#include "libavutil/avassert.h" +#include "libavutil/cpu.h" +#include "libavutil/x86/asm.h" +#include "libavutil/x86/cpu.h" + +#if HAVE_INLINE_ASM + +/* Draw the edges of width 'w' of an image of size width, height + * this MMX version can only handle w == 8 || w == 16. */ +static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, + int w, int h, int sides) +{ + uint8_t *ptr, *last_line; + + last_line = buf + (height - 1) * wrap; + /* left and right */ + ptr = buf; + if (w == 8) { + __asm__ volatile ( + "1: \n\t" + "movd (%0), %%mm0 \n\t" + "punpcklbw %%mm0, %%mm0 \n\t" + "punpcklwd %%mm0, %%mm0 \n\t" + "punpckldq %%mm0, %%mm0 \n\t" + "movq %%mm0, -8(%0) \n\t" + "movq -8(%0, %2), %%mm1 \n\t" + "punpckhbw %%mm1, %%mm1 \n\t" + "punpckhwd %%mm1, %%mm1 \n\t" + "punpckhdq %%mm1, %%mm1 \n\t" + "movq %%mm1, (%0, %2) \n\t" + "add %1, %0 \n\t" + "cmp %3, %0 \n\t" + "jb 1b \n\t" + : "+r" (ptr) + : "r" ((x86_reg) wrap), "r" ((x86_reg) width), + "r" (ptr + wrap * height)); + } else if (w == 16) { + __asm__ volatile ( + "1: \n\t" + "movd (%0), %%mm0 \n\t" + "punpcklbw %%mm0, %%mm0 \n\t" + "punpcklwd %%mm0, %%mm0 \n\t" + "punpckldq %%mm0, %%mm0 \n\t" + "movq %%mm0, -8(%0) \n\t" + "movq %%mm0, -16(%0) \n\t" + "movq -8(%0, %2), %%mm1 \n\t" + "punpckhbw %%mm1, %%mm1 \n\t" + "punpckhwd %%mm1, %%mm1 \n\t" + "punpckhdq %%mm1, %%mm1 \n\t" + "movq %%mm1, (%0, %2) \n\t" + "movq %%mm1, 8(%0, %2) \n\t" + "add %1, %0 \n\t" + "cmp %3, %0 \n\t" + "jb 1b \n\t" + : "+r"(ptr) + : "r"((x86_reg)wrap), "r"((x86_reg)width), "r"(ptr + wrap * height) + ); + } else { + av_assert1(w == 4); + __asm__ volatile ( + "1: \n\t" + "movd (%0), %%mm0 \n\t" + "punpcklbw %%mm0, %%mm0 \n\t" + "punpcklwd %%mm0, %%mm0 \n\t" + "movd %%mm0, -4(%0) \n\t" + "movd -4(%0, %2), %%mm1 \n\t" + "punpcklbw %%mm1, %%mm1 \n\t" + "punpckhwd %%mm1, %%mm1 \n\t" + "punpckhdq %%mm1, %%mm1 \n\t" + "movd %%mm1, (%0, %2) \n\t" + "add %1, %0 \n\t" + "cmp %3, %0 \n\t" + "jb 1b \n\t" + : "+r" (ptr) + : "r" ((x86_reg) wrap), "r" ((x86_reg) width), + "r" (ptr + wrap * height)); + } + + /* top and bottom (and hopefully also the corners) */ + if (sides & EDGE_TOP) { + for (int i = 0; i < h; i += 4) { + ptr = buf - (i + 1) * wrap - w; + __asm__ volatile ( + "1: \n\t" + "movq (%1, %0), %%mm0 \n\t" + "movq %%mm0, (%0) \n\t" + "movq %%mm0, (%0, %2) \n\t" + "movq %%mm0, (%0, %2, 2) \n\t" + "movq %%mm0, (%0, %3) \n\t" + "add $8, %0 \n\t" + "cmp %4, %0 \n\t" + "jb 1b \n\t" + : "+r" (ptr) + : "r" ((x86_reg) buf - (x86_reg) ptr - w), + "r" ((x86_reg) - wrap), "r" ((x86_reg) - wrap * 3), + "r" (ptr + width + 2 * w)); + } + } + + if (sides & EDGE_BOTTOM) { + for (int i = 0; i < h; i += 4) { + ptr = last_line + (i + 1) * wrap - w; + __asm__ volatile ( + "1: \n\t" + "movq (%1, %0), %%mm0 \n\t" + "movq %%mm0, (%0) \n\t" + "movq %%mm0, (%0, %2) \n\t" + "movq %%mm0, (%0, %2, 2) \n\t" + "movq %%mm0, (%0, %3) \n\t" + "add $8, %0 \n\t" + "cmp %4, %0 \n\t" + "jb 1b \n\t" + : "+r" (ptr) + : "r" ((x86_reg) last_line - (x86_reg) ptr - w), + "r" ((x86_reg) wrap), "r" ((x86_reg) wrap * 3), + "r" (ptr + width + 2 * w)); + } + } +} + +#endif + +av_cold void ff_drawedgesdsp_init_x86(DrawEdgesDSPContext *c) +{ +#if HAVE_INLINE_ASM + int cpu_flags = av_get_cpu_flags(); + + if (INLINE_MMX(cpu_flags)) + c->draw_edges = draw_edges_mmx; +#endif +} diff --git a/libavcodec/x86/mpegvideo.c b/libavcodec/x86/mpegvideo.c index 73967cafda..a4ab2bb308 100644 --- a/libavcodec/x86/mpegvideo.c +++ b/libavcodec/x86/mpegvideo.c @@ -1,6 +1,6 @@ /* * Optimized for ia32 CPUs by Nick Kurshev <nickols_k@mail.ru> - * H.263, MPEG-1, MPEG-2 dequantizer & draw_edges by Michael Niedermayer <michaelni@gmx.at> + * H.263, MPEG-1, MPEG-2 dequantizer by Michael Niedermayer <michaelni@gmx.at> * * This file is part of FFmpeg. * diff --git a/libavcodec/x86/mpegvideoencdsp_init.c b/libavcodec/x86/mpegvideoencdsp_init.c index 9fa7ee4824..c816ca2ee7 100644 --- a/libavcodec/x86/mpegvideoencdsp_init.c +++ b/libavcodec/x86/mpegvideoencdsp_init.c @@ -17,7 +17,6 @@ */ #include "libavutil/attributes.h" -#include "libavutil/avassert.h" #include "libavutil/cpu.h" #include "libavutil/x86/cpu.h" #include "libavcodec/avcodec.h" @@ -94,121 +93,6 @@ int ff_pix_norm1_sse2(const uint8_t *pix, int line_size); #undef PMULHRW #undef PHADDD #endif /* HAVE_SSSE3_INLINE */ - -/* Draw the edges of width 'w' of an image of size width, height - * this MMX version can only handle w == 8 || w == 16. */ -static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, - int w, int h, int sides) -{ - uint8_t *ptr, *last_line; - int i; - - last_line = buf + (height - 1) * wrap; - /* left and right */ - ptr = buf; - if (w == 8) { - __asm__ volatile ( - "1: \n\t" - "movd (%0), %%mm0 \n\t" - "punpcklbw %%mm0, %%mm0 \n\t" - "punpcklwd %%mm0, %%mm0 \n\t" - "punpckldq %%mm0, %%mm0 \n\t" - "movq %%mm0, -8(%0) \n\t" - "movq -8(%0, %2), %%mm1 \n\t" - "punpckhbw %%mm1, %%mm1 \n\t" - "punpckhwd %%mm1, %%mm1 \n\t" - "punpckhdq %%mm1, %%mm1 \n\t" - "movq %%mm1, (%0, %2) \n\t" - "add %1, %0 \n\t" - "cmp %3, %0 \n\t" - "jb 1b \n\t" - : "+r" (ptr) - : "r" ((x86_reg) wrap), "r" ((x86_reg) width), - "r" (ptr + wrap * height)); - } else if (w == 16) { - __asm__ volatile ( - "1: \n\t" - "movd (%0), %%mm0 \n\t" - "punpcklbw %%mm0, %%mm0 \n\t" - "punpcklwd %%mm0, %%mm0 \n\t" - "punpckldq %%mm0, %%mm0 \n\t" - "movq %%mm0, -8(%0) \n\t" - "movq %%mm0, -16(%0) \n\t" - "movq -8(%0, %2), %%mm1 \n\t" - "punpckhbw %%mm1, %%mm1 \n\t" - "punpckhwd %%mm1, %%mm1 \n\t" - "punpckhdq %%mm1, %%mm1 \n\t" - "movq %%mm1, (%0, %2) \n\t" - "movq %%mm1, 8(%0, %2) \n\t" - "add %1, %0 \n\t" - "cmp %3, %0 \n\t" - "jb 1b \n\t" - : "+r"(ptr) - : "r"((x86_reg)wrap), "r"((x86_reg)width), "r"(ptr + wrap * height) - ); - } else { - av_assert1(w == 4); - __asm__ volatile ( - "1: \n\t" - "movd (%0), %%mm0 \n\t" - "punpcklbw %%mm0, %%mm0 \n\t" - "punpcklwd %%mm0, %%mm0 \n\t" - "movd %%mm0, -4(%0) \n\t" - "movd -4(%0, %2), %%mm1 \n\t" - "punpcklbw %%mm1, %%mm1 \n\t" - "punpckhwd %%mm1, %%mm1 \n\t" - "punpckhdq %%mm1, %%mm1 \n\t" - "movd %%mm1, (%0, %2) \n\t" - "add %1, %0 \n\t" - "cmp %3, %0 \n\t" - "jb 1b \n\t" - : "+r" (ptr) - : "r" ((x86_reg) wrap), "r" ((x86_reg) width), - "r" (ptr + wrap * height)); - } - - /* top and bottom (and hopefully also the corners) */ - if (sides & EDGE_TOP) { - for (i = 0; i < h; i += 4) { - ptr = buf - (i + 1) * wrap - w; - __asm__ volatile ( - "1: \n\t" - "movq (%1, %0), %%mm0 \n\t" - "movq %%mm0, (%0) \n\t" - "movq %%mm0, (%0, %2) \n\t" - "movq %%mm0, (%0, %2, 2) \n\t" - "movq %%mm0, (%0, %3) \n\t" - "add $8, %0 \n\t" - "cmp %4, %0 \n\t" - "jb 1b \n\t" - : "+r" (ptr) - : "r" ((x86_reg) buf - (x86_reg) ptr - w), - "r" ((x86_reg) - wrap), "r" ((x86_reg) - wrap * 3), - "r" (ptr + width + 2 * w)); - } - } - - if (sides & EDGE_BOTTOM) { - for (i = 0; i < h; i += 4) { - ptr = last_line + (i + 1) * wrap - w; - __asm__ volatile ( - "1: \n\t" - "movq (%1, %0), %%mm0 \n\t" - "movq %%mm0, (%0) \n\t" - "movq %%mm0, (%0, %2) \n\t" - "movq %%mm0, (%0, %2, 2) \n\t" - "movq %%mm0, (%0, %3) \n\t" - "add $8, %0 \n\t" - "cmp %4, %0 \n\t" - "jb 1b \n\t" - : "+r" (ptr) - : "r" ((x86_reg) last_line - (x86_reg) ptr - w), - "r" ((x86_reg) wrap), "r" ((x86_reg) wrap * 3), - "r" (ptr + width + 2 * w)); - } - } -} - #endif /* HAVE_INLINE_ASM */ av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c, @@ -232,8 +116,6 @@ av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c, c->try_8x8basis = try_8x8basis_mmx; } c->add_8x8basis = add_8x8basis_mmx; - - c->draw_edges = draw_edges_mmx; } if (INLINE_AMD3DNOW(cpu_flags)) { -- 2.40.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
next prev parent reply other threads:[~2024-05-06 21:52 UTC|newest] Thread overview: 33+ messages / expand[flat|nested] mbox.gz Atom feed top 2024-05-05 8:25 [FFmpeg-devel] [PATCH 1/9] configure: Add missing dirac_decoder->qpeldsp dependency Andreas Rheinhardt 2024-05-05 8:27 ` [FFmpeg-devel] [PATCH 2/9] configure: Add missing pdv decoder->inflate_wrapper dependency Andreas Rheinhardt 2024-05-05 8:27 ` [FFmpeg-devel] [PATCH 3/9] configure: Add missing atrac3p(al) decoder->sinewin dependency Andreas Rheinhardt 2024-05-05 8:27 ` [FFmpeg-devel] [PATCH 4/9] configure: Add missing ftr decoder->adts_header dependency Andreas Rheinhardt 2024-05-05 8:27 ` [FFmpeg-devel] [PATCH 5/9] avcodec/Makefile, v408(dec|enc): Remove remnants of AYUV codecs Andreas Rheinhardt 2024-05-05 8:27 ` [FFmpeg-devel] [PATCH 6/9] Remove remnants of prores_lgpl decoder Andreas Rheinhardt 2024-05-05 8:27 ` [FFmpeg-devel] [PATCH 7/9] avcodec/Makefile: Correct name of cbd2_dpcm decoder Andreas Rheinhardt 2024-05-05 8:27 ` [FFmpeg-devel] [PATCH 8/9] configure: Add wmv2dsp->qpeldsp dependency Andreas Rheinhardt 2024-05-05 8:27 ` [FFmpeg-devel] [PATCH 9/9] avcodec/Makefile: Fix intrax8 objects Andreas Rheinhardt 2024-05-05 13:40 ` [FFmpeg-devel] [PATCH 10/24] avformat/Makefile: Add asf_muxer->asf_tags.o dependency Andreas Rheinhardt 2024-05-05 13:40 ` [FFmpeg-devel] [PATCH 11/24] avfilter/Makefile: Add missing tiltshelf entry Andreas Rheinhardt 2024-05-05 13:40 ` [FFmpeg-devel] [PATCH 12/24] avfilter/Makefile: Add missing colormap->framesync dependency Andreas Rheinhardt 2024-05-05 13:40 ` [FFmpeg-devel] [PATCH 13/24] avfilter/vf_xmedian: Define OFFSET, FLAGS macros unconditionally Andreas Rheinhardt 2024-05-05 13:40 ` [FFmpeg-devel] [PATCH 14/24] avfilter/Makefile: Add scale(2ref)->framesync dependency Andreas Rheinhardt 2024-05-07 21:11 ` Frank Plowman 2024-05-05 13:40 ` [FFmpeg-devel] [PATCH 15/24] avfilter/vsrc_testsrc: Fix compilation of colorchart filter Andreas Rheinhardt 2024-05-05 13:40 ` [FFmpeg-devel] [PATCH 16/24] avfilter/Makefile: Add missing guided->framesync dependency Andreas Rheinhardt 2024-05-05 13:40 ` [FFmpeg-devel] [PATCH 17/24] avfilter/vf_convolution: Fix compilation with sobel disabled Andreas Rheinhardt 2024-05-05 13:40 ` [FFmpeg-devel] [PATCH 18/24] avfilter/x86/Makefile: Add missing dependencies for sobel filter Andreas Rheinhardt 2024-05-05 13:40 ` [FFmpeg-devel] [PATCH 19/24] avfilter/Makefile: Add missing multiply->framesync dependency Andreas Rheinhardt 2024-05-05 13:40 ` [FFmpeg-devel] [PATCH 20/24] avfilter/vf_lut3d: Fix standalone build of lut1d Andreas Rheinhardt 2024-05-05 13:40 ` [FFmpeg-devel] [PATCH 21/24] avfilter/x86/Makefile: Fix standalone build of haldclut filter Andreas Rheinhardt 2024-05-05 13:40 ` [FFmpeg-devel] [PATCH 22/24] avfilter/Makefile: Add missing identity->framesync dependency Andreas Rheinhardt 2024-05-05 13:40 ` [FFmpeg-devel] [PATCH 23/24] configure: Add missing identity filter->scene_sad dependency Andreas Rheinhardt 2024-05-05 13:40 ` [FFmpeg-devel] [PATCH 24/24] avfilter/Makefile: Add missing morpho->framesync dependency Andreas Rheinhardt 2024-05-06 21:52 ` [FFmpeg-devel] [PATCH 25/28] avcodec/x86/mpegvideoencdsp_init: Remove bits_per_raw_sample check Andreas Rheinhardt 2024-05-07 0:10 ` Michael Niedermayer 2024-05-07 8:49 ` Andreas Rheinhardt 2024-05-06 21:52 ` Andreas Rheinhardt [this message] 2024-05-06 22:52 ` [FFmpeg-devel] [PATCH 26/28] avcodec/mpegvideoencdsp: Factor draw_edges out in its own context Lynne 2024-05-06 21:52 ` [FFmpeg-devel] [PATCH 27/28] avcodec/mpegvideoencdsp: Pass bitexact flag directly Andreas Rheinhardt 2024-05-06 21:52 ` [FFmpeg-devel] [PATCH 28/28] avcodec/diracdec: Don't use EDGE_WIDTH from mpegpicture.h Andreas Rheinhardt 2024-05-07 8:55 ` [FFmpeg-devel] [PATCH 1/9] configure: Add missing dirac_decoder->qpeldsp dependency Andreas Rheinhardt
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=GV1P250MB07379CD2C3C128004AAFF4258F1C2@GV1P250MB0737.EURP250.PROD.OUTLOOK.COM \ --to=andreas.rheinhardt@outlook.com \ --cc=ffmpeg-devel@ffmpeg.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel This inbox may be cloned and mirrored by anyone: git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git # If you have public-inbox 1.1+ installed, you may # initialize and index your mirror using the following commands: public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \ ffmpegdev@gitmailbox.com public-inbox-index ffmpegdev Example config snippet for mirrors. AGPL code for this site: git clone https://public-inbox.org/public-inbox.git