Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
From: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
To: ffmpeg-devel@ffmpeg.org
Subject: Re: [FFmpeg-devel] [PATCH] x86/aacencdsp: add SSE2 and AVX versions of quantize_bands
Date: Tue, 4 Jun 2024 03:42:08 +0200
Message-ID: <GV1P250MB07374AAEE38699E9BE8642FB8FF82@GV1P250MB0737.EURP250.PROD.OUTLOOK.COM> (raw)
In-Reply-To: <20240604012343.1771-1-jamrial@gmail.com>

James Almer:
> quant_bands_signed_sse2: 417.0
> quant_bands_signed_avx: 202.0

The benchmark numbers for the C code are missing.

> 
> Signed-off-by: James Almer <jamrial@gmail.com>
> ---
>  libavcodec/aacenc.h             |  2 +-
>  libavcodec/x86/aacencdsp.asm    | 27 ++++++++++++++++++++++++---
>  libavcodec/x86/aacencdsp_init.c |  6 ++++++
>  tests/checkasm/aacencdsp.c      |  4 ++--
>  4 files changed, 33 insertions(+), 6 deletions(-)
> 
> diff --git a/libavcodec/aacenc.h b/libavcodec/aacenc.h
> index d07960620e..ae15f91e06 100644
> --- a/libavcodec/aacenc.h
> +++ b/libavcodec/aacenc.h
> @@ -242,7 +242,7 @@ typedef struct AACEncContext {
>      enum RawDataBlockType cur_type;              ///< channel group type cur_channel belongs to
>  
>      AudioFrameQueue afq;
> -    DECLARE_ALIGNED(16, int,   qcoefs)[96];      ///< quantized coefficients
> +    DECLARE_ALIGNED(32, int,   qcoefs)[96];      ///< quantized coefficients
>      DECLARE_ALIGNED(32, float, scoefs)[1024];    ///< scaled coefficients
>  
>      uint16_t quantize_band_cost_cache_generation;
> diff --git a/libavcodec/x86/aacencdsp.asm b/libavcodec/x86/aacencdsp.asm
> index 0d3ba4b89d..99be2d87f5 100644
> --- a/libavcodec/x86/aacencdsp.asm
> +++ b/libavcodec/x86/aacencdsp.asm
> @@ -53,8 +53,19 @@ cglobal abs_pow34, 3, 3, 3, out, in, size
>  ;                           int size, int is_signed, int maxval, const float Q34,
>  ;                           const float rounding)
>  ;*******************************************************************
> -INIT_XMM sse2
> +%macro AAC_QUANTIZE_BANDS 0
>  cglobal aac_quantize_bands, 5, 5, 6, out, in, scaled, size, is_signed, maxval, Q34, rounding
> +%if mmsize == 32
> +    vbroadcastss m0, Q34m
> +    vbroadcastss m1, roundingm
> +%if UNIX64 == 0
> +    cvtsi2ss xm3, dword maxvalm
> +%else
> +    cvtsi2ss xm3, maxvald
> +%endif
> +    shufps   xm3, xm3, xm3, 0
> +    vinsertf128 m3, m3, xm3, 1
> +%else ; mmsize == 16
>  %if UNIX64 == 0
>      movss     m0, Q34m
>      movss     m1, roundingm
> @@ -65,9 +76,13 @@ cglobal aac_quantize_bands, 5, 5, 6, out, in, scaled, size, is_signed, maxval, Q
>      shufps    m0, m0, 0
>      shufps    m1, m1, 0
>      shufps    m3, m3, 0
> +%endif
>      shl       is_signedd, 31
> -    movd      m4, is_signedd
> -    shufps    m4, m4, 0
> +    movd     xm4, is_signedd
> +    shufps   xm4, xm4, xm4, 0
> +%if mmsize == 32
> +    vinsertf128 m4, m4, xm4, 1
> +%endif
>      shl       sized,   2
>      add       inq, sizeq
>      add       outq, sizeq
> @@ -84,3 +99,9 @@ cglobal aac_quantize_bands, 5, 5, 6, out, in, scaled, size, is_signed, maxval, Q
>      add       sizeq, mmsize
>      jl       .loop
>      RET
> +%endmacro
> +
> +INIT_XMM sse2
> +AAC_QUANTIZE_BANDS
> +INIT_YMM avx
> +AAC_QUANTIZE_BANDS
> diff --git a/libavcodec/x86/aacencdsp_init.c b/libavcodec/x86/aacencdsp_init.c
> index e0d8dec4f8..cf17dbf91d 100644
> --- a/libavcodec/x86/aacencdsp_init.c
> +++ b/libavcodec/x86/aacencdsp_init.c
> @@ -30,6 +30,9 @@ void ff_abs_pow34_sse(float *out, const float *in, const int size);
>  void ff_aac_quantize_bands_sse2(int *out, const float *in, const float *scaled,
>                                  int size, int is_signed, int maxval, const float Q34,
>                                  const float rounding);
> +void ff_aac_quantize_bands_avx(int *out, const float *in, const float *scaled,
> +                               int size, int is_signed, int maxval, const float Q34,
> +                               const float rounding);
>  
>  av_cold void ff_aacenc_dsp_init_x86(AACEncDSPContext *s)
>  {
> @@ -40,4 +43,7 @@ av_cold void ff_aacenc_dsp_init_x86(AACEncDSPContext *s)
>  
>      if (EXTERNAL_SSE2(cpu_flags))
>          s->quant_bands = ff_aac_quantize_bands_sse2;

The commit message seems to be wrong: you are not adding an SSE2 version, since one already exists.

> +
> +    if (EXTERNAL_AVX_FAST(cpu_flags))
> +        s->quant_bands = ff_aac_quantize_bands_avx;
>  }
> diff --git a/tests/checkasm/aacencdsp.c b/tests/checkasm/aacencdsp.c
> index 791dd30320..5308a2ac03 100644
> --- a/tests/checkasm/aacencdsp.c
> +++ b/tests/checkasm/aacencdsp.c
> @@ -81,8 +81,8 @@ static void test_quant_bands(AACEncDSPContext *s)
>      for (int sign = 0; sign <= 1; sign++) {
>          if (check_func(s->quant_bands, "quant_bands_%s",
>                         sign ? "signed" : "unsigned")) {
> -            LOCAL_ALIGNED_16(int, out, [BUF_SIZE]);
> -            LOCAL_ALIGNED_16(int, out2, [BUF_SIZE]);
> +            LOCAL_ALIGNED_32(int, out, [BUF_SIZE]);
> +            LOCAL_ALIGNED_32(int, out2, [BUF_SIZE]);
>  
>              call_ref(out, in, scaled, BUF_SIZE, sign, maxval, q34, rounding);
>              call_new(out2, in, scaled, BUF_SIZE, sign, maxval, q34, rounding);

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

  reply	other threads:[~2024-06-04  1:42 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-06-04  1:23 James Almer
2024-06-04  1:42 ` Andreas Rheinhardt [this message]
2024-06-04  1:45   ` James Almer
2024-06-04  6:37 ` Rémi Denis-Courmont
2024-06-04 15:52   ` James Almer

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=GV1P250MB07374AAEE38699E9BE8642FB8FF82@GV1P250MB0737.EURP250.PROD.OUTLOOK.COM \
    --to=andreas.rheinhardt@outlook.com \
    --cc=ffmpeg-devel@ffmpeg.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git