* [FFmpeg-devel] [PATCH 2/5] lavu/lls: use ff_scalarproduct_double_c()
2024-05-30 19:06 [FFmpeg-devel] [PATCHv2 1/5] lavu/float_dsp: add double-precision scalar product Rémi Denis-Courmont
@ 2024-05-30 19:06 ` Rémi Denis-Courmont
2024-05-30 19:06 ` [FFmpeg-devel] [PATCH 3/5] lavfi: get rid of bespoke double scalar products Rémi Denis-Courmont
` (4 subsequent siblings)
5 siblings, 0 replies; 10+ messages in thread
From: Rémi Denis-Courmont @ 2024-05-30 19:06 UTC (permalink / raw)
To: ffmpeg-devel
---
libavutil/lls.c | 9 ++-------
1 file changed, 2 insertions(+), 7 deletions(-)
diff --git a/libavutil/lls.c b/libavutil/lls.c
index c1e038daf1..1096ae69d5 100644
--- a/libavutil/lls.c
+++ b/libavutil/lls.c
@@ -30,6 +30,7 @@
#include "config.h"
#include "attributes.h"
+#include "float_dsp.h"
#include "lls.h"
static void update_lls(LLSModel *m, const double *var)
@@ -102,13 +103,7 @@ void avpriv_solve_lls(LLSModel *m, double threshold, unsigned short min_order)
static double evaluate_lls(LLSModel *m, const double *param, int order)
{
- int i;
- double out = 0;
-
- for (i = 0; i <= order; i++)
- out += param[i] * m->coeff[order][i];
-
- return out;
+ return ff_scalarproduct_double_c(m->coeff[order], param, order + 1);
}
av_cold void avpriv_init_lls(LLSModel *m, int indep_count)
--
2.45.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 10+ messages in thread
* [FFmpeg-devel] [PATCH 3/5] lavfi: get rid of bespoke double scalar products
2024-05-30 19:06 [FFmpeg-devel] [PATCHv2 1/5] lavu/float_dsp: add double-precision scalar product Rémi Denis-Courmont
2024-05-30 19:06 ` [FFmpeg-devel] [PATCH 2/5] lavu/lls: use ff_scalarproduct_double_c() Rémi Denis-Courmont
@ 2024-05-30 19:06 ` Rémi Denis-Courmont
2024-05-30 19:06 ` [FFmpeg-devel] [PATCH 4/5] checkasm/float_dsp: add double-precision scalar product Rémi Denis-Courmont
` (3 subsequent siblings)
5 siblings, 0 replies; 10+ messages in thread
From: Rémi Denis-Courmont @ 2024-05-30 19:06 UTC (permalink / raw)
To: ffmpeg-devel
---
libavfilter/aap_template.c | 14 +-------------
libavfilter/anlms_template.c | 16 ++--------------
libavfilter/arls_template.c | 14 +-------------
3 files changed, 4 insertions(+), 40 deletions(-)
diff --git a/libavfilter/aap_template.c b/libavfilter/aap_template.c
index ea9c815a89..0e0580fb32 100644
--- a/libavfilter/aap_template.c
+++ b/libavfilter/aap_template.c
@@ -36,18 +36,6 @@
#define fn2(a,b) fn3(a,b)
#define fn(a) fn2(a, SAMPLE_FORMAT)
-#if DEPTH == 64
-static double scalarproduct_double(const double *v1, const double *v2, int len)
-{
- double p = 0.0;
-
- for (int i = 0; i < len; i++)
- p += v1[i] * v2[i];
-
- return p;
-}
-#endif
-
static ftype fn(fir_sample)(AudioAPContext *s, ftype sample, ftype *delay,
ftype *coeffs, ftype *tmp, int *offset)
{
@@ -60,7 +48,7 @@ static ftype fn(fir_sample)(AudioAPContext *s, ftype sample, ftype *delay,
#if DEPTH == 32
output = s->fdsp->scalarproduct_float(delay, tmp, s->kernel_size);
#else
- output = scalarproduct_double(delay, tmp, s->kernel_size);
+ output = s->fdsp->scalarproduct_double(delay, tmp, s->kernel_size);
#endif
if (--(*offset) < 0)
diff --git a/libavfilter/anlms_template.c b/libavfilter/anlms_template.c
index b25df4fa18..a8d1dbfe0f 100644
--- a/libavfilter/anlms_template.c
+++ b/libavfilter/anlms_template.c
@@ -33,18 +33,6 @@
#define fn2(a,b) fn3(a,b)
#define fn(a) fn2(a, SAMPLE_FORMAT)
-#if DEPTH == 64
-static double scalarproduct_double(const double *v1, const double *v2, int len)
-{
- double p = 0.0;
-
- for (int i = 0; i < len; i++)
- p += v1[i] * v2[i];
-
- return p;
-}
-#endif
-
static ftype fn(fir_sample)(AudioNLMSContext *s, ftype sample, ftype *delay,
ftype *coeffs, ftype *tmp, int *offset)
{
@@ -58,7 +46,7 @@ static ftype fn(fir_sample)(AudioNLMSContext *s, ftype sample, ftype *delay,
#if DEPTH == 32
output = s->fdsp->scalarproduct_float(delay, tmp, s->kernel_size);
#else
- output = scalarproduct_double(delay, tmp, s->kernel_size);
+ output = s->fdsp->scalarproduct_double(delay, tmp, s->kernel_size);
#endif
if (--(*offset) < 0)
@@ -85,7 +73,7 @@ static ftype fn(process_sample)(AudioNLMSContext *s, ftype input, ftype desired,
#if DEPTH == 32
sum = s->fdsp->scalarproduct_float(delay, delay, s->kernel_size);
#else
- sum = scalarproduct_double(delay, delay, s->kernel_size);
+ sum = s->fdsp->scalarproduct_double(delay, delay, s->kernel_size);
#endif
norm = s->eps + sum;
b = mu * e / norm;
diff --git a/libavfilter/arls_template.c b/libavfilter/arls_template.c
index d8b19d89a5..c67b48cf6f 100644
--- a/libavfilter/arls_template.c
+++ b/libavfilter/arls_template.c
@@ -39,18 +39,6 @@
#define fn2(a,b) fn3(a,b)
#define fn(a) fn2(a, SAMPLE_FORMAT)
-#if DEPTH == 64
-static double scalarproduct_double(const double *v1, const double *v2, int len)
-{
- double p = 0.0;
-
- for (int i = 0; i < len; i++)
- p += v1[i] * v2[i];
-
- return p;
-}
-#endif
-
static ftype fn(fir_sample)(AudioRLSContext *s, ftype sample, ftype *delay,
ftype *coeffs, ftype *tmp, int *offset)
{
@@ -64,7 +52,7 @@ static ftype fn(fir_sample)(AudioRLSContext *s, ftype sample, ftype *delay,
#if DEPTH == 32
output = s->fdsp->scalarproduct_float(delay, tmp, s->kernel_size);
#else
- output = scalarproduct_double(delay, tmp, s->kernel_size);
+ output = s->fdsp->scalarproduct_double(delay, tmp, s->kernel_size);
#endif
if (--(*offset) < 0)
--
2.45.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 10+ messages in thread
* [FFmpeg-devel] [PATCH 4/5] checkasm/float_dsp: add double-precision scalar product
2024-05-30 19:06 [FFmpeg-devel] [PATCHv2 1/5] lavu/float_dsp: add double-precision scalar product Rémi Denis-Courmont
2024-05-30 19:06 ` [FFmpeg-devel] [PATCH 2/5] lavu/lls: use ff_scalarproduct_double_c() Rémi Denis-Courmont
2024-05-30 19:06 ` [FFmpeg-devel] [PATCH 3/5] lavfi: get rid of bespoke double scalar products Rémi Denis-Courmont
@ 2024-05-30 19:06 ` Rémi Denis-Courmont
2024-05-30 19:06 ` [FFmpeg-devel] [PATCH 5/5] lavu/float_dsp: R-V V scalarproduct_double Rémi Denis-Courmont
` (2 subsequent siblings)
5 siblings, 0 replies; 10+ messages in thread
From: Rémi Denis-Courmont @ 2024-05-30 19:06 UTC (permalink / raw)
To: ffmpeg-devel
---
tests/checkasm/float_dsp.c | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)
diff --git a/tests/checkasm/float_dsp.c b/tests/checkasm/float_dsp.c
index cadfa65e2a..296db1cff9 100644
--- a/tests/checkasm/float_dsp.c
+++ b/tests/checkasm/float_dsp.c
@@ -278,6 +278,22 @@ static void test_scalarproduct_float(const float *src0, const float *src1)
bench_new(src0, src1, LEN);
}
+static void test_scalarproduct_double(const double *src0, const double *src1)
+{
+ double cprod, oprod;
+
+ declare_func_float(double, const double *, const double *, size_t);
+
+ cprod = call_ref(src0, src1, LEN);
+ oprod = call_new(src0, src1, LEN);
+ if (!double_near_abs_eps(cprod, oprod, ARBITRARY_SCALARPRODUCT_CONST)) {
+ fprintf(stderr, "%- .12f - %- .12f = % .12g\n",
+ cprod, oprod, cprod - oprod);
+ fail();
+ }
+ bench_new(src0, src1, LEN);
+}
+
void checkasm_check_float_dsp(void)
{
LOCAL_ALIGNED_32(float, src0, [LEN]);
@@ -334,6 +350,9 @@ void checkasm_check_float_dsp(void)
if (check_func(fdsp->scalarproduct_float, "scalarproduct_float"))
test_scalarproduct_float(src3, src4);
report("scalarproduct_float");
+ if (check_func(fdsp->scalarproduct_double, "scalarproduct_double"))
+ test_scalarproduct_double(dbl_src0, dbl_src1);
+ report("scalarproduct_double");
av_freep(&fdsp);
}
--
2.45.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 10+ messages in thread
* [FFmpeg-devel] [PATCH 5/5] lavu/float_dsp: R-V V scalarproduct_double
2024-05-30 19:06 [FFmpeg-devel] [PATCHv2 1/5] lavu/float_dsp: add double-precision scalar product Rémi Denis-Courmont
` (2 preceding siblings ...)
2024-05-30 19:06 ` [FFmpeg-devel] [PATCH 4/5] checkasm/float_dsp: add double-precision scalar product Rémi Denis-Courmont
@ 2024-05-30 19:06 ` Rémi Denis-Courmont
2024-05-30 19:10 ` [FFmpeg-devel] [PATCHv2 1/5] lavu/float_dsp: add double-precision scalar product James Almer
2024-05-30 19:31 ` Rémi Denis-Courmont
5 siblings, 0 replies; 10+ messages in thread
From: Rémi Denis-Courmont @ 2024-05-30 19:06 UTC (permalink / raw)
To: ffmpeg-devel
C908:
scalarproduct_double_c: 39.2
scalarproduct_double_rvv_f64: 10.5
X60:
scalarproduct_double_c: 35.0
scalarproduct_double_rvv_f64: 5.2
---
libavutil/riscv/float_dsp_init.c | 3 +++
libavutil/riscv/float_dsp_rvv.S | 21 +++++++++++++++++++++
2 files changed, 24 insertions(+)
diff --git a/libavutil/riscv/float_dsp_init.c b/libavutil/riscv/float_dsp_init.c
index 585f237225..155496fa6b 100644
--- a/libavutil/riscv/float_dsp_init.c
+++ b/libavutil/riscv/float_dsp_init.c
@@ -46,6 +46,8 @@ void ff_vector_dmac_scalar_rvv(double *dst, const double *src, double mul,
int len);
void ff_vector_dmul_scalar_rvv(double *dst, const double *src, double mul,
int len);
+double ff_scalarproduct_double_rvv(const double *v1, const double *v2,
+ size_t len);
av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp)
{
@@ -68,6 +70,7 @@ av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp)
fdsp->vector_dmul = ff_vector_dmul_rvv;
fdsp->vector_dmac_scalar = ff_vector_dmac_scalar_rvv;
fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_rvv;
+ fdsp->scalarproduct_double = ff_scalarproduct_double_rvv;
}
}
#endif
diff --git a/libavutil/riscv/float_dsp_rvv.S b/libavutil/riscv/float_dsp_rvv.S
index e6ec182a7a..2f0ade6db6 100644
--- a/libavutil/riscv/float_dsp_rvv.S
+++ b/libavutil/riscv/float_dsp_rvv.S
@@ -249,3 +249,24 @@ NOHWD mv a2, a3
ret
endfunc
+
+func ff_scalarproduct_double_rvv, zve64f
+ vsetvli t0, zero, e64, m8, ta, ma
+ vmv.v.x v8, zero
+ vmv.s.x v0, zero
+1:
+ vsetvli t0, a2, e64, m8, tu, ma
+ vle64.v v16, (a0)
+ sub a2, a2, t0
+ vle64.v v24, (a1)
+ sh3add a0, t0, a0
+ vfmacc.vv v8, v16, v24
+ sh3add a1, t0, a1
+ bnez a2, 1b
+
+ vsetvli t0, zero, e64, m8, ta, ma
+ vfredusum.vs v0, v8, v0
+ vfmv.f.s fa0, v0
+NOHWD fmv.x.w a0, fa0
+ ret
+endfunc
--
2.45.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [FFmpeg-devel] [PATCHv2 1/5] lavu/float_dsp: add double-precision scalar product
2024-05-30 19:06 [FFmpeg-devel] [PATCHv2 1/5] lavu/float_dsp: add double-precision scalar product Rémi Denis-Courmont
` (3 preceding siblings ...)
2024-05-30 19:06 ` [FFmpeg-devel] [PATCH 5/5] lavu/float_dsp: R-V V scalarproduct_double Rémi Denis-Courmont
@ 2024-05-30 19:10 ` James Almer
2024-05-30 19:21 ` Rémi Denis-Courmont
2024-05-30 19:31 ` Rémi Denis-Courmont
5 siblings, 1 reply; 10+ messages in thread
From: James Almer @ 2024-05-30 19:10 UTC (permalink / raw)
To: ffmpeg-devel
On 5/30/2024 4:06 PM, Rémi Denis-Courmont wrote:
> The function pointer is appended to the structure for backward binary
> compatibility. Fortunately, this is allocated by libavutil, not by the
> user, so increasing the structure size is safe.
> ---
> libavutil/float_dsp.c | 12 ++++++++++++
> libavutil/float_dsp.h | 31 ++++++++++++++++++++++++++++++-
> 2 files changed, 42 insertions(+), 1 deletion(-)
>
> diff --git a/libavutil/float_dsp.c b/libavutil/float_dsp.c
> index e9fb023466..08bbc85e3e 100644
> --- a/libavutil/float_dsp.c
> +++ b/libavutil/float_dsp.c
> @@ -132,6 +132,17 @@ float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len)
> return p;
> }
>
> +double ff_scalarproduct_double_c(const double *v1, const double *v2,
> + size_t len)
> +{
> + double p = 0.0;
> +
> + for (size_t i = 0; i < len; i++)
> + p += v1[i] * v2[i];
> +
> + return p;
> +}
> +
> av_cold AVFloatDSPContext *avpriv_float_dsp_alloc(int bit_exact)
> {
> AVFloatDSPContext *fdsp = av_mallocz(sizeof(AVFloatDSPContext));
> @@ -149,6 +160,7 @@ av_cold AVFloatDSPContext *avpriv_float_dsp_alloc(int bit_exact)
> fdsp->vector_fmul_reverse = vector_fmul_reverse_c;
> fdsp->butterflies_float = butterflies_float_c;
> fdsp->scalarproduct_float = avpriv_scalarproduct_float_c;
> + fdsp->scalarproduct_double = ff_scalarproduct_double_c;
>
> #if ARCH_AARCH64
> ff_float_dsp_init_aarch64(fdsp);
> diff --git a/libavutil/float_dsp.h b/libavutil/float_dsp.h
> index 342a8715c5..5053aa240d 100644
> --- a/libavutil/float_dsp.h
> +++ b/libavutil/float_dsp.h
> @@ -19,6 +19,8 @@
> #ifndef AVUTIL_FLOAT_DSP_H
> #define AVUTIL_FLOAT_DSP_H
>
> +#include <stddef.h>
> +
> typedef struct AVFloatDSPContext {
> /**
> * Calculate the entry wise product of two vectors of floats and store the result in
> @@ -187,19 +189,46 @@ typedef struct AVFloatDSPContext {
> */
> void (*vector_dmul)(double *dst, const double *src0, const double *src1,
> int len);
> +
> + /**
> + * Calculate the scalar product of two vectors of doubles.
> + *
> + * @param v1 first vector
> + * @param v2 second vector
> + * @param len length of vectors
> + *
> + * @return inner product of the vectors
> + */
> + double (*scalarproduct_double)(const double *v1, const double *v2,
> + size_t len);
> } AVFloatDSPContext;
>
> /**
> - * Return the scalar product of two vectors.
> + * Return the scalar product of two vectors of floats.
> *
> * @param v1 first input vector
> + * constraints: 32-byte aligned
> * @param v2 first input vector
> + * constraints: 32-byte aligned
> * @param len number of elements
> + * constraints: multiple of 16
Why are you adding this to the doxy for scalarproduct_float()? Those
constraints are not correct for it. They are for scalarproduct_double()
which you're adding now.
> *
> * @return sum of elementwise products
> */
> float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len);
>
> +/**
> + * Return the scalar product of two vectors of doubles.
> + *
> + * @param v1 first input vector
> + * @param v2 first input vector
> + * @param len number of elements
> + *
> + * @return inner product of the vectors
> + */
> +double ff_scalarproduct_double_c(const double *v1, const double *v2,
> + size_t len);
> +
> void ff_float_dsp_init_aarch64(AVFloatDSPContext *fdsp);
> void ff_float_dsp_init_arm(AVFloatDSPContext *fdsp);
> void ff_float_dsp_init_ppc(AVFloatDSPContext *fdsp, int strict);
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [FFmpeg-devel] [PATCHv2 1/5] lavu/float_dsp: add double-precision scalar product
2024-05-30 19:10 ` [FFmpeg-devel] [PATCHv2 1/5] lavu/float_dsp: add double-precision scalar product James Almer
@ 2024-05-30 19:21 ` Rémi Denis-Courmont
2024-05-30 19:28 ` James Almer
0 siblings, 1 reply; 10+ messages in thread
From: Rémi Denis-Courmont @ 2024-05-30 19:21 UTC (permalink / raw)
To: ffmpeg-devel
Le torstaina 30. toukokuuta 2024, 22.10.28 EEST James Almer a écrit :
> Why are you adding this to the doxy for scalarproduct_float()? Those
> constraints are not correct for it. They are for scalarproduct_double()
> which you're adding now.
Because copy-paste error.
--
Rémi Denis-Courmont
http://www.remlab.net/
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [FFmpeg-devel] [PATCHv2 1/5] lavu/float_dsp: add double-precision scalar product
2024-05-30 19:21 ` Rémi Denis-Courmont
@ 2024-05-30 19:28 ` James Almer
0 siblings, 0 replies; 10+ messages in thread
From: James Almer @ 2024-05-30 19:28 UTC (permalink / raw)
To: ffmpeg-devel
On 5/30/2024 4:21 PM, Rémi Denis-Courmont wrote:
> Le torstaina 30. toukokuuta 2024, 22.10.28 EEST James Almer a écrit :
>> Why are you adding this to the doxy for scalarproduct_float()? Those
>> constraints are not correct for it. They are for scalarproduct_double()
>> which you're adding now.
>
> Because copy-paste error.
Ok, patchset LGTM after you amend that.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [FFmpeg-devel] [PATCHv2 1/5] lavu/float_dsp: add double-precision scalar product
2024-05-30 19:06 [FFmpeg-devel] [PATCHv2 1/5] lavu/float_dsp: add double-precision scalar product Rémi Denis-Courmont
` (4 preceding siblings ...)
2024-05-30 19:10 ` [FFmpeg-devel] [PATCHv2 1/5] lavu/float_dsp: add double-precision scalar product James Almer
@ 2024-05-30 19:31 ` Rémi Denis-Courmont
2024-05-30 19:33 ` James Almer
5 siblings, 1 reply; 10+ messages in thread
From: Rémi Denis-Courmont @ 2024-05-30 19:31 UTC (permalink / raw)
To: ffmpeg-devel
Le torstaina 30. toukokuuta 2024, 22.06.55 EEST Rémi Denis-Courmont a écrit :
> The function pointer is appended to the structure for backward binary
> compatibility. Fortunately, this is allocated by libavutil, not by the
> user, so increasing the structure size is safe.
> ---
> libavutil/float_dsp.c | 12 ++++++++++++
> libavutil/float_dsp.h | 31 ++++++++++++++++++++++++++++++-
> 2 files changed, 42 insertions(+), 1 deletion(-)
>
> diff --git a/libavutil/float_dsp.c b/libavutil/float_dsp.c
> index e9fb023466..08bbc85e3e 100644
> --- a/libavutil/float_dsp.c
> +++ b/libavutil/float_dsp.c
> @@ -132,6 +132,17 @@ float avpriv_scalarproduct_float_c(const float *v1,
> const float *v2, int len) return p;
> }
>
> +double ff_scalarproduct_double_c(const double *v1, const double *v2,
> + size_t len)
> +{
> + double p = 0.0;
> +
> + for (size_t i = 0; i < len; i++)
> + p += v1[i] * v2[i];
> +
> + return p;
> +}
> +
If somebody wants to write x86 assembly, they can probably borrow most of the
code for evaluate_lls. It is a double precision scalar product with a little
bit of extra fluff in the prologue.
--
レミ・デニ-クールモン
http://www.remlab.net/
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [FFmpeg-devel] [PATCHv2 1/5] lavu/float_dsp: add double-precision scalar product
2024-05-30 19:31 ` Rémi Denis-Courmont
@ 2024-05-30 19:33 ` James Almer
0 siblings, 0 replies; 10+ messages in thread
From: James Almer @ 2024-05-30 19:33 UTC (permalink / raw)
To: ffmpeg-devel
On 5/30/2024 4:31 PM, Rémi Denis-Courmont wrote:
> Le torstaina 30. toukokuuta 2024, 22.06.55 EEST Rémi Denis-Courmont a écrit :
>> The function pointer is appended to the structure for backward binary
>> compatibility. Fortunately, this is allocated by libavutil, not by the
>> user, so increasing the structure size is safe.
>> ---
>> libavutil/float_dsp.c | 12 ++++++++++++
>> libavutil/float_dsp.h | 31 ++++++++++++++++++++++++++++++-
>> 2 files changed, 42 insertions(+), 1 deletion(-)
>>
>> diff --git a/libavutil/float_dsp.c b/libavutil/float_dsp.c
>> index e9fb023466..08bbc85e3e 100644
>> --- a/libavutil/float_dsp.c
>> +++ b/libavutil/float_dsp.c
>> @@ -132,6 +132,17 @@ float avpriv_scalarproduct_float_c(const float *v1,
>> const float *v2, int len) return p;
>> }
>>
>> +double ff_scalarproduct_double_c(const double *v1, const double *v2,
>> + size_t len)
>> +{
>> + double p = 0.0;
>> +
>> + for (size_t i = 0; i < len; i++)
>> + p += v1[i] * v2[i];
>> +
>> + return p;
>> +}
>> +
>
> If somebody wants to write x86 assembly, they can probably borrow most of the
> code for evaluate_lls. It is a double precision scalar product with a little
> bit of extra fluff in the prologue.
I already did, I'm just waiting for this set to be pushed before sending it.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 10+ messages in thread