* [FFmpeg-devel] [PATCH] avutil/eval: Use even better PRNG
@ 2024-01-13 3:51 Michael Niedermayer
2024-01-14 14:14 ` Stefano Sabatini
0 siblings, 1 reply; 8+ messages in thread
From: Michael Niedermayer @ 2024-01-13 3:51 UTC (permalink / raw)
To: FFmpeg development discussions and patches
This is the 64bit version of Chris Doty-Humphrey's SFC64
Compared to the LCGs these produce much better quality numbers.
Compared to LFGs this needs less state (our LFG has a 224 byte
state for its 32bit version); this has a 32 byte state.
Also the initialization for our LFG is slower.
This is also much faster than KISS or PCG.
This commit replaces the broken LCG used before.
(broken as it had a period of only ~200M due to being stored in a double)
This changes the output of random(), which is why libswresample.mak
is updated; the update was done using the command in libswresample.mak
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
---
libavutil/eval.c | 24 +++-
libavutil/sfc64.h | 85 ++++++++++++++
tests/fate/libswresample.mak | 208 +++++++++++++++++------------------
tests/ref/fate/eval | 2 +-
4 files changed, 210 insertions(+), 109 deletions(-)
create mode 100644 libavutil/sfc64.h
diff --git a/libavutil/eval.c b/libavutil/eval.c
index dc6b3697bc2..349015d4fa3 100644
--- a/libavutil/eval.c
+++ b/libavutil/eval.c
@@ -35,6 +35,7 @@
#include "internal.h"
#include "log.h"
#include "mathematics.h"
+#include "sfc64.h"
#include "time.h"
#include "avstring.h"
#include "timer.h"
@@ -55,6 +56,7 @@ typedef struct Parser {
void *log_ctx;
#define VARS 10
double *var;
+ FFSFC64 *prng_state;
} Parser;
static const AVClass eval_class = {
@@ -173,6 +175,7 @@ struct AVExpr {
} a;
struct AVExpr *param[3];
double *var;
+ FFSFC64 *prng_state;
};
static double etime(double v)
@@ -231,8 +234,14 @@ static double eval_expr(Parser *p, AVExpr *e)
#define COMPUTE_NEXT_RANDOM() \
int idx = av_clip(eval_expr(p, e->param[0]), 0, VARS-1); \
- uint64_t r = isnan(p->var[idx]) ? 0 : p->var[idx]; \
- r = r * 1664525 + 1013904223; \
+ FFSFC64 *s = p->prng_state + idx; \
+ uint64_t r; \
+ \
+ if (!s->counter) { \
+ r = isnan(p->var[idx]) ? 0 : p->var[idx]; \
+ ff_sfc64_init(s, r, r, r, 12); \
+ } \
+ r = ff_sfc64_get(s); \
p->var[idx] = r; \
case e_random: {
@@ -329,7 +338,11 @@ static double eval_expr(Parser *p, AVExpr *e)
case e_div: return e->value * (d2 ? (d / d2) : d * INFINITY);
case e_add: return e->value * (d + d2);
case e_last:return e->value * d2;
- case e_st : return e->value * (p->var[av_clip(d, 0, VARS-1)]= d2);
+ case e_st : {
+ int index = av_clip(d, 0, VARS-1);
+ p->prng_state[index].counter = 0;
+ return e->value * (p->var[index]= d2);
+ }
case e_hypot:return e->value * hypot(d, d2);
case e_atan2:return e->value * atan2(d, d2);
case e_bitand: return isnan(d) || isnan(d2) ? NAN : e->value * ((long int)d & (long int)d2);
@@ -349,6 +362,7 @@ void av_expr_free(AVExpr *e)
av_expr_free(e->param[1]);
av_expr_free(e->param[2]);
av_freep(&e->var);
+ av_freep(&e->prng_state);
av_freep(&e);
}
@@ -736,7 +750,8 @@ int av_expr_parse(AVExpr **expr, const char *s,
goto end;
}
e->var= av_mallocz(sizeof(double) *VARS);
- if (!e->var) {
+ e->prng_state = av_mallocz(sizeof(*e->prng_state) *VARS);
+ if (!e->var || !e->prng_state) {
ret = AVERROR(ENOMEM);
goto end;
}
@@ -778,6 +793,7 @@ double av_expr_eval(AVExpr *e, const double *const_values, void *opaque)
{
Parser p = { 0 };
p.var= e->var;
+ p.prng_state= e->prng_state;
p.const_values = const_values;
p.opaque = opaque;
diff --git a/libavutil/sfc64.h b/libavutil/sfc64.h
new file mode 100644
index 00000000000..05f1e84cc68
--- /dev/null
+++ b/libavutil/sfc64.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2024 Michael Niedermayer <michael-ffmpeg@niedermayer.cc>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * This is an implementation of SFC64, a 64-bit PRNG by Chris Doty-Humphrey.
+ *
+ * This generator is much faster (0m1.872s) than 64bit KISS (0m3.823s) and PCG-XSH-RR-64/32 (0m2.700s)
+ * and passes the TestU01 and PractRand test suites.
+ *
+ */
+
+/**
+ * @file
+ * simple Pseudo Random Number Generator
+ *
+ */
+
+#ifndef AVUTIL_SFC64_H
+#define AVUTIL_SFC64_H
+
+#include <inttypes.h>
+
+typedef struct FFSFC64 {
+ uint64_t a,b,c,counter;
+} FFSFC64;
+
+static inline uint64_t ff_sfc64_get(FFSFC64 *s) {
+ uint64_t tmp = s->a + s->b + s->counter++;
+ s->a = s->b ^ (s->b >> 11);
+ s->b = s->c + (s->c << 3); // This is a multiply by 9
+ s->c = (s->c << 24 | s->c >> 40) + tmp;
+ return tmp;
+}
+
+/**
+ * Return the previous random value, and step the generator backward.
+ *
+ * It is safe to take values before the first, but such values can be highly
+ * correlated to the seeds.
+ */
+static inline uint64_t ff_sfc64_reverse_get(FFSFC64 *s) {
+ uint64_t prev_c = s->b * 0x8E38E38E38E38E39;
+ uint64_t tmp = s->c - (prev_c << 24 | prev_c >> 40);
+ s->b = s->a ^ (s->a >> 11);
+ s->b ^= s->b >> 22;
+ s->b ^= s->b >> 44;
+
+ s->a = tmp - s->b - --s->counter;
+ s->c = prev_c;
+
+ return tmp;
+}
+
+/**
+ * Initialize sfc64 with up to 3 seeds.
+ *
+ * @param rounds number of rounds mixing up state during init. Generally 8-18, larger numbers will help with bad quality seeds.
+ * 12 is a good choice if all 3 seeds are equal
+ *
+ */
+static inline void ff_sfc64_init(FFSFC64 *s, uint64_t seeda, uint64_t seedb, uint64_t seedc, int rounds) {
+ s->a = seeda;
+ s->b = seedb;
+ s->c = seedc;
+ s->counter = 1;
+ while (rounds--)
+ ff_sfc64_get(s);
+}
+
+#endif // AVUTIL_SFC64_H
diff --git a/tests/fate/libswresample.mak b/tests/fate/libswresample.mak
index 0d29f760248..2fc55ea61af 100644
--- a/tests/fate/libswresample.mak
+++ b/tests/fate/libswresample.mak
@@ -367,17 +367,17 @@ fate-swr-resample_async-$(3)-$(1)-$(2): FUZZ = 0.1
fate-swr-resample_async-$(3)-$(1)-$(2): REF = tests/data/asynth-$(1)-1.wav
endef
-fate-swr-resample_async-fltp-44100-8000: CMP_TARGET = 4020.60
-fate-swr-resample_async-fltp-44100-8000: SIZE_TOLERANCE = 529200 - 20310
+fate-swr-resample_async-fltp-44100-8000: CMP_TARGET = 3802.05
+fate-swr-resample_async-fltp-44100-8000: SIZE_TOLERANCE = 529200 - 20386
-fate-swr-resample_async-fltp-8000-44100: CMP_TARGET = 11186.66
-fate-swr-resample_async-fltp-8000-44100: SIZE_TOLERANCE = 96000 - 20344
+fate-swr-resample_async-fltp-8000-44100: CMP_TARGET = 10451.40
+fate-swr-resample_async-fltp-8000-44100: SIZE_TOLERANCE = 96000 - 20350
-fate-swr-resample_async-s16p-44100-8000: CMP_TARGET = 4020.71
-fate-swr-resample_async-s16p-44100-8000: SIZE_TOLERANCE = 529200 - 20310
+fate-swr-resample_async-s16p-44100-8000: CMP_TARGET = 3802.16
+fate-swr-resample_async-s16p-44100-8000: SIZE_TOLERANCE = 529200 - 20386
-fate-swr-resample_async-s16p-8000-44100: CMP_TARGET = 11186.94
-fate-swr-resample_async-s16p-8000-44100: SIZE_TOLERANCE = 96000 - 20344
+fate-swr-resample_async-s16p-8000-44100: CMP_TARGET = 10451.66
+fate-swr-resample_async-s16p-8000-44100: SIZE_TOLERANCE = 96000 - 20350
define ARESAMPLE_EXACT
FATE_SWR_RESAMPLE += fate-swr-resample_exact-$(3)-$(1)-$(2)
@@ -641,77 +641,77 @@ fate-swr-resample_exact_async-$(3)-$(1)-$(2): FUZZ = 0.1
fate-swr-resample_exact_async-$(3)-$(1)-$(2): REF = tests/data/asynth-$(1)-1.wav
endef
-fate-swr-resample_exact_async-dblp-44100-48000: CMP_TARGET = 7791.50
-fate-swr-resample_exact_async-dblp-44100-48000: SIZE_TOLERANCE = 529200 - 20300
+fate-swr-resample_exact_async-dblp-44100-48000: CMP_TARGET = 5947.75
+fate-swr-resample_exact_async-dblp-44100-48000: SIZE_TOLERANCE = 529200 - 20394
-fate-swr-resample_exact_async-dblp-44100-8000: CMP_TARGET = 4022.87
-fate-swr-resample_exact_async-dblp-44100-8000: SIZE_TOLERANCE = 529200 - 20310
+fate-swr-resample_exact_async-dblp-44100-8000: CMP_TARGET = 3804.22
+fate-swr-resample_exact_async-dblp-44100-8000: SIZE_TOLERANCE = 529200 - 20386
-fate-swr-resample_exact_async-dblp-48000-44100: CMP_TARGET = 1923.97
-fate-swr-resample_exact_async-dblp-48000-44100: SIZE_TOLERANCE = 576000 - 20298
+fate-swr-resample_exact_async-dblp-48000-44100: CMP_TARGET = 3056.62
+fate-swr-resample_exact_async-dblp-48000-44100: SIZE_TOLERANCE = 576000 - 20392
-fate-swr-resample_exact_async-dblp-48000-8000: CMP_TARGET = 2592.00
-fate-swr-resample_exact_async-dblp-48000-8000: SIZE_TOLERANCE = 576000 - 20304
+fate-swr-resample_exact_async-dblp-48000-8000: CMP_TARGET = 3408.30
+fate-swr-resample_exact_async-dblp-48000-8000: SIZE_TOLERANCE = 576000 - 20388
-fate-swr-resample_exact_async-dblp-8000-44100: CMP_TARGET = 11187.24
-fate-swr-resample_exact_async-dblp-8000-44100: SIZE_TOLERANCE = 96000 - 20344
+fate-swr-resample_exact_async-dblp-8000-44100: CMP_TARGET = 10450.99
+fate-swr-resample_exact_async-dblp-8000-44100: SIZE_TOLERANCE = 96000 - 20350
-fate-swr-resample_exact_async-dblp-8000-48000: CMP_TARGET = 11326.80
-fate-swr-resample_exact_async-dblp-8000-48000: SIZE_TOLERANCE = 96000 - 20344
+fate-swr-resample_exact_async-dblp-8000-48000: CMP_TARGET = 10371.47
+fate-swr-resample_exact_async-dblp-8000-48000: SIZE_TOLERANCE = 96000 - 20350
-fate-swr-resample_exact_async-fltp-44100-48000: CMP_TARGET = 7791.50
-fate-swr-resample_exact_async-fltp-44100-48000: SIZE_TOLERANCE = 529200 - 20300
+fate-swr-resample_exact_async-fltp-44100-48000: CMP_TARGET = 5947.75
+fate-swr-resample_exact_async-fltp-44100-48000: SIZE_TOLERANCE = 529200 - 20394
-fate-swr-resample_exact_async-fltp-44100-8000: CMP_TARGET = 4022.87
-fate-swr-resample_exact_async-fltp-44100-8000: SIZE_TOLERANCE = 529200 - 20310
+fate-swr-resample_exact_async-fltp-44100-8000: CMP_TARGET = 3804.22
+fate-swr-resample_exact_async-fltp-44100-8000: SIZE_TOLERANCE = 529200 - 20386
-fate-swr-resample_exact_async-fltp-48000-44100: CMP_TARGET = 1923.97
-fate-swr-resample_exact_async-fltp-48000-44100: SIZE_TOLERANCE = 576000 - 20298
+fate-swr-resample_exact_async-fltp-48000-44100: CMP_TARGET = 3056.62
+fate-swr-resample_exact_async-fltp-48000-44100: SIZE_TOLERANCE = 576000 - 20392
-fate-swr-resample_exact_async-fltp-48000-8000: CMP_TARGET = 2592.00
-fate-swr-resample_exact_async-fltp-48000-8000: SIZE_TOLERANCE = 576000 - 20304
+fate-swr-resample_exact_async-fltp-48000-8000: CMP_TARGET = 3408.30
+fate-swr-resample_exact_async-fltp-48000-8000: SIZE_TOLERANCE = 576000 - 20388
-fate-swr-resample_exact_async-fltp-8000-44100: CMP_TARGET = 11187.24
-fate-swr-resample_exact_async-fltp-8000-44100: SIZE_TOLERANCE = 96000 - 20344
+fate-swr-resample_exact_async-fltp-8000-44100: CMP_TARGET = 10450.99
+fate-swr-resample_exact_async-fltp-8000-44100: SIZE_TOLERANCE = 96000 - 20350
-fate-swr-resample_exact_async-fltp-8000-48000: CMP_TARGET = 11326.80
-fate-swr-resample_exact_async-fltp-8000-48000: SIZE_TOLERANCE = 96000 - 20344
+fate-swr-resample_exact_async-fltp-8000-48000: CMP_TARGET = 10371.47
+fate-swr-resample_exact_async-fltp-8000-48000: SIZE_TOLERANCE = 96000 - 20350
-fate-swr-resample_exact_async-s16p-44100-48000: CMP_TARGET = 7791.50
-fate-swr-resample_exact_async-s16p-44100-48000: SIZE_TOLERANCE = 529200 - 20300
+fate-swr-resample_exact_async-s16p-44100-48000: CMP_TARGET = 5947.75
+fate-swr-resample_exact_async-s16p-44100-48000: SIZE_TOLERANCE = 529200 - 20394
-fate-swr-resample_exact_async-s16p-44100-8000: CMP_TARGET = 4023.05
-fate-swr-resample_exact_async-s16p-44100-8000: SIZE_TOLERANCE = 529200 - 20310
+fate-swr-resample_exact_async-s16p-44100-8000: CMP_TARGET = 3804.37
+fate-swr-resample_exact_async-s16p-44100-8000: SIZE_TOLERANCE = 529200 - 20386
-fate-swr-resample_exact_async-s16p-48000-44100: CMP_TARGET = 1923.96
-fate-swr-resample_exact_async-s16p-48000-44100: SIZE_TOLERANCE = 576000 - 20298
+fate-swr-resample_exact_async-s16p-48000-44100: CMP_TARGET = 3056.62
+fate-swr-resample_exact_async-s16p-48000-44100: SIZE_TOLERANCE = 576000 - 20392
-fate-swr-resample_exact_async-s16p-48000-8000: CMP_TARGET = 2592.15
-fate-swr-resample_exact_async-s16p-48000-8000: SIZE_TOLERANCE = 576000 - 20304
+fate-swr-resample_exact_async-s16p-48000-8000: CMP_TARGET = 3408.50
+fate-swr-resample_exact_async-s16p-48000-8000: SIZE_TOLERANCE = 576000 - 20388
-fate-swr-resample_exact_async-s16p-8000-44100: CMP_TARGET = 11187.58
-fate-swr-resample_exact_async-s16p-8000-44100: SIZE_TOLERANCE = 96000 - 20344
+fate-swr-resample_exact_async-s16p-8000-44100: CMP_TARGET = 10451.23
+fate-swr-resample_exact_async-s16p-8000-44100: SIZE_TOLERANCE = 96000 - 20350
-fate-swr-resample_exact_async-s16p-8000-48000: CMP_TARGET = 11327.48
-fate-swr-resample_exact_async-s16p-8000-48000: SIZE_TOLERANCE = 96000 - 20344
+fate-swr-resample_exact_async-s16p-8000-48000: CMP_TARGET = 10372.00
+fate-swr-resample_exact_async-s16p-8000-48000: SIZE_TOLERANCE = 96000 - 20350
-fate-swr-resample_exact_async-s32p-44100-48000: CMP_TARGET = 7791.50
-fate-swr-resample_exact_async-s32p-44100-48000: SIZE_TOLERANCE = 529200 - 20300
+fate-swr-resample_exact_async-s32p-44100-48000: CMP_TARGET = 5947.75
+fate-swr-resample_exact_async-s32p-44100-48000: SIZE_TOLERANCE = 529200 - 20394
-fate-swr-resample_exact_async-s32p-44100-8000: CMP_TARGET = 4022.87
-fate-swr-resample_exact_async-s32p-44100-8000: SIZE_TOLERANCE = 529200 - 20310
+fate-swr-resample_exact_async-s32p-44100-8000: CMP_TARGET = 3804.22
+fate-swr-resample_exact_async-s32p-44100-8000: SIZE_TOLERANCE = 529200 - 20386
-fate-swr-resample_exact_async-s32p-48000-44100: CMP_TARGET = 1923.96
-fate-swr-resample_exact_async-s32p-48000-44100: SIZE_TOLERANCE = 576000 - 20298
+fate-swr-resample_exact_async-s32p-48000-44100: CMP_TARGET = 3056.62
+fate-swr-resample_exact_async-s32p-48000-44100: SIZE_TOLERANCE = 576000 - 20392
-fate-swr-resample_exact_async-s32p-48000-8000: CMP_TARGET = 2592.00
-fate-swr-resample_exact_async-s32p-48000-8000: SIZE_TOLERANCE = 576000 - 20304
+fate-swr-resample_exact_async-s32p-48000-8000: CMP_TARGET = 3408.30
+fate-swr-resample_exact_async-s32p-48000-8000: SIZE_TOLERANCE = 576000 - 20388
-fate-swr-resample_exact_async-s32p-8000-44100: CMP_TARGET = 11187.24
-fate-swr-resample_exact_async-s32p-8000-44100: SIZE_TOLERANCE = 96000 - 20344
+fate-swr-resample_exact_async-s32p-8000-44100: CMP_TARGET = 10450.96
+fate-swr-resample_exact_async-s32p-8000-44100: SIZE_TOLERANCE = 96000 - 20350
-fate-swr-resample_exact_async-s32p-8000-48000: CMP_TARGET = 11326.79
-fate-swr-resample_exact_async-s32p-8000-48000: SIZE_TOLERANCE = 96000 - 20344
+fate-swr-resample_exact_async-s32p-8000-48000: CMP_TARGET = 10371.47
+fate-swr-resample_exact_async-s32p-8000-48000: SIZE_TOLERANCE = 96000 - 20350
define ARESAMPLE_EXACT_LIN
FATE_SWR_RESAMPLE += fate-swr-resample_exact_lin-$(3)-$(1)-$(2)
@@ -975,77 +975,77 @@ fate-swr-resample_exact_lin_async-$(3)-$(1)-$(2): FUZZ = 0.1
fate-swr-resample_exact_lin_async-$(3)-$(1)-$(2): REF = tests/data/asynth-$(1)-1.wav
endef
-fate-swr-resample_exact_lin_async-dblp-44100-48000: CMP_TARGET = 7791.72
-fate-swr-resample_exact_lin_async-dblp-44100-48000: SIZE_TOLERANCE = 529200 - 20300
+fate-swr-resample_exact_lin_async-dblp-44100-48000: CMP_TARGET = 5948.03
+fate-swr-resample_exact_lin_async-dblp-44100-48000: SIZE_TOLERANCE = 529200 - 20394
-fate-swr-resample_exact_lin_async-dblp-44100-8000: CMP_TARGET = 4023.01
-fate-swr-resample_exact_lin_async-dblp-44100-8000: SIZE_TOLERANCE = 529200 - 20310
+fate-swr-resample_exact_lin_async-dblp-44100-8000: CMP_TARGET = 3804.55
+fate-swr-resample_exact_lin_async-dblp-44100-8000: SIZE_TOLERANCE = 529200 - 20386
-fate-swr-resample_exact_lin_async-dblp-48000-44100: CMP_TARGET = 1923.79
-fate-swr-resample_exact_lin_async-dblp-48000-44100: SIZE_TOLERANCE = 576000 - 20298
+fate-swr-resample_exact_lin_async-dblp-48000-44100: CMP_TARGET = 3056.76
+fate-swr-resample_exact_lin_async-dblp-48000-44100: SIZE_TOLERANCE = 576000 - 20392
-fate-swr-resample_exact_lin_async-dblp-48000-8000: CMP_TARGET = 2591.72
-fate-swr-resample_exact_lin_async-dblp-48000-8000: SIZE_TOLERANCE = 576000 - 20304
+fate-swr-resample_exact_lin_async-dblp-48000-8000: CMP_TARGET = 3408.48
+fate-swr-resample_exact_lin_async-dblp-48000-8000: SIZE_TOLERANCE = 576000 - 20388
-fate-swr-resample_exact_lin_async-dblp-8000-44100: CMP_TARGET = 11187.24
-fate-swr-resample_exact_lin_async-dblp-8000-44100: SIZE_TOLERANCE = 96000 - 20344
+fate-swr-resample_exact_lin_async-dblp-8000-44100: CMP_TARGET = 10451.07
+fate-swr-resample_exact_lin_async-dblp-8000-44100: SIZE_TOLERANCE = 96000 - 20350
-fate-swr-resample_exact_lin_async-dblp-8000-48000: CMP_TARGET = 11326.80
-fate-swr-resample_exact_lin_async-dblp-8000-48000: SIZE_TOLERANCE = 96000 - 20344
+fate-swr-resample_exact_lin_async-dblp-8000-48000: CMP_TARGET = 10371.52
+fate-swr-resample_exact_lin_async-dblp-8000-48000: SIZE_TOLERANCE = 96000 - 20350
-fate-swr-resample_exact_lin_async-fltp-44100-48000: CMP_TARGET = 7791.72
-fate-swr-resample_exact_lin_async-fltp-44100-48000: SIZE_TOLERANCE = 529200 - 20300
+fate-swr-resample_exact_lin_async-fltp-44100-48000: CMP_TARGET = 5948.03
+fate-swr-resample_exact_lin_async-fltp-44100-48000: SIZE_TOLERANCE = 529200 - 20394
-fate-swr-resample_exact_lin_async-fltp-44100-8000: CMP_TARGET = 4023.01
-fate-swr-resample_exact_lin_async-fltp-44100-8000: SIZE_TOLERANCE = 529200 - 20310
+fate-swr-resample_exact_lin_async-fltp-44100-8000: CMP_TARGET = 3804.55
+fate-swr-resample_exact_lin_async-fltp-44100-8000: SIZE_TOLERANCE = 529200 - 20386
-fate-swr-resample_exact_lin_async-fltp-48000-44100: CMP_TARGET = 1923.79
-fate-swr-resample_exact_lin_async-fltp-48000-44100: SIZE_TOLERANCE = 576000 - 20298
+fate-swr-resample_exact_lin_async-fltp-48000-44100: CMP_TARGET = 3056.76
+fate-swr-resample_exact_lin_async-fltp-48000-44100: SIZE_TOLERANCE = 576000 - 20392
-fate-swr-resample_exact_lin_async-fltp-48000-8000: CMP_TARGET = 2591.72
-fate-swr-resample_exact_lin_async-fltp-48000-8000: SIZE_TOLERANCE = 576000 - 20304
+fate-swr-resample_exact_lin_async-fltp-48000-8000: CMP_TARGET = 3408.48
+fate-swr-resample_exact_lin_async-fltp-48000-8000: SIZE_TOLERANCE = 576000 - 20388
-fate-swr-resample_exact_lin_async-fltp-8000-44100: CMP_TARGET = 11187.25
-fate-swr-resample_exact_lin_async-fltp-8000-44100: SIZE_TOLERANCE = 96000 - 20344
+fate-swr-resample_exact_lin_async-fltp-8000-44100: CMP_TARGET = 10451.07
+fate-swr-resample_exact_lin_async-fltp-8000-44100: SIZE_TOLERANCE = 96000 - 20350
-fate-swr-resample_exact_lin_async-fltp-8000-48000: CMP_TARGET = 11326.80
-fate-swr-resample_exact_lin_async-fltp-8000-48000: SIZE_TOLERANCE = 96000 - 20344
+fate-swr-resample_exact_lin_async-fltp-8000-48000: CMP_TARGET = 10371.52
+fate-swr-resample_exact_lin_async-fltp-8000-48000: SIZE_TOLERANCE = 96000 - 20350
-fate-swr-resample_exact_lin_async-s16p-44100-48000: CMP_TARGET = 7791.72
-fate-swr-resample_exact_lin_async-s16p-44100-48000: SIZE_TOLERANCE = 529200 - 20300
+fate-swr-resample_exact_lin_async-s16p-44100-48000: CMP_TARGET = 5948.03
+fate-swr-resample_exact_lin_async-s16p-44100-48000: SIZE_TOLERANCE = 529200 - 20394
-fate-swr-resample_exact_lin_async-s16p-44100-8000: CMP_TARGET = 4023.19
-fate-swr-resample_exact_lin_async-s16p-44100-8000: SIZE_TOLERANCE = 529200 - 20310
+fate-swr-resample_exact_lin_async-s16p-44100-8000: CMP_TARGET = 3804.70
+fate-swr-resample_exact_lin_async-s16p-44100-8000: SIZE_TOLERANCE = 529200 - 20386
-fate-swr-resample_exact_lin_async-s16p-48000-44100: CMP_TARGET = 1923.79
-fate-swr-resample_exact_lin_async-s16p-48000-44100: SIZE_TOLERANCE = 576000 - 20298
+fate-swr-resample_exact_lin_async-s16p-48000-44100: CMP_TARGET = 3056.76
+fate-swr-resample_exact_lin_async-s16p-48000-44100: SIZE_TOLERANCE = 576000 - 20392
-fate-swr-resample_exact_lin_async-s16p-48000-8000: CMP_TARGET = 2591.85
-fate-swr-resample_exact_lin_async-s16p-48000-8000: SIZE_TOLERANCE = 576000 - 20304
+fate-swr-resample_exact_lin_async-s16p-48000-8000: CMP_TARGET = 3408.68
+fate-swr-resample_exact_lin_async-s16p-48000-8000: SIZE_TOLERANCE = 576000 - 20388
-fate-swr-resample_exact_lin_async-s16p-8000-44100: CMP_TARGET = 11187.57
-fate-swr-resample_exact_lin_async-s16p-8000-44100: SIZE_TOLERANCE = 96000 - 20344
+fate-swr-resample_exact_lin_async-s16p-8000-44100: CMP_TARGET = 10451.31
+fate-swr-resample_exact_lin_async-s16p-8000-44100: SIZE_TOLERANCE = 96000 - 20350
-fate-swr-resample_exact_lin_async-s16p-8000-48000: CMP_TARGET = 11327.48
-fate-swr-resample_exact_lin_async-s16p-8000-48000: SIZE_TOLERANCE = 96000 - 20344
+fate-swr-resample_exact_lin_async-s16p-8000-48000: CMP_TARGET = 10372.05
+fate-swr-resample_exact_lin_async-s16p-8000-48000: SIZE_TOLERANCE = 96000 - 20350
-fate-swr-resample_exact_lin_async-s32p-44100-48000: CMP_TARGET = 7791.72
-fate-swr-resample_exact_lin_async-s32p-44100-48000: SIZE_TOLERANCE = 529200 - 20300
+fate-swr-resample_exact_lin_async-s32p-44100-48000: CMP_TARGET = 5948.03
+fate-swr-resample_exact_lin_async-s32p-44100-48000: SIZE_TOLERANCE = 529200 - 20394
-fate-swr-resample_exact_lin_async-s32p-44100-8000: CMP_TARGET = 4023.02
-fate-swr-resample_exact_lin_async-s32p-44100-8000: SIZE_TOLERANCE = 529200 - 20310
+fate-swr-resample_exact_lin_async-s32p-44100-8000: CMP_TARGET = 3804.54
+fate-swr-resample_exact_lin_async-s32p-44100-8000: SIZE_TOLERANCE = 529200 - 20386
-fate-swr-resample_exact_lin_async-s32p-48000-44100: CMP_TARGET = 1923.77
-fate-swr-resample_exact_lin_async-s32p-48000-44100: SIZE_TOLERANCE = 576000 - 20298
+fate-swr-resample_exact_lin_async-s32p-48000-44100: CMP_TARGET = 3056.76
+fate-swr-resample_exact_lin_async-s32p-48000-44100: SIZE_TOLERANCE = 576000 - 20392
-fate-swr-resample_exact_lin_async-s32p-48000-8000: CMP_TARGET = 2591.71
-fate-swr-resample_exact_lin_async-s32p-48000-8000: SIZE_TOLERANCE = 576000 - 20304
+fate-swr-resample_exact_lin_async-s32p-48000-8000: CMP_TARGET = 3408.47
+fate-swr-resample_exact_lin_async-s32p-48000-8000: SIZE_TOLERANCE = 576000 - 20388
-fate-swr-resample_exact_lin_async-s32p-8000-44100: CMP_TARGET = 11187.25
-fate-swr-resample_exact_lin_async-s32p-8000-44100: SIZE_TOLERANCE = 96000 - 20344
+fate-swr-resample_exact_lin_async-s32p-8000-44100: CMP_TARGET = 10451.04
+fate-swr-resample_exact_lin_async-s32p-8000-44100: SIZE_TOLERANCE = 96000 - 20350
-fate-swr-resample_exact_lin_async-s32p-8000-48000: CMP_TARGET = 11326.81
-fate-swr-resample_exact_lin_async-s32p-8000-48000: SIZE_TOLERANCE = 96000 - 20344
+fate-swr-resample_exact_lin_async-s32p-8000-48000: CMP_TARGET = 10371.53
+fate-swr-resample_exact_lin_async-s32p-8000-48000: SIZE_TOLERANCE = 96000 - 20350
$(call CROSS_TEST,$(SAMPLERATES),ARESAMPLE,s16p,s16le,s16)
$(call CROSS_TEST,$(SAMPLERATES),ARESAMPLE,s32p,s32le,s16)
diff --git a/tests/ref/fate/eval b/tests/ref/fate/eval
index 5b4d93f4274..441f9846c46 100644
--- a/tests/ref/fate/eval
+++ b/tests/ref/fate/eval
@@ -257,7 +257,7 @@ Evaluating 'root(sin(ld(0))+6+sin(ld(0)/12)-log(ld(0)), 100)'
'root(sin(ld(0))+6+sin(ld(0)/12)-log(ld(0)), 100)' -> 60.965601
Evaluating '7000000B*random(0)'
-'7000000B*random(0)' -> 0.003078
+'7000000B*random(0)' -> 12864914.486611
Evaluating 'squish(2)'
'squish(2)' -> 0.000335
--
2.17.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [FFmpeg-devel] [PATCH] avutil/eval: Use even better PRNG
2024-01-13 3:51 [FFmpeg-devel] [PATCH] avutil/eval: Use even better PRNG Michael Niedermayer
@ 2024-01-14 14:14 ` Stefano Sabatini
2024-01-16 0:27 ` Michael Niedermayer
0 siblings, 1 reply; 8+ messages in thread
From: Stefano Sabatini @ 2024-01-14 14:14 UTC (permalink / raw)
To: FFmpeg development discussions and patches
On date Saturday 2024-01-13 04:51:06 +0100, Michael Niedermayer wrote:
> This is the 64bit version of Chris Doty-Humphreys SFC64
>
> Compared to the LCGs these produce much better quality numbers.
> Compared to LFGs this needs less state. (our LFG has 224 byte
> state for its 32bit version) this has 32byte state
> Also the initialization for our LFG is slower.
> This is also much faster than KISS or PCG.
>
> This commit replaces the broken LCG used before.
> (broken as it had only a period ~200M due to being put in a double)
>
> This changes the output from random() which is why libswresample.mak
> is updated, update was done using the command in libswresample.mak
>
> Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
> ---
> libavutil/eval.c | 24 +++-
> libavutil/sfc64.h | 85 ++++++++++++++
> tests/fate/libswresample.mak | 208 +++++++++++++++++------------------
> tests/ref/fate/eval | 2 +-
> 4 files changed, 210 insertions(+), 109 deletions(-)
> create mode 100644 libavutil/sfc64.h
>
> diff --git a/libavutil/eval.c b/libavutil/eval.c
> index dc6b3697bc2..349015d4fa3 100644
> --- a/libavutil/eval.c
> +++ b/libavutil/eval.c
> @@ -35,6 +35,7 @@
> #include "internal.h"
> #include "log.h"
> #include "mathematics.h"
> +#include "sfc64.h"
> #include "time.h"
> #include "avstring.h"
> #include "timer.h"
> @@ -55,6 +56,7 @@ typedef struct Parser {
> void *log_ctx;
> #define VARS 10
> double *var;
> + FFSFC64 *prng_state;
> } Parser;
>
> static const AVClass eval_class = {
> @@ -173,6 +175,7 @@ struct AVExpr {
> } a;
> struct AVExpr *param[3];
> double *var;
> + FFSFC64 *prng_state;
> };
>
> static double etime(double v)
> @@ -231,8 +234,14 @@ static double eval_expr(Parser *p, AVExpr *e)
>
> #define COMPUTE_NEXT_RANDOM() \
> int idx = av_clip(eval_expr(p, e->param[0]), 0, VARS-1); \
> - uint64_t r = isnan(p->var[idx]) ? 0 : p->var[idx]; \
> - r = r * 1664525 + 1013904223; \
> + FFSFC64 *s = p->prng_state + idx; \
> + uint64_t r; \
> + \
> + if (!s->counter) { \
> + r = isnan(p->var[idx]) ? 0 : p->var[idx]; \
> + ff_sfc64_init(s, r, r, r, 12); \
> + } \
> + r = ff_sfc64_get(s); \
> p->var[idx] = r; \
>
> case e_random: {
> @@ -329,7 +338,11 @@ static double eval_expr(Parser *p, AVExpr *e)
> case e_div: return e->value * (d2 ? (d / d2) : d * INFINITY);
> case e_add: return e->value * (d + d2);
> case e_last:return e->value * d2;
> - case e_st : return e->value * (p->var[av_clip(d, 0, VARS-1)]= d2);
> + case e_st : {
> + int index = av_clip(d, 0, VARS-1);
> + p->prng_state[index].counter = 0;
> + return e->value * (p->var[index]= d2);
> + }
> case e_hypot:return e->value * hypot(d, d2);
> case e_atan2:return e->value * atan2(d, d2);
> case e_bitand: return isnan(d) || isnan(d2) ? NAN : e->value * ((long int)d & (long int)d2);
> @@ -349,6 +362,7 @@ void av_expr_free(AVExpr *e)
> av_expr_free(e->param[1]);
> av_expr_free(e->param[2]);
> av_freep(&e->var);
> + av_freep(&e->prng_state);
> av_freep(&e);
> }
>
> @@ -736,7 +750,8 @@ int av_expr_parse(AVExpr **expr, const char *s,
> goto end;
> }
> e->var= av_mallocz(sizeof(double) *VARS);
> - if (!e->var) {
> + e->prng_state = av_mallocz(sizeof(*e->prng_state) *VARS);
> + if (!e->var || !e->prng_state) {
> ret = AVERROR(ENOMEM);
> goto end;
> }
> @@ -778,6 +793,7 @@ double av_expr_eval(AVExpr *e, const double *const_values, void *opaque)
> {
> Parser p = { 0 };
> p.var= e->var;
> + p.prng_state= e->prng_state;
>
> p.const_values = const_values;
> p.opaque = opaque;
> diff --git a/libavutil/sfc64.h b/libavutil/sfc64.h
> new file mode 100644
> index 00000000000..05f1e84cc68
> --- /dev/null
> +++ b/libavutil/sfc64.h
> @@ -0,0 +1,85 @@
> +/*
> + * Copyright (c) 2024 Michael Niedermayer <michael-ffmpeg@niedermayer.cc>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + *
> + * This is a implementation of SFC64, a 64-bit PRNG by Chris Doty-Humphrey.
> + *
> + * This Generator is much faster (0m1.872s) than 64bit KISS (0m3.823s) and PCG-XSH-RR-64/32 (0m2.700s)
> + * And passes testu01 and practrand test suits.
> + *
nit: possibly better to put this in @file
> + */
> +
> +/**
> + * @file
> + * simple Pseudo Random Number Generator
> + *
> + */
> +
> +#ifndef AVUTIL_SFC64_H
> +#define AVUTIL_SFC64_H
> +
> +#include <inttypes.h>
> +
> +typedef struct FFSFC64 {
> + uint64_t a,b,c,counter;
> +} FFSFC64;
> +
> +static inline uint64_t ff_sfc64_get(FFSFC64 *s) {
> + uint64_t tmp = s->a + s->b + s->counter++;
> + s->a = s->b ^ (s->b >> 11);
> + s->b = s->c + (s->c << 3); // This is a multiply by 9
> + s->c = (s->c << 24 | s->c >> 40) + tmp;
> + return tmp;
> +}
> +
> +/**
> + * returns the previous random value, and steps the generator backward.
> + *
> + * Its safe to take values before the first, but such values can be highly
> + * correlated to the seeds.
Return ..., and step the generator...
It is safe to take values before the first, but such values can be highly
correlated to the seeds.
> + */
> +static inline uint64_t ff_sfc64_reverse_get(FFSFC64 *s) {
> + uint64_t prev_c = s->b * 0x8E38E38E38E38E39;
> + uint64_t tmp = s->c - (prev_c << 24 | prev_c >> 40);
> + s->b = s->a ^ (s->a >> 11);
> + s->b ^= s->b >> 22;
> + s->b ^= s->b >> 44;
> +
> + s->a = tmp - s->b - --s->counter;
> + s->c = prev_c;
> +
> + return tmp;
> +}
> +
> +/**
> + * Initialize sfc64 with up to 3 seeds.
> + *
> + * @param rounds number of rounds mixing up state during init. generally 8-18, larger numbers will help with bad quality seeds.
> + * 12 is a good choice if all 3 seeds are equal
uppercase Generally after the dot.
Looks good to me apart from the minor nits.
Thanks!!
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [FFmpeg-devel] [PATCH] avutil/eval: Use even better PRNG
2024-01-14 14:14 ` Stefano Sabatini
@ 2024-01-16 0:27 ` Michael Niedermayer
0 siblings, 0 replies; 8+ messages in thread
From: Michael Niedermayer @ 2024-01-16 0:27 UTC (permalink / raw)
To: FFmpeg development discussions and patches
[-- Attachment #1.1: Type: text/plain, Size: 8288 bytes --]
On Sun, Jan 14, 2024 at 03:14:23PM +0100, Stefano Sabatini wrote:
> On date Saturday 2024-01-13 04:51:06 +0100, Michael Niedermayer wrote:
> > This is the 64bit version of Chris Doty-Humphreys SFC64
> >
> > Compared to the LCGs these produce much better quality numbers.
> > Compared to LFGs this needs less state. (our LFG has 224 byte
> > state for its 32bit version) this has 32byte state
> > Also the initialization for our LFG is slower.
> > This is also much faster than KISS or PCG.
> >
> > This commit replaces the broken LCG used before.
> > (broken as it had only a period ~200M due to being put in a double)
> >
> > This changes the output from random() which is why libswresample.mak
> > is updated, update was done using the command in libswresample.mak
> >
> > Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
> > ---
> > libavutil/eval.c | 24 +++-
> > libavutil/sfc64.h | 85 ++++++++++++++
> > tests/fate/libswresample.mak | 208 +++++++++++++++++------------------
> > tests/ref/fate/eval | 2 +-
> > 4 files changed, 210 insertions(+), 109 deletions(-)
> > create mode 100644 libavutil/sfc64.h
> >
> > diff --git a/libavutil/eval.c b/libavutil/eval.c
> > index dc6b3697bc2..349015d4fa3 100644
> > --- a/libavutil/eval.c
> > +++ b/libavutil/eval.c
> > @@ -35,6 +35,7 @@
> > #include "internal.h"
> > #include "log.h"
> > #include "mathematics.h"
> > +#include "sfc64.h"
> > #include "time.h"
> > #include "avstring.h"
> > #include "timer.h"
> > @@ -55,6 +56,7 @@ typedef struct Parser {
> > void *log_ctx;
> > #define VARS 10
> > double *var;
> > + FFSFC64 *prng_state;
> > } Parser;
> >
> > static const AVClass eval_class = {
> > @@ -173,6 +175,7 @@ struct AVExpr {
> > } a;
> > struct AVExpr *param[3];
> > double *var;
> > + FFSFC64 *prng_state;
> > };
> >
> > static double etime(double v)
> > @@ -231,8 +234,14 @@ static double eval_expr(Parser *p, AVExpr *e)
> >
> > #define COMPUTE_NEXT_RANDOM() \
> > int idx = av_clip(eval_expr(p, e->param[0]), 0, VARS-1); \
> > - uint64_t r = isnan(p->var[idx]) ? 0 : p->var[idx]; \
> > - r = r * 1664525 + 1013904223; \
> > + FFSFC64 *s = p->prng_state + idx; \
> > + uint64_t r; \
> > + \
> > + if (!s->counter) { \
> > + r = isnan(p->var[idx]) ? 0 : p->var[idx]; \
> > + ff_sfc64_init(s, r, r, r, 12); \
> > + } \
> > + r = ff_sfc64_get(s); \
> > p->var[idx] = r; \
> >
> > case e_random: {
> > @@ -329,7 +338,11 @@ static double eval_expr(Parser *p, AVExpr *e)
> > case e_div: return e->value * (d2 ? (d / d2) : d * INFINITY);
> > case e_add: return e->value * (d + d2);
> > case e_last:return e->value * d2;
> > - case e_st : return e->value * (p->var[av_clip(d, 0, VARS-1)]= d2);
> > + case e_st : {
> > + int index = av_clip(d, 0, VARS-1);
> > + p->prng_state[index].counter = 0;
> > + return e->value * (p->var[index]= d2);
> > + }
> > case e_hypot:return e->value * hypot(d, d2);
> > case e_atan2:return e->value * atan2(d, d2);
> > case e_bitand: return isnan(d) || isnan(d2) ? NAN : e->value * ((long int)d & (long int)d2);
> > @@ -349,6 +362,7 @@ void av_expr_free(AVExpr *e)
> > av_expr_free(e->param[1]);
> > av_expr_free(e->param[2]);
> > av_freep(&e->var);
> > + av_freep(&e->prng_state);
> > av_freep(&e);
> > }
> >
> > @@ -736,7 +750,8 @@ int av_expr_parse(AVExpr **expr, const char *s,
> > goto end;
> > }
> > e->var= av_mallocz(sizeof(double) *VARS);
> > - if (!e->var) {
> > + e->prng_state = av_mallocz(sizeof(*e->prng_state) *VARS);
> > + if (!e->var || !e->prng_state) {
> > ret = AVERROR(ENOMEM);
> > goto end;
> > }
> > @@ -778,6 +793,7 @@ double av_expr_eval(AVExpr *e, const double *const_values, void *opaque)
> > {
> > Parser p = { 0 };
> > p.var= e->var;
> > + p.prng_state= e->prng_state;
> >
> > p.const_values = const_values;
> > p.opaque = opaque;
> > diff --git a/libavutil/sfc64.h b/libavutil/sfc64.h
> > new file mode 100644
> > index 00000000000..05f1e84cc68
> > --- /dev/null
> > +++ b/libavutil/sfc64.h
> > @@ -0,0 +1,85 @@
> > +/*
> > + * Copyright (c) 2024 Michael Niedermayer <michael-ffmpeg@niedermayer.cc>
> > + *
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> > + *
>
> > + * This is a implementation of SFC64, a 64-bit PRNG by Chris Doty-Humphrey.
> > + *
> > + * This Generator is much faster (0m1.872s) than 64bit KISS (0m3.823s) and PCG-XSH-RR-64/32 (0m2.700s)
> > + * And passes testu01 and practrand test suits.
> > + *
>
> nit: possibly better to put this in @file
will move
>
> > + */
> > +
> > +/**
> > + * @file
> > + * simple Pseudo Random Number Generator
> > + *
> > + */
> > +
> > +#ifndef AVUTIL_SFC64_H
> > +#define AVUTIL_SFC64_H
> > +
> > +#include <inttypes.h>
> > +
> > +typedef struct FFSFC64 {
> > + uint64_t a,b,c,counter;
> > +} FFSFC64;
> > +
> > +static inline uint64_t ff_sfc64_get(FFSFC64 *s) {
> > + uint64_t tmp = s->a + s->b + s->counter++;
> > + s->a = s->b ^ (s->b >> 11);
> > + s->b = s->c + (s->c << 3); // This is a multiply by 9
> > + s->c = (s->c << 24 | s->c >> 40) + tmp;
> > + return tmp;
> > +}
> > +
>
> > +/**
> > + * returns the previous random value, and steps the generator backward.
> > + *
> > + * Its safe to take values before the first, but such values can be highly
> > + * correlated to the seeds.
>
> Return ..., and step the generator...
>
> It is safe to take values before the first, but such values can be highly
> correlated to the seeds.
will change
>
> > + */
> > +static inline uint64_t ff_sfc64_reverse_get(FFSFC64 *s) {
> > + uint64_t prev_c = s->b * 0x8E38E38E38E38E39;
> > + uint64_t tmp = s->c - (prev_c << 24 | prev_c >> 40);
> > + s->b = s->a ^ (s->a >> 11);
> > + s->b ^= s->b >> 22;
> > + s->b ^= s->b >> 44;
> > +
> > + s->a = tmp - s->b - --s->counter;
> > + s->c = prev_c;
> > +
> > + return tmp;
> > +}
> > +
> > +/**
> > + * Initialize sfc64 with up to 3 seeds.
> > + *
>
> > + * @param rounds number of rounds mixing up state during init. generally 8-18, larger numbers will help with bad quality seeds.
> > + * 12 is a good choice if all 3 seeds are equal
>
> uppercase Generally after the dot.
>
> Looks good to me apart for the minor nits.
ok, will apply with these changes
thx
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
Everything should be made as simple as possible, but not simpler.
-- Albert Einstein
[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]
[-- Attachment #2: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [FFmpeg-devel] [PATCH] avutil/eval: Use even better PRNG
2024-01-19 8:53 ` Michael Koch
@ 2024-01-20 0:33 ` Michael Niedermayer
0 siblings, 0 replies; 8+ messages in thread
From: Michael Niedermayer @ 2024-01-20 0:33 UTC (permalink / raw)
To: FFmpeg development discussions and patches
[-- Attachment #1.1: Type: text/plain, Size: 1151 bytes --]
On Fri, Jan 19, 2024 at 09:53:46AM +0100, Michael Koch wrote:
> There is still a small problem with the random generator, but this has
> nothing to do with the recent changes.
> If the random() expression is used in the geq filter, then multiple pixels
> get the same sequence of random numbers.
> As can be shown with this command, where the frame has only two pixels:
>
> ffmpeg -loglevel repeat -f lavfi -i nullsrc=size=1x2,format=gray -vf
> "geq=lum='print(random(0));print(random(0));print(random(0))'" -frames 1 -y
> out.png
>
> I think it's because the filter is executed in multiple threads.
> -filter_threads 1 fixes the problem, but it slows down the whole filter
> thread.
You can avoid this by using
ifnot(X,st(0,Y))
which would reseed the random number generator differently on the first pixel of
each line
Not sure this is the best solution, better ideas are welcome
thx
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
Some people wanted to paint the bikeshed green, some blue and some pink.
People argued and fought, when they finally agreed, only rust was left.
[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]
[-- Attachment #2: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [FFmpeg-devel] [PATCH] avutil/eval: Use even better PRNG
2024-01-09 1:55 Michael Niedermayer
2024-01-10 22:48 ` Stefano Sabatini
@ 2024-01-19 8:53 ` Michael Koch
2024-01-20 0:33 ` Michael Niedermayer
1 sibling, 1 reply; 8+ messages in thread
From: Michael Koch @ 2024-01-19 8:53 UTC (permalink / raw)
To: ffmpeg-devel
There is still a small problem with the random generator, but this has
nothing to do with the recent changes.
If the random() expression is used in the geq filter, then multiple
pixels get the same sequence of random numbers.
As can be shown with this command, where the frame has only two pixels:
ffmpeg -loglevel repeat -f lavfi -i nullsrc=size=1x2,format=gray -vf
"geq=lum='print(random(0));print(random(0));print(random(0))'" -frames 1
-y out.png
I think it's because the filter is executed in multiple threads.
-filter_threads 1 fixes the problem, but it slows down the whole filter
thread.
Michael
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [FFmpeg-devel] [PATCH] avutil/eval: Use even better PRNG
2024-01-10 22:48 ` Stefano Sabatini
@ 2024-01-11 2:39 ` Michael Niedermayer
0 siblings, 0 replies; 8+ messages in thread
From: Michael Niedermayer @ 2024-01-11 2:39 UTC (permalink / raw)
To: FFmpeg development discussions and patches
[-- Attachment #1.1: Type: text/plain, Size: 5653 bytes --]
On Wed, Jan 10, 2024 at 11:48:33PM +0100, Stefano Sabatini wrote:
> On date Tuesday 2024-01-09 02:55:21 +0100, Michael Niedermayer wrote:
[...]
> >
> > static const AVClass eval_class = {
> > @@ -174,7 +175,7 @@ struct AVExpr {
> > } a;
> > struct AVExpr *param[3];
> > double *var;
> > - uint64_t *var_uint64;
> > + SFC64 *prng_state;
> > };
> >
> > static double etime(double v)
> > @@ -233,10 +234,15 @@ static double eval_expr(Parser *p, AVExpr *e)
> >
> > #define COMPUTE_NEXT_RANDOM() \
> > int idx = av_clip(eval_expr(p, e->param[0]), 0, VARS-1); \
> > - uint64_t r = p->var_uint64[idx] ? p->var_uint64[idx] : (isnan(p->var[idx]) ? 0 : p->var[idx]);\
> > - r = r * 1664525 + 1013904223; \
> > + SFC64 *s = p->prng_state + idx; \
> > + uint64_t r; \
> > + \
> > + if (!s->counter) { \
> > + r = isnan(p->var[idx]) ? 0 : p->var[idx]; \
>
> > + sfc64_init(s, r, r, r, 12); \
>
> for the record, why 12?
The reference has 3 init functions
* one that uses one seed for the 3 parameters, it uses 12 rounds
* one that uses 3 separate seeds that uses 18 rounds
* one that has "fast" in its name and does 8 rounds with one seed in 3 parameters
I will document this better
[...]
> > return e->value * (p->var[index]= d2);
> > }
> > case e_hypot:return e->value * hypot(d, d2);
> > @@ -356,7 +362,7 @@ void av_expr_free(AVExpr *e)
> > av_expr_free(e->param[1]);
> > av_expr_free(e->param[2]);
> > av_freep(&e->var);
> > - av_freep(&e->var_uint64);
> > + av_freep(&e->prng_state);
> > av_freep(&e);
> > }
> >
> > @@ -744,8 +750,8 @@ int av_expr_parse(AVExpr **expr, const char *s,
> > goto end;
> > }
> > e->var= av_mallocz(sizeof(double) *VARS);
> > - e->var_uint64= av_mallocz(sizeof(uint64_t) *VARS);
> > - if (!e->var || !e->var_uint64) {
> > + e->prng_state = av_mallocz(sizeof(*e->prng_state) *VARS);
> > + if (!e->var || !e->prng_state) {
> > ret = AVERROR(ENOMEM);
> > goto end;
> > }
> > @@ -787,7 +793,7 @@ double av_expr_eval(AVExpr *e, const double *const_values, void *opaque)
> > {
> > Parser p = { 0 };
> > p.var= e->var;
> > - p.var_uint64= e->var_uint64;
> > + p.prng_state= e->prng_state;
> >
> > p.const_values = const_values;
> > p.opaque = opaque;
> > diff --git a/libavutil/sfc64.h b/libavutil/sfc64.h
> > new file mode 100644
> > index 00000000000..25bc43abef1
> > --- /dev/null
> > +++ b/libavutil/sfc64.h
> > @@ -0,0 +1,59 @@
> > +/*
> > + * Copyright (c) 2024 Michael Niedermayer <michael-ffmpeg@niedermayer.cc>
> > + *
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> > + *
>
> > + * This is a implementation of SFC64 a 64-bit PRNG by Chris Doty-Humphrey.
>
> nit: This is a implementation of SFC64, a 64-bit PRNG by Chris Doty-Humphrey.
>
> > + *
> > + * This Generator is much faster (0m1.872s) than 64bit KISS (0m3.823s) and PCG-XSH-RR-64/32 (0m2.700s)
>
> what are these benchmarks against?
a loop that computes a lot of random numbers and at the end prints their sum.
The behavior was btw quite different if the numbers are not summed and printed,
as the compiler can then optimize some things out, but no one would run a PRNG
and not use the values.
[...]
> > +static inline uint64_t sfc64_get(SFC64 *s) {
> > + uint64_t tmp = s->a + s->b + s->counter++;
> > + s->a = s->b ^ (s->b >> 11);
> > + s->b = s->c + (s->c << 3); // This is a multiply by 9
> > + s->c = ((s->c << 24) | (s->c >> 40)) + tmp;
> > + return tmp;
> > +}
> > +
> > +static inline void sfc64_init(SFC64 *s, uint64_t seeda, uint64_t seedb, uint64_t seedc, int rounds) {
> > + s->a = seeda;
> > + s->b = seedb;
> > + s->c = seedc;
> > + s->counter = 1;
> > + while (rounds--)
> > + sfc64_get(s);
> > +}
> > +
> > +#endif // AVUTIL_SFC64_H
>
> nit: probably it still makes sense to use ff/FF prefixes even if the
> header is not public (and if this is useful, probably it could be made
> public as a faster/smaller alternative to lfg).
ok
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
Take away the freedom of one citizen and you will be jailed, take away
the freedom of all citizens and you will be congratulated by your peers
in Parliament.
[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 195 bytes --]
[-- Attachment #2: Type: text/plain, Size: 251 bytes --]
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [FFmpeg-devel] [PATCH] avutil/eval: Use even better PRNG
2024-01-09 1:55 Michael Niedermayer
@ 2024-01-10 22:48 ` Stefano Sabatini
2024-01-11 2:39 ` Michael Niedermayer
2024-01-19 8:53 ` Michael Koch
1 sibling, 1 reply; 8+ messages in thread
From: Stefano Sabatini @ 2024-01-10 22:48 UTC (permalink / raw)
To: FFmpeg development discussions and patches
On date Tuesday 2024-01-09 02:55:21 +0100, Michael Niedermayer wrote:
> This is the 64bit version of Chris Doty-Humphreys SFC64
>
> Compared to the LCGs these produce much better quality numbers.
> Compared to LFGs this needs less state. (our LFG has 224 byte
> state for its 32bit version) this has 32byte state
> Also the initialization for our LFG is slower.
> This is also much faster than KISS or PCG.
>
> This could be merged with the change to integer LCG
> Also a few fate tests need an update. I will update fate if SFC64
> is the chosen PRNG
>
> Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
> ---
> libavutil/eval.c | 26 ++++++++++++--------
> libavutil/sfc64.h | 59 +++++++++++++++++++++++++++++++++++++++++++++
> tests/ref/fate/eval | 2 +-
> 3 files changed, 76 insertions(+), 11 deletions(-)
cool :-)
> create mode 100644 libavutil/sfc64.h
>
> diff --git a/libavutil/eval.c b/libavutil/eval.c
> index 9d41140056c..d15becf9cda 100644
> --- a/libavutil/eval.c
> +++ b/libavutil/eval.c
> @@ -33,6 +33,7 @@
> #include "eval.h"
> #include "ffmath.h"
> #include "internal.h"
> +#include "sfc64.h"
nit: sort order
> #include "log.h"
> #include "mathematics.h"
> #include "time.h"
> @@ -55,7 +56,7 @@ typedef struct Parser {
> void *log_ctx;
> #define VARS 10
> double *var;
> - uint64_t *var_uint64;
> + SFC64 *prng_state;
> } Parser;
this is on top of another patch I guess
>
> static const AVClass eval_class = {
> @@ -174,7 +175,7 @@ struct AVExpr {
> } a;
> struct AVExpr *param[3];
> double *var;
> - uint64_t *var_uint64;
> + SFC64 *prng_state;
> };
>
> static double etime(double v)
> @@ -233,10 +234,15 @@ static double eval_expr(Parser *p, AVExpr *e)
>
> #define COMPUTE_NEXT_RANDOM() \
> int idx = av_clip(eval_expr(p, e->param[0]), 0, VARS-1); \
> - uint64_t r = p->var_uint64[idx] ? p->var_uint64[idx] : (isnan(p->var[idx]) ? 0 : p->var[idx]);\
> - r = r * 1664525 + 1013904223; \
> + SFC64 *s = p->prng_state + idx; \
> + uint64_t r; \
> + \
> + if (!s->counter) { \
> + r = isnan(p->var[idx]) ? 0 : p->var[idx]; \
> + sfc64_init(s, r, r, r, 12); \
for the record, why 12?
> + } \
> + r = sfc64_get(s); \
> p->var[idx] = r; \
> - p->var_uint64[idx]= r;
>
> case e_random: {
> COMPUTE_NEXT_RANDOM();
> @@ -334,7 +340,7 @@ static double eval_expr(Parser *p, AVExpr *e)
> case e_last:return e->value * d2;
> case e_st : {
> int index = av_clip(d, 0, VARS-1);
> - p->var_uint64[index] = 0;
> + p->prng_state[index].counter = 0;
I wonder if we should have a dedicated strandom() (or randomst)
function to store the value (and deprecate st for setting the random
seed, now that we are using a separated variable to store the state) -
not blocking though
> return e->value * (p->var[index]= d2);
> }
> case e_hypot:return e->value * hypot(d, d2);
> @@ -356,7 +362,7 @@ void av_expr_free(AVExpr *e)
> av_expr_free(e->param[1]);
> av_expr_free(e->param[2]);
> av_freep(&e->var);
> - av_freep(&e->var_uint64);
> + av_freep(&e->prng_state);
> av_freep(&e);
> }
>
> @@ -744,8 +750,8 @@ int av_expr_parse(AVExpr **expr, const char *s,
> goto end;
> }
> e->var= av_mallocz(sizeof(double) *VARS);
> - e->var_uint64= av_mallocz(sizeof(uint64_t) *VARS);
> - if (!e->var || !e->var_uint64) {
> + e->prng_state = av_mallocz(sizeof(*e->prng_state) *VARS);
> + if (!e->var || !e->prng_state) {
> ret = AVERROR(ENOMEM);
> goto end;
> }
> @@ -787,7 +793,7 @@ double av_expr_eval(AVExpr *e, const double *const_values, void *opaque)
> {
> Parser p = { 0 };
> p.var= e->var;
> - p.var_uint64= e->var_uint64;
> + p.prng_state= e->prng_state;
>
> p.const_values = const_values;
> p.opaque = opaque;
> diff --git a/libavutil/sfc64.h b/libavutil/sfc64.h
> new file mode 100644
> index 00000000000..25bc43abef1
> --- /dev/null
> +++ b/libavutil/sfc64.h
> @@ -0,0 +1,59 @@
> +/*
> + * Copyright (c) 2024 Michael Niedermayer <michael-ffmpeg@niedermayer.cc>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + *
> + * This is a implementation of SFC64 a 64-bit PRNG by Chris Doty-Humphrey.
nit: This is a implementation of SFC64, a 64-bit PRNG by Chris Doty-Humphrey.
> + *
> + * This Generator is much faster (0m1.872s) than 64bit KISS (0m3.823s) and PCG-XSH-RR-64/32 (0m2.700s)
what are these benchmarks against?
> + * And passes testu01 and practrand test suits.
> + *
> + */
> +
> +/**
> + * @file
> + * simple Pseudo Random Number Generator
> + *
> + */
> +
> +#ifndef AVUTIL_SFC64_H
> +#define AVUTIL_SFC64_H
> +
> +#include <inttypes.h>
> +
> +typedef struct SFC64 {
> + uint64_t a,b,c,counter;
> +} SFC64;
> +
> +static inline uint64_t sfc64_get(SFC64 *s) {
> + uint64_t tmp = s->a + s->b + s->counter++;
> + s->a = s->b ^ (s->b >> 11);
> + s->b = s->c + (s->c << 3); // This is a multiply by 9
> + s->c = ((s->c << 24) | (s->c >> 40)) + tmp;
> + return tmp;
> +}
> +
> +static inline void sfc64_init(SFC64 *s, uint64_t seeda, uint64_t seedb, uint64_t seedc, int rounds) {
> + s->a = seeda;
> + s->b = seedb;
> + s->c = seedc;
> + s->counter = 1;
> + while (rounds--)
> + sfc64_get(s);
> +}
> +
> +#endif // AVUTIL_SFC64_H
nit: probably it still makes sense to use ff/FF prefixes even if the
header is not public (and if this is useful, probably it could be made
public as a faster/smaller alternative to lfg).
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 8+ messages in thread
* [FFmpeg-devel] [PATCH] avutil/eval: Use even better PRNG
@ 2024-01-09 1:55 Michael Niedermayer
2024-01-10 22:48 ` Stefano Sabatini
2024-01-19 8:53 ` Michael Koch
0 siblings, 2 replies; 8+ messages in thread
From: Michael Niedermayer @ 2024-01-09 1:55 UTC (permalink / raw)
To: FFmpeg development discussions and patches
This is the 64bit version of Chris Doty-Humphreys SFC64
Compared to the LCGs these produce much better quality numbers.
Compared to LFGs this needs less state. (our LFG has 224 byte
state for its 32bit version) this has 32byte state
Also the initialization for our LFG is slower.
This is also much faster than KISS or PCG.
This could be merged with the change to integer LCG
Also a few fate tests need an update. I will update fate if SFC64
is the chosen PRNG
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
---
libavutil/eval.c | 26 ++++++++++++--------
libavutil/sfc64.h | 59 +++++++++++++++++++++++++++++++++++++++++++++
tests/ref/fate/eval | 2 +-
3 files changed, 76 insertions(+), 11 deletions(-)
create mode 100644 libavutil/sfc64.h
diff --git a/libavutil/eval.c b/libavutil/eval.c
index 9d41140056c..d15becf9cda 100644
--- a/libavutil/eval.c
+++ b/libavutil/eval.c
@@ -33,6 +33,7 @@
#include "eval.h"
#include "ffmath.h"
#include "internal.h"
+#include "sfc64.h"
#include "log.h"
#include "mathematics.h"
#include "time.h"
@@ -55,7 +56,7 @@ typedef struct Parser {
void *log_ctx;
#define VARS 10
double *var;
- uint64_t *var_uint64;
+ SFC64 *prng_state;
} Parser;
static const AVClass eval_class = {
@@ -174,7 +175,7 @@ struct AVExpr {
} a;
struct AVExpr *param[3];
double *var;
- uint64_t *var_uint64;
+ SFC64 *prng_state;
};
static double etime(double v)
@@ -233,10 +234,15 @@ static double eval_expr(Parser *p, AVExpr *e)
#define COMPUTE_NEXT_RANDOM() \
int idx = av_clip(eval_expr(p, e->param[0]), 0, VARS-1); \
- uint64_t r = p->var_uint64[idx] ? p->var_uint64[idx] : (isnan(p->var[idx]) ? 0 : p->var[idx]);\
- r = r * 1664525 + 1013904223; \
+ SFC64 *s = p->prng_state + idx; \
+ uint64_t r; \
+ \
+ if (!s->counter) { \
+ r = isnan(p->var[idx]) ? 0 : p->var[idx]; \
+ sfc64_init(s, r, r, r, 12); \
+ } \
+ r = sfc64_get(s); \
p->var[idx] = r; \
- p->var_uint64[idx]= r;
case e_random: {
COMPUTE_NEXT_RANDOM();
@@ -334,7 +340,7 @@ static double eval_expr(Parser *p, AVExpr *e)
case e_last:return e->value * d2;
case e_st : {
int index = av_clip(d, 0, VARS-1);
- p->var_uint64[index] = 0;
+ p->prng_state[index].counter = 0;
return e->value * (p->var[index]= d2);
}
case e_hypot:return e->value * hypot(d, d2);
@@ -356,7 +362,7 @@ void av_expr_free(AVExpr *e)
av_expr_free(e->param[1]);
av_expr_free(e->param[2]);
av_freep(&e->var);
- av_freep(&e->var_uint64);
+ av_freep(&e->prng_state);
av_freep(&e);
}
@@ -744,8 +750,8 @@ int av_expr_parse(AVExpr **expr, const char *s,
goto end;
}
e->var= av_mallocz(sizeof(double) *VARS);
- e->var_uint64= av_mallocz(sizeof(uint64_t) *VARS);
- if (!e->var || !e->var_uint64) {
+ e->prng_state = av_mallocz(sizeof(*e->prng_state) *VARS);
+ if (!e->var || !e->prng_state) {
ret = AVERROR(ENOMEM);
goto end;
}
@@ -787,7 +793,7 @@ double av_expr_eval(AVExpr *e, const double *const_values, void *opaque)
{
Parser p = { 0 };
p.var= e->var;
- p.var_uint64= e->var_uint64;
+ p.prng_state= e->prng_state;
p.const_values = const_values;
p.opaque = opaque;
diff --git a/libavutil/sfc64.h b/libavutil/sfc64.h
new file mode 100644
index 00000000000..25bc43abef1
--- /dev/null
+++ b/libavutil/sfc64.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2024 Michael Niedermayer <michael-ffmpeg@niedermayer.cc>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * This is a implementation of SFC64 a 64-bit PRNG by Chris Doty-Humphrey.
+ *
+ * This Generator is much faster (0m1.872s) than 64bit KISS (0m3.823s) and PCG-XSH-RR-64/32 (0m2.700s)
+ * And passes testu01 and practrand test suits.
+ *
+ */
+
+/**
+ * @file
+ * simple Pseudo Random Number Generator
+ *
+ */
+
+#ifndef AVUTIL_SFC64_H
+#define AVUTIL_SFC64_H
+
+#include <inttypes.h>
+
+typedef struct SFC64 {
+ uint64_t a,b,c,counter;
+} SFC64;
+
+static inline uint64_t sfc64_get(SFC64 *s) {
+ uint64_t tmp = s->a + s->b + s->counter++;
+ s->a = s->b ^ (s->b >> 11);
+ s->b = s->c + (s->c << 3); // This is a multiply by 9
+ s->c = ((s->c << 24) | (s->c >> 40)) + tmp;
+ return tmp;
+}
+
+static inline void sfc64_init(SFC64 *s, uint64_t seeda, uint64_t seedb, uint64_t seedc, int rounds) {
+ s->a = seeda;
+ s->b = seedb;
+ s->c = seedc;
+ s->counter = 1;
+ while (rounds--)
+ sfc64_get(s);
+}
+
+#endif // AVUTIL_SFC64_H
diff --git a/tests/ref/fate/eval b/tests/ref/fate/eval
index 5b4d93f4274..441f9846c46 100644
--- a/tests/ref/fate/eval
+++ b/tests/ref/fate/eval
@@ -257,7 +257,7 @@ Evaluating 'root(sin(ld(0))+6+sin(ld(0)/12)-log(ld(0)), 100)'
'root(sin(ld(0))+6+sin(ld(0)/12)-log(ld(0)), 100)' -> 60.965601
Evaluating '7000000B*random(0)'
-'7000000B*random(0)' -> 0.003078
+'7000000B*random(0)' -> 12864914.486611
Evaluating 'squish(2)'
'squish(2)' -> 0.000335
--
2.17.1
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2024-01-20 0:33 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-01-13 3:51 [FFmpeg-devel] [PATCH] avutil/eval: Use even better PRNG Michael Niedermayer
2024-01-14 14:14 ` Stefano Sabatini
2024-01-16 0:27 ` Michael Niedermayer
-- strict thread matches above, loose matches on Subject: below --
2024-01-09 1:55 Michael Niedermayer
2024-01-10 22:48 ` Stefano Sabatini
2024-01-11 2:39 ` Michael Niedermayer
2024-01-19 8:53 ` Michael Koch
2024-01-20 0:33 ` Michael Niedermayer
Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
This inbox may be cloned and mirrored by anyone:
git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git
# If you have public-inbox 1.1+ installed, you may
# initialize and index your mirror using the following commands:
public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
ffmpegdev@gitmailbox.com
public-inbox-index ffmpegdev
Example config snippet for mirrors.
AGPL code for this site: git clone https://public-inbox.org/public-inbox.git