Re: [FFmpeg-devel] [PATCH] avutil/eval: Use even better PRNG

2024-01-10 Thread Stefano Sabatini
On date Tuesday 2024-01-09 02:55:21 +0100, Michael Niedermayer wrote:
> This is the 64bit version of Chris Doty-Humphreys SFC64
> 
> Compared to the LCGs these produce much better quality numbers.
> Compared to LFGs this needs less state. (our LFG has 224 byte
> state for its 32bit version) this has 32byte state
> Also the initialization for our LFG is slower.
> This is also much faster than KISS or PCG.
> 
> This could be merged with the change to integer LCG
> Also a few fate tests need an update. I will update fate if SFC64
> is the chosen PRNG
> 
> Signed-off-by: Michael Niedermayer 
> ---
>  libavutil/eval.c| 26 
>  libavutil/sfc64.h   | 59 +
>  tests/ref/fate/eval |  2 +-
>  3 files changed, 76 insertions(+), 11 deletions(-)

cool :-)

>  create mode 100644 libavutil/sfc64.h
> 
> diff --git a/libavutil/eval.c b/libavutil/eval.c
> index 9d41140056c..d15becf9cda 100644
> --- a/libavutil/eval.c
> +++ b/libavutil/eval.c
> @@ -33,6 +33,7 @@
>  #include "eval.h"
>  #include "ffmath.h"
>  #include "internal.h"
> +#include "sfc64.h"

nit: sort order

>  #include "log.h"
>  #include "mathematics.h"
>  #include "time.h"
> @@ -55,7 +56,7 @@ typedef struct Parser {
>  void *log_ctx;
>  #define VARS 10
>  double *var;
> -uint64_t *var_uint64;
> +SFC64 *prng_state;
>  } Parser;

this is on top of another patch I guess

>  
>  static const AVClass eval_class = {
> @@ -174,7 +175,7 @@ struct AVExpr {
>  } a;
>  struct AVExpr *param[3];
>  double *var;
> -uint64_t *var_uint64;
> +SFC64 *prng_state;
>  };
>  
>  static double etime(double v)
> @@ -233,10 +234,15 @@ static double eval_expr(Parser *p, AVExpr *e)
>  
>  #define COMPUTE_NEXT_RANDOM()\
>  int idx = av_clip(eval_expr(p, e->param[0]), 0, VARS-1); \
> -uint64_t r = p->var_uint64[idx] ? p->var_uint64[idx] : 
> (isnan(p->var[idx]) ? 0 : p->var[idx]);\
> -r = r * 1664525 + 1013904223;\
> +SFC64 *s = p->prng_state + idx;  \
> +uint64_t r;  \
> + \
> +if (!s->counter) {   \
> +r = isnan(p->var[idx]) ? 0 : p->var[idx];\

> +sfc64_init(s, r, r, r, 12);  \

for the record, why 12?

> +}\
> +r = sfc64_get(s);\
>  p->var[idx] = r; \
> -p->var_uint64[idx]= r;

>  
>  case e_random: {
>  COMPUTE_NEXT_RANDOM();
> @@ -334,7 +340,7 @@ static double eval_expr(Parser *p, AVExpr *e)
>  case e_last:return e->value * d2;
>  case e_st :  {
>  int index = av_clip(d, 0, VARS-1);

> -p->var_uint64[index] = 0;
> +p->prng_state[index].counter = 0;

I wonder if we should have a dedicated strandom() (or randomst)
function to store the value (and deprecate st for setting the random
seed, now that we are using a separate variable to store the state) -
not blocking though

>  return e->value * (p->var[index]= d2);
>  }
>  case e_hypot:return e->value * hypot(d, d2);
> @@ -356,7 +362,7 @@ void av_expr_free(AVExpr *e)
>  av_expr_free(e->param[1]);
>  av_expr_free(e->param[2]);
>  av_freep(&e->var);
> -av_freep(&e->var_uint64);
> +av_freep(&e->prng_state);
>  av_freep(&e);
>  }
>  
> @@ -744,8 +750,8 @@ int av_expr_parse(AVExpr **expr, const char *s,
>  goto end;
>  }
>  e->var= av_mallocz(sizeof(double) *VARS);
> -e->var_uint64= av_mallocz(sizeof(uint64_t) *VARS);
> -if (!e->var || !e->var_uint64) {
> +e->prng_state = av_mallocz(sizeof(*e->prng_state) *VARS);
> +if (!e->var || !e->prng_state) {
>  ret = AVERROR(ENOMEM);
>  goto end;
>  }
> @@ -787,7 +793,7 @@ double av_expr_eval(AVExpr *e, const double 
> *const_values, void *opaque)
>  {
>  Parser p = { 0 };
>  p.var= e->var;
> -p.var_uint64= e->var_uint64;
> +p.prng_state= e->prng_state;
>  
>  p.const_values = const_values;
>  p.opaque = opaque;
> diff --git a/libavutil/sfc64.h b/libavutil/sfc64.h
> new file mode 100644
> index 000..25bc43abef1
> --- /dev/null
> +++ b/libavutil/sfc64.h
> @@ -0,0 +1,59 @@
> +/*
> + * Copyright (c) 2024 Michael Niedermayer 
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of

Re: [FFmpeg-devel] [PATCH] avutil/eval: Use even better PRNG

2024-01-10 Thread Michael Niedermayer
On Wed, Jan 10, 2024 at 11:48:33PM +0100, Stefano Sabatini wrote:
> On date Tuesday 2024-01-09 02:55:21 +0100, Michael Niedermayer wrote:
[...]

> >  
> >  static const AVClass eval_class = {
> > @@ -174,7 +175,7 @@ struct AVExpr {
> >  } a;
> >  struct AVExpr *param[3];
> >  double *var;
> > -uint64_t *var_uint64;
> > +SFC64 *prng_state;
> >  };
> >  
> >  static double etime(double v)
> > @@ -233,10 +234,15 @@ static double eval_expr(Parser *p, AVExpr *e)
> >  
> >  #define COMPUTE_NEXT_RANDOM()\
> >  int idx = av_clip(eval_expr(p, e->param[0]), 0, VARS-1); \
> > -uint64_t r = p->var_uint64[idx] ? p->var_uint64[idx] : 
> > (isnan(p->var[idx]) ? 0 : p->var[idx]);\
> > -r = r * 1664525 + 1013904223;\
> > +SFC64 *s = p->prng_state + idx;  \
> > +uint64_t r;  \
> > + \
> > +if (!s->counter) {   \
> > +r = isnan(p->var[idx]) ? 0 : p->var[idx];\
> 
> > +sfc64_init(s, r, r, r, 12);  \
> 
> for the record, why 12?

The reference has 3 init functions:
* one that uses one seed for the 3 parameters; it uses 12 rounds
* one that uses 3 separate seeds; it uses 18 rounds
* one that has "fast" in its name and does 8 rounds with one seed used for
  all 3 parameters

I will document this better


[...]
> >  return e->value * (p->var[index]= d2);
> >  }
> >  case e_hypot:return e->value * hypot(d, d2);
> > @@ -356,7 +362,7 @@ void av_expr_free(AVExpr *e)
> >  av_expr_free(e->param[1]);
> >  av_expr_free(e->param[2]);
> >  av_freep(&e->var);
> > -av_freep(&e->var_uint64);
> > +av_freep(&e->prng_state);
> >  av_freep(&e);
> >  }
> >  
> > @@ -744,8 +750,8 @@ int av_expr_parse(AVExpr **expr, const char *s,
> >  goto end;
> >  }
> >  e->var= av_mallocz(sizeof(double) *VARS);
> > -e->var_uint64= av_mallocz(sizeof(uint64_t) *VARS);
> > -if (!e->var || !e->var_uint64) {
> > +e->prng_state = av_mallocz(sizeof(*e->prng_state) *VARS);
> > +if (!e->var || !e->prng_state) {
> >  ret = AVERROR(ENOMEM);
> >  goto end;
> >  }
> > @@ -787,7 +793,7 @@ double av_expr_eval(AVExpr *e, const double 
> > *const_values, void *opaque)
> >  {
> >  Parser p = { 0 };
> >  p.var= e->var;
> > -p.var_uint64= e->var_uint64;
> > +p.prng_state= e->prng_state;
> >  
> >  p.const_values = const_values;
> >  p.opaque = opaque;
> > diff --git a/libavutil/sfc64.h b/libavutil/sfc64.h
> > new file mode 100644
> > index 000..25bc43abef1
> > --- /dev/null
> > +++ b/libavutil/sfc64.h
> > @@ -0,0 +1,59 @@
> > +/*
> > + * Copyright (c) 2024 Michael Niedermayer 
> > + *
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 
> > 02110-1301 USA
> > + *
> 
> > + * This is a implementation of SFC64 a 64-bit PRNG by Chris Doty-Humphrey.
> 
> nit: This is a implementation of SFC64, a 64-bit PRNG by Chris Doty-Humphrey.
> 

> > + *
> > + * This Generator is much faster (0m1.872s) than 64bit KISS (0m3.823s) and 
> > PCG-XSH-RR-64/32 (0m2.700s)
> 
> what are these benchmarks against?

a loop that computes a lot of random numbers and at the end prints their sum.

The behavior was btw quite different if the numbers are not summed and printed,
as the compiler can then optimize some things out, but no one would run a PRNG
and not use the values.


[...]
> > +static inline uint64_t sfc64_get(SFC64 *s) {
> > +uint64_t tmp = s->a + s->b + s->counter++;
> > +s->a = s->b ^ (s->b >> 11);
> > +s->b = s->c + (s->c << 3); // This is a multiply by 9
> > +s->c = ((s->c << 24) | (s->c >> 40)) + tmp;
> > +return tmp;
> > +}
> > +
> > +static inline void sfc64_init(SFC64 *s, uint64_t seeda, uint64_t seedb, 
> > uint64_t seedc, int rounds) {
> > +s->a   = seeda;
> > +s->b   = seedb;
> > +s->c   = seedc;
> > +s->counter = 1;
> > +while (rounds--)
> > +   

Re: [FFmpeg-devel] [PATCH] avutil/eval: Use even better PRNG

2024-01-14 Thread Stefano Sabatini
On date Saturday 2024-01-13 04:51:06 +0100, Michael Niedermayer wrote:
> This is the 64bit version of Chris Doty-Humphreys SFC64
> 
> Compared to the LCGs these produce much better quality numbers.
> Compared to LFGs this needs less state. (our LFG has 224 byte
> state for its 32bit version) this has 32byte state
> Also the initialization for our LFG is slower.
> This is also much faster than KISS or PCG.
> 
> This commit replaces the broken LCG used before.
> (broken as it had only a period ~200M due to being put in a double)
> 
> This changes the output from random() which is why libswresample.mak
> is updated, update was done using the command in libswresample.mak
> 
> Signed-off-by: Michael Niedermayer 
> ---
>  libavutil/eval.c |  24 +++-
>  libavutil/sfc64.h|  85 ++
>  tests/fate/libswresample.mak | 208 +--
>  tests/ref/fate/eval  |   2 +-
>  4 files changed, 210 insertions(+), 109 deletions(-)
>  create mode 100644 libavutil/sfc64.h
> 
> diff --git a/libavutil/eval.c b/libavutil/eval.c
> index dc6b3697bc2..349015d4fa3 100644
> --- a/libavutil/eval.c
> +++ b/libavutil/eval.c
> @@ -35,6 +35,7 @@
>  #include "internal.h"
>  #include "log.h"
>  #include "mathematics.h"
> +#include "sfc64.h"
>  #include "time.h"
>  #include "avstring.h"
>  #include "timer.h"
> @@ -55,6 +56,7 @@ typedef struct Parser {
>  void *log_ctx;
>  #define VARS 10
>  double *var;
> +FFSFC64 *prng_state;
>  } Parser;
>  
>  static const AVClass eval_class = {
> @@ -173,6 +175,7 @@ struct AVExpr {
>  } a;
>  struct AVExpr *param[3];
>  double *var;
> +FFSFC64 *prng_state;
>  };
>  
>  static double etime(double v)
> @@ -231,8 +234,14 @@ static double eval_expr(Parser *p, AVExpr *e)
>  
>  #define COMPUTE_NEXT_RANDOM()\
>  int idx = av_clip(eval_expr(p, e->param[0]), 0, VARS-1); \
> -uint64_t r = isnan(p->var[idx]) ? 0 : p->var[idx];   \
> -r = r * 1664525 + 1013904223;\
> +FFSFC64 *s = p->prng_state + idx;\
> +uint64_t r;  \
> + \
> +if (!s->counter) {   \
> +r = isnan(p->var[idx]) ? 0 : p->var[idx];\
> +ff_sfc64_init(s, r, r, r, 12);   \
> +}\
> +r = ff_sfc64_get(s); \
>  p->var[idx] = r; \
>  
>  case e_random: {
> @@ -329,7 +338,11 @@ static double eval_expr(Parser *p, AVExpr *e)
>  case e_div: return e->value * (d2 ? (d / d2) : d * INFINITY);
>  case e_add: return e->value * (d + d2);
>  case e_last:return e->value * d2;
> -case e_st : return e->value * (p->var[av_clip(d, 0, 
> VARS-1)]= d2);
> +case e_st :  {
> +int index = av_clip(d, 0, VARS-1);
> +p->prng_state[index].counter = 0;
> +return e->value * (p->var[index]= d2);
> +}
>  case e_hypot:return e->value * hypot(d, d2);
>  case e_atan2:return e->value * atan2(d, d2);
>  case e_bitand: return isnan(d) || isnan(d2) ? NAN : e->value 
> * ((long int)d & (long int)d2);
> @@ -349,6 +362,7 @@ void av_expr_free(AVExpr *e)
>  av_expr_free(e->param[1]);
>  av_expr_free(e->param[2]);
>  av_freep(&e->var);
> +av_freep(&e->prng_state);
>  av_freep(&e);
>  }
>  
> @@ -736,7 +750,8 @@ int av_expr_parse(AVExpr **expr, const char *s,
>  goto end;
>  }
>  e->var= av_mallocz(sizeof(double) *VARS);
> -if (!e->var) {
> +e->prng_state = av_mallocz(sizeof(*e->prng_state) *VARS);
> +if (!e->var || !e->prng_state) {
>  ret = AVERROR(ENOMEM);
>  goto end;
>  }
> @@ -778,6 +793,7 @@ double av_expr_eval(AVExpr *e, const double 
> *const_values, void *opaque)
>  {
>  Parser p = { 0 };
>  p.var= e->var;
> +p.prng_state= e->prng_state;
>  
>  p.const_values = const_values;
>  p.opaque = opaque;
> diff --git a/libavutil/sfc64.h b/libavutil/sfc64.h
> new file mode 100644
> index 000..05f1e84cc68
> --- /dev/null
> +++ b/libavutil/sfc64.h
> @@ -0,0 +1,85 @@
> +/*
> + * Copyright (c) 2024 Michael Niedermayer 
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope

Re: [FFmpeg-devel] [PATCH] avutil/eval: Use even better PRNG

2024-01-15 Thread Michael Niedermayer
On Sun, Jan 14, 2024 at 03:14:23PM +0100, Stefano Sabatini wrote:
> On date Saturday 2024-01-13 04:51:06 +0100, Michael Niedermayer wrote:
> > This is the 64bit version of Chris Doty-Humphreys SFC64
> > 
> > Compared to the LCGs these produce much better quality numbers.
> > Compared to LFGs this needs less state. (our LFG has 224 byte
> > state for its 32bit version) this has 32byte state
> > Also the initialization for our LFG is slower.
> > This is also much faster than KISS or PCG.
> > 
> > This commit replaces the broken LCG used before.
> > (broken as it had only a period ~200M due to being put in a double)
> > 
> > This changes the output from random() which is why libswresample.mak
> > is updated, update was done using the command in libswresample.mak
> > 
> > Signed-off-by: Michael Niedermayer 
> > ---
> >  libavutil/eval.c |  24 +++-
> >  libavutil/sfc64.h|  85 ++
> >  tests/fate/libswresample.mak | 208 +--
> >  tests/ref/fate/eval  |   2 +-
> >  4 files changed, 210 insertions(+), 109 deletions(-)
> >  create mode 100644 libavutil/sfc64.h
> > 
> > diff --git a/libavutil/eval.c b/libavutil/eval.c
> > index dc6b3697bc2..349015d4fa3 100644
> > --- a/libavutil/eval.c
> > +++ b/libavutil/eval.c
> > @@ -35,6 +35,7 @@
> >  #include "internal.h"
> >  #include "log.h"
> >  #include "mathematics.h"
> > +#include "sfc64.h"
> >  #include "time.h"
> >  #include "avstring.h"
> >  #include "timer.h"
> > @@ -55,6 +56,7 @@ typedef struct Parser {
> >  void *log_ctx;
> >  #define VARS 10
> >  double *var;
> > +FFSFC64 *prng_state;
> >  } Parser;
> >  
> >  static const AVClass eval_class = {
> > @@ -173,6 +175,7 @@ struct AVExpr {
> >  } a;
> >  struct AVExpr *param[3];
> >  double *var;
> > +FFSFC64 *prng_state;
> >  };
> >  
> >  static double etime(double v)
> > @@ -231,8 +234,14 @@ static double eval_expr(Parser *p, AVExpr *e)
> >  
> >  #define COMPUTE_NEXT_RANDOM()\
> >  int idx = av_clip(eval_expr(p, e->param[0]), 0, VARS-1); \
> > -uint64_t r = isnan(p->var[idx]) ? 0 : p->var[idx];   \
> > -r = r * 1664525 + 1013904223;\
> > +FFSFC64 *s = p->prng_state + idx;\
> > +uint64_t r;  \
> > + \
> > +if (!s->counter) {   \
> > +r = isnan(p->var[idx]) ? 0 : p->var[idx];\
> > +ff_sfc64_init(s, r, r, r, 12);   \
> > +}\
> > +r = ff_sfc64_get(s); \
> >  p->var[idx] = r; \
> >  
> >  case e_random: {
> > @@ -329,7 +338,11 @@ static double eval_expr(Parser *p, AVExpr *e)
> >  case e_div: return e->value * (d2 ? (d / d2) : d * 
> > INFINITY);
> >  case e_add: return e->value * (d + d2);
> >  case e_last:return e->value * d2;
> > -case e_st : return e->value * (p->var[av_clip(d, 0, 
> > VARS-1)]= d2);
> > +case e_st :  {
> > +int index = av_clip(d, 0, VARS-1);
> > +p->prng_state[index].counter = 0;
> > +return e->value * (p->var[index]= d2);
> > +}
> >  case e_hypot:return e->value * hypot(d, d2);
> >  case e_atan2:return e->value * atan2(d, d2);
> >  case e_bitand: return isnan(d) || isnan(d2) ? NAN : 
> > e->value * ((long int)d & (long int)d2);
> > @@ -349,6 +362,7 @@ void av_expr_free(AVExpr *e)
> >  av_expr_free(e->param[1]);
> >  av_expr_free(e->param[2]);
> >  av_freep(&e->var);
> > +av_freep(&e->prng_state);
> >  av_freep(&e);
> >  }
> >  
> > @@ -736,7 +750,8 @@ int av_expr_parse(AVExpr **expr, const char *s,
> >  goto end;
> >  }
> >  e->var= av_mallocz(sizeof(double) *VARS);
> > -if (!e->var) {
> > +e->prng_state = av_mallocz(sizeof(*e->prng_state) *VARS);
> > +if (!e->var || !e->prng_state) {
> >  ret = AVERROR(ENOMEM);
> >  goto end;
> >  }
> > @@ -778,6 +793,7 @@ double av_expr_eval(AVExpr *e, const double 
> > *const_values, void *opaque)
> >  {
> >  Parser p = { 0 };
> >  p.var= e->var;
> > +p.prng_state= e->prng_state;
> >  
> >  p.const_values = const_values;
> >  p.opaque = opaque;
> > diff --git a/libavutil/sfc64.h b/libavutil/sfc64.h
> > new file mode 100644
> > index 000..05f1e84cc68
> > --- /dev/null
> > +++ b/libavutil/sfc64.h
> > @@ -0,0 +1,85 @@
> > +/*
> > + * Copyright (c) 2024 Michael Niedermayer 
> > + *
> > + * This file is part of FFmpeg.
> > +

Re: [FFmpeg-devel] [PATCH] avutil/eval: Use even better PRNG

2024-01-19 Thread Michael Koch
There is still a small problem with the random generator, but this has 
nothing to do with the recent changes.
If the random() expression is used in the geq filter, then multiple 
pixels get the same sequence of random numbers.

As can be shown with this command, where the frame has only two pixels:

ffmpeg -loglevel repeat -f lavfi -i nullsrc=size=1x2,format=gray -vf 
"geq=lum='print(random(0));print(random(0));print(random(0))'" -frames 1 
-y out.png


I think it's because the filter is executed in multiple threads.
-filter_threads 1 fixes the problem, but it slows down the whole filter 
thread.


Michael


___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] avutil/eval: Use even better PRNG

2024-01-19 Thread Michael Niedermayer
On Fri, Jan 19, 2024 at 09:53:46AM +0100, Michael Koch wrote:
> There is still a small problem with the random generator, but this has
> nothing to do with the recent changes.
> If the random() expression is used in the geq filter, then multiple pixels
> get the same sequence of random numbers.
> As can be shown with this command, where the frame has only two pixels:
> 
> ffmpeg -loglevel repeat -f lavfi -i nullsrc=size=1x2,format=gray -vf
> "geq=lum='print(random(0));print(random(0));print(random(0))'" -frames 1 -y
> out.png
> 
> I think it's because the filter is executed in multiple threads.
> -filter_threads 1 fixes the problem, but it slows down the whole filter
> thread.

You can avoid this by using
ifnot(X,st(0,Y))

which would reseed the random number generator differently on the first pixel of
each line.
Not sure this is the best solution; better ideas are welcome.

thx

[...]

-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Some people wanted to paint the bikeshed green, some blue and some pink.
People argued and fought, when they finally agreed, only rust was left.


signature.asc
Description: PGP signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".