URL: <https://savannah.gnu.org/bugs/?59900>
Summary: Add truncated normal distribution Project: GNU Scientific Library Submitted by: psa Submitted on: Sun 17 Jan 2021 10:26:25 PM UTC Category: None Severity: 3 - Normal Operating System: Status: None Assigned to: None Open/Closed: Open Release: Discussion Lock: Any _______________________________________________________ Details: from msunet =at= shellblade =dot= net Please find attached an implementation of a truncated normal distribution based on John Burkardt's presentation linked below. Please note that I am neither a mathematician nor an expert in numerical computing, so any feedback is appreciated. This is also my first contribution to GSL (and a GNU project in general), so please excuse any inefficiencies. Thank you, Marc Adds a truncated normal distribution as described in John Burkardt's "The Truncated Normal Distribution": https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf --- cdf/gauss.c | 22 ++++++++++++++++ cdf/gaussinv.c | 20 ++++++++++++++ cdf/gsl_cdf.h | 6 +++++ cdf/test.c | 61 +++++++++++++++++++++++++++++++++++++++++++ filter/Makefile.am | 2 +- movstat/Makefile.am | 2 +- randist/gauss.c | 19 ++++++++++++++ randist/gsl_randist.h | 3 +++ randist/test.c | 20 ++++++++++++++ rstat/Makefile.am | 2 +- 10 files changed, 154 insertions(+), 3 deletions(-) diff --git a/cdf/gauss.c b/cdf/gauss.c index cdc8b0a3..110e58f1 100644 --- a/cdf/gauss.c +++ b/cdf/gauss.c @@ -349,3 +349,25 @@ gsl_cdf_gaussian_Q (const double x, const double sigma) { return gsl_cdf_ugaussian_Q (x / sigma); } + +double +gsl_cdf_tgaussian_P (const double x, const double a, const double b, const double sigma) +{ + if (x <= a) + { + return 0.0; + } + if (x >= b) + { + return 1.0; + } + const double n = gsl_cdf_gaussian_P (x, sigma) - gsl_cdf_gaussian_P (a, sigma); + const double d = gsl_cdf_gaussian_P (b, sigma) - gsl_cdf_gaussian_P (a, sigma); + return n / d; +} + +double +gsl_cdf_tgaussian_Q (const double x, const double a, const double b, const double sigma) +{ + return 1.0 - gsl_cdf_tgaussian_P (x, a, b, sigma); +} diff --git a/cdf/gaussinv.c b/cdf/gaussinv.c index dcbdca1d..28c9866c 100644 --- a/cdf/gaussinv.c +++ b/cdf/gaussinv.c @@ -200,3 +200,23 @@ gsl_cdf_gaussian_Qinv (const double Q, const double sigma) { return sigma * gsl_cdf_ugaussian_Qinv (Q); } + +double +gsl_cdf_tgaussian_Pinv (const double P, const double a, const double b, const double sigma) +{ + const double k + = gsl_cdf_gaussian_P (a, sigma) + + P * (gsl_cdf_gaussian_P (b, sigma) - gsl_cdf_gaussian_P (a, sigma)); + + return gsl_cdf_gaussian_Pinv (k, sigma); +} + +double +gsl_cdf_tgaussian_Qinv (const double Q, const double a, const double b, const double sigma) +{ + const double k + = gsl_cdf_gaussian_P (b, sigma) + - Q * (gsl_cdf_gaussian_P (b, sigma) - gsl_cdf_gaussian_P (a, sigma)); + + return gsl_cdf_gaussian_Pinv (k, sigma); +} diff --git a/cdf/gsl_cdf.h b/cdf/gsl_cdf.h index 2bc3fed5..61379a82 100644 --- a/cdf/gsl_cdf.h +++ b/cdf/gsl_cdf.h @@ -46,6 +46,12 @@ double gsl_cdf_gaussian_Q (const double x, const double sigma); double gsl_cdf_gaussian_Pinv (const double P, const double sigma); double gsl_cdf_gaussian_Qinv (const double Q, const double sigma); +double gsl_cdf_tgaussian_P (const double x, const double a, const double b, const double sigma); +double gsl_cdf_tgaussian_Q (const double x, const double a, const double b, const double sigma); + +double gsl_cdf_tgaussian_Pinv (const double P, const double a, const double b, const double sigma); +double gsl_cdf_tgaussian_Qinv (const double Q, const double a, const double b, const double sigma); + double gsl_cdf_gamma_P (const double x, const double a, const double b); double gsl_cdf_gamma_Q (const double x, const double a, const double b); diff --git a/cdf/test.c b/cdf/test.c index 1b4cf777..65eeaa6e 100644 --- a/cdf/test.c +++ b/cdf/test.c @@ -39,6 +39,8 @@ void test_ugaussian (void); void test_ugaussianinv (void); +void test_tgaussian (void); +void test_tgaussianinv (void); void test_exponential (void); void test_exponentialinv (void); void test_exppow (void); @@ -398,6 +400,7 @@ main (void) Function values computed with PARI, 28 digits precision */ test_ugaussian (); + test_tgaussian (); test_exponential (); test_exppow (); test_tdist (); @@ -407,6 +410,7 @@ main (void) test_beta (); test_ugaussianinv (); + test_tgaussianinv (); test_exponentialinv (); test_gammainv (); test_chisqinv (); @@ -535,6 +539,63 @@ void test_ugaussianinv (void) { } + /* Test values from John Burkardt, The Truncated Normal Distribution + Page 23. */ + +void test_tgaussian (void) +{ + TEST (gsl_cdf_tgaussian_P, (-18.37, -50.0, 50.0, 25.0), 0.218418626765711416, TEST_TOL0); + TEST (gsl_cdf_tgaussian_P, (37.962, -50.0, 50.0, 25.0), 0.956315769095676504, TEST_TOL0); + TEST (gsl_cdf_tgaussian_P, (22.367, -50.0, 50.0, 25.0), 0.82951388212098065, TEST_TOL0); + TEST (gsl_cdf_tgaussian_P, (3.704, -50.0, 50.0, 25.0), 0.561699074822187949, TEST_TOL0); + TEST (gsl_cdf_tgaussian_P, (-5.1, -50.0, 50.0, 25.0), 0.415323967636885893, TEST_TOL0); + TEST (gsl_cdf_tgaussian_P, (-34.1674, -50.0, 50.0, 25.0), 0.0661185873044326383, TEST_TOL0); + TEST (gsl_cdf_tgaussian_P, (-15.4257, -50.0, 50.0, 25.0), 0.257577857199668192, TEST_TOL0); + TEST (gsl_cdf_tgaussian_P, (-28.4328, -50.0, 50.0, 25.0), 0.109956874884255498, TEST_TOL0); + TEST (gsl_cdf_tgaussian_P, (-37.9346, -50.0, 50.0, 25.0), 0.0438289787355652244, TEST_TOL0); + TEST (gsl_cdf_tgaussian_P, (8.155, -50.0, 50.0, 25.0), 0.633958632430714042, TEST_TOL0); + + TEST (gsl_cdf_tgaussian_Q, (-18.37, -50.0, 50.0, 25.0), 0.7815813732342887, TEST_TOL0); + TEST (gsl_cdf_tgaussian_Q, (37.962, -50.0, 50.0, 25.0), 0.043684230904323385, TEST_TOL0); + TEST (gsl_cdf_tgaussian_Q, (22.367, -50.0, 50.0, 25.0), 0.17048611787901935, TEST_TOL0); + TEST (gsl_cdf_tgaussian_Q, (3.704, -50.0, 50.0, 25.0), 0.43830092517781205, TEST_TOL0); + TEST (gsl_cdf_tgaussian_Q, (-5.1, -50.0, 50.0, 25.0), 0.584676032363114162, TEST_TOL0); + TEST (gsl_cdf_tgaussian_Q, (-34.1674, -50.0, 50.0, 25.0), 0.9338814126955673, TEST_TOL0); + TEST (gsl_cdf_tgaussian_Q, (-15.4257, -50.0, 50.0, 25.0), 0.7424221428003318, TEST_TOL0); + TEST (gsl_cdf_tgaussian_Q, (-28.4328, -50.0, 50.0, 25.0), 0.8900431251157445, TEST_TOL0); + TEST (gsl_cdf_tgaussian_Q, (-37.9346, -50.0, 50.0, 25.0), 0.9561710212644348, TEST_TOL0); + TEST (gsl_cdf_tgaussian_Q, (8.155, -50.0, 50.0, 25.0), 0.36604136756928596, TEST_TOL0); +} + + /* Test values from John Burkardt, The Truncated Normal Distribution + Page 24. */ + +void test_tgaussianinv (void) +{ + TEST (gsl_cdf_tgaussian_Pinv, (0.218418626765711305, -50.0, 50.0, 25.0), -18.37, TEST_TOL0); + TEST (gsl_cdf_tgaussian_Pinv, (0.956315769095676504, -50.0, 50.0, 25.0), 37.961999999999982, TEST_TOL0); + TEST (gsl_cdf_tgaussian_Pinv, (0.82951388212098065, -50.0, 50.0, 25.0), 22.367, TEST_TOL0); + TEST (gsl_cdf_tgaussian_Pinv, (0.561699074822187949, -50.0, 50.0, 25.0), 3.7039999999999913, TEST_TOL0); + TEST (gsl_cdf_tgaussian_Pinv, (0.415307593603450431, -50.0, 50.0, 25.0), -5.1009999999999982, TEST_TOL0); + TEST (gsl_cdf_tgaussian_Pinv, (0.0661185873044326383, -50.0, 50.0, 25.0), -34.1674, TEST_TOL0); + TEST (gsl_cdf_tgaussian_Pinv, (0.257577857199668192, -50.0, 50.0, 25.0), -15.4257, TEST_TOL0); + TEST (gsl_cdf_tgaussian_Pinv, (0.109956874884255498, -50.0, 50.0, 25.0), -28.4328, TEST_TOL0); + TEST (gsl_cdf_tgaussian_Pinv, (0.0438289787355652244, -50.0, 50.0, 25.0), -37.9346, TEST_TOL0); + TEST (gsl_cdf_tgaussian_Pinv, (0.633958632430714042, -50.0, 50.0, 25.0), 8.155, TEST_TOL0); + + TEST (gsl_cdf_tgaussian_Qinv, (0.7815813732342887, -50.0, 50.0, 25.0), -18.37, TEST_TOL0); + TEST (gsl_cdf_tgaussian_Qinv, (0.043684230904323496, -50.0, 50.0, 25.0), 37.961999999999982, TEST_TOL0); + TEST (gsl_cdf_tgaussian_Qinv, (0.17048611787901935, -50.0, 50.0, 25.0), 22.3669999999999902, TEST_TOL0); + TEST (gsl_cdf_tgaussian_Qinv, (0.43830092517781205, -50.0, 50.0, 25.0), 3.7039999999999913, TEST_TOL0); + TEST (gsl_cdf_tgaussian_Qinv, (0.5846924063965495, -50.0, 50.0, 25.0), -5.10099999999999554, TEST_TOL0); + TEST (gsl_cdf_tgaussian_Qinv, (0.9338814126955673, -50.0, 50.0, 25.0), -34.1674, TEST_TOL0); + TEST (gsl_cdf_tgaussian_Qinv, (0.7424221428003318, -50.0, 50.0, 25.0), -15.4257000000000151, TEST_TOL0); + TEST (gsl_cdf_tgaussian_Qinv, (0.8900431251157445, -50.0, 50.0, 25.0), -28.4328, TEST_TOL0); + TEST (gsl_cdf_tgaussian_Qinv, (0.9561710212644348, -50.0, 50.0, 25.0), -37.9345999999999819, TEST_TOL0); + TEST (gsl_cdf_tgaussian_Qinv, (0.36604136756928596, -50.0, 50.0, 25.0), 8.155, TEST_TOL0); +} + + /* Tests for exponential cumulative distribution function Function values computed with PARI, 28 digits precision */ diff --git a/filter/Makefile.am b/filter/Makefile.am index 03684916..fa426c5d 100644 --- a/filter/Makefile.am +++ b/filter/Makefile.am @@ -12,4 +12,4 @@ check_PROGRAMS = test TESTS = $(check_PROGRAMS) test_SOURCES = test.c -test_LDADD = libgslfilter.la ../movstat/libgslmovstat.la ../statistics/libgslstatistics.la ../sort/libgslsort.la ../ieee-utils/libgslieeeutils.la ../randist/libgslrandist.la ../rng/libgslrng.la ../specfunc/libgslspecfunc.la ../complex/libgslcomplex.la ../err/libgslerr.la ../test/libgsltest.la ../vector/libgslvector.la ../blas/libgslblas.la ../cblas/libgslcblas.la ../block/libgslblock.la ../poly/libgslpoly.la ../sys/libgslsys.la ../utils/libutils.la +test_LDADD = libgslfilter.la ../movstat/libgslmovstat.la ../statistics/libgslstatistics.la ../sort/libgslsort.la ../ieee-utils/libgslieeeutils.la ../randist/libgslrandist.la ../rng/libgslrng.la ../specfunc/libgslspecfunc.la ../complex/libgslcomplex.la ../err/libgslerr.la ../test/libgsltest.la ../vector/libgslvector.la ../blas/libgslblas.la ../cblas/libgslcblas.la ../block/libgslblock.la ../poly/libgslpoly.la ../cdf/libgslcdf.la ../sys/libgslsys.la ../utils/libutils.la diff --git a/movstat/Makefile.am b/movstat/Makefile.am index 851b241b..3abd7ead 100644 --- a/movstat/Makefile.am +++ b/movstat/Makefile.am @@ -33,4 +33,4 @@ check_PROGRAMS = test TESTS = $(check_PROGRAMS) test_SOURCES = test.c -test_LDADD = libgslmovstat.la ../statistics/libgslstatistics.la ../sort/libgslsort.la ../ieee-utils/libgslieeeutils.la ../randist/libgslrandist.la ../rng/libgslrng.la ../specfunc/libgslspecfunc.la ../complex/libgslcomplex.la ../err/libgslerr.la ../test/libgsltest.la ../vector/libgslvector.la ../blas/libgslblas.la ../cblas/libgslcblas.la ../block/libgslblock.la ../sys/libgslsys.la ../utils/libutils.la +test_LDADD = libgslmovstat.la ../statistics/libgslstatistics.la ../sort/libgslsort.la ../ieee-utils/libgslieeeutils.la ../randist/libgslrandist.la ../rng/libgslrng.la ../specfunc/libgslspecfunc.la ../complex/libgslcomplex.la ../err/libgslerr.la ../test/libgsltest.la ../vector/libgslvector.la ../blas/libgslblas.la ../cblas/libgslcblas.la ../block/libgslblock.la ../cdf/libgslcdf.la ../sys/libgslsys.la ../utils/libutils.la diff --git a/randist/gauss.c b/randist/gauss.c index 1b3442ea..1a072186 100644 --- a/randist/gauss.c +++ b/randist/gauss.c @@ -20,6 +20,7 @@ #include <config.h> #include <math.h> +#include <gsl/gsl_cdf.h> #include <gsl/gsl_math.h> #include <gsl/gsl_rng.h> #include <gsl/gsl_randist.h> @@ -141,3 +142,21 @@ gsl_ran_ugaussian_pdf (const double x) return gsl_ran_gaussian_pdf (x, 1.0); } +double +gsl_ran_tgaussian (const gsl_rng * r, const double a, const double b, const double sigma) +{ + const double p = gsl_rng_uniform_pos (r); + return gsl_cdf_tgaussian_Pinv (p, a, b, sigma); +} + +double +gsl_ran_tgaussian_pdf (const double x, const double a, const double b, const double sigma) +{ + if (x <= a || x >= b) + { + return 0.0; + } + const double n = gsl_ran_gaussian_pdf (x, sigma); + const double d = gsl_cdf_gaussian_P (b, sigma) - gsl_cdf_gaussian_P (a, sigma); + return n / d; +} diff --git a/randist/gsl_randist.h b/randist/gsl_randist.h index d38ccb36..69027212 100644 --- a/randist/gsl_randist.h +++ b/randist/gsl_randist.h @@ -92,6 +92,9 @@ double gsl_ran_gaussian_tail_pdf (const double x, const double a, const double s double gsl_ran_ugaussian_tail (const gsl_rng * r, const double a); double gsl_ran_ugaussian_tail_pdf (const double x, const double a); +double gsl_ran_tgaussian (const gsl_rng * r, const double a, const double b, const double sigma); +double gsl_ran_tgaussian_pdf (const double x, const double a, const double b, const double sigma); + void gsl_ran_bivariate_gaussian (const gsl_rng * r, double sigma_x, double sigma_y, double rho, double *x, double *y); double gsl_ran_bivariate_gaussian_pdf (const double x, const double y, const double sigma_x, const double sigma_y, const double rho); diff --git a/randist/test.c b/randist/test.c index c841f8b9..1b644259 100644 --- a/randist/test.c +++ b/randist/test.c @@ -156,6 +156,8 @@ double test_ugaussian_ratio_method (void); double test_ugaussian_ratio_method_pdf (double x); double test_ugaussian_tail (void); double test_ugaussian_tail_pdf (double x); +double test_tgaussian (void); +double test_tgaussian_pdf (double x); double test_bivariate_gaussian1 (void); double test_bivariate_gaussian1_pdf (double x); double test_bivariate_gaussian2 (void); @@ -282,6 +284,11 @@ main (void) testMoments (FUNC (ugaussian), -1.0, 1.0, 0.6826895); testMoments (FUNC (ugaussian), 3.0, 3.5, 0.0011172689); testMoments (FUNC (ugaussian_tail), 3.0, 3.5, 0.0011172689 / 0.0013498981); + testMoments (FUNC (tgaussian), -3.0, 3.0, 1.0); + testMoments (FUNC (tgaussian), -9.0, -3.1, 0.0); + testMoments (FUNC (tgaussian), 3.1, 9.0, 0.0); + testMoments (FUNC (tgaussian), 0.0, 3.0, 0.5); + testMoments (FUNC (tgaussian), -3.0, 0.0, 0.5); testMoments (FUNC (exponential), 0.0, 1.0, 1 - exp (-0.5)); testMoments (FUNC (cauchy), 0.0, 10000.0, 0.5); @@ -339,6 +346,7 @@ main (void) testPDF (FUNC2 (gaussian_tail1)); testPDF (FUNC2 (gaussian_tail2)); testPDF (FUNC2 (ugaussian_tail)); + testPDF (FUNC2 (tgaussian)); testPDF (FUNC2 (bivariate_gaussian1)); testPDF (FUNC2 (bivariate_gaussian2)); @@ -1626,6 +1634,18 @@ test_ugaussian_tail_pdf (double x) return gsl_ran_ugaussian_tail_pdf (x, 3.0); } +double +test_tgaussian (void) +{ + return gsl_ran_tgaussian (r_global, -3.0, 3.0, 1.5); +} + +double +test_tgaussian_pdf (double x) +{ + return gsl_ran_tgaussian_pdf (x, -3.0, 3.0, 1.5); +} + double test_bivariate_gaussian1 (void) { diff --git a/rstat/Makefile.am b/rstat/Makefile.am index 85db2013..5dd18e7b 100644 --- a/rstat/Makefile.am +++ b/rstat/Makefile.am @@ -10,6 +10,6 @@ check_PROGRAMS = test TESTS = $(check_PROGRAMS) test_SOURCES = test.c -test_LDADD = libgslrstat.la ../statistics/libgslstatistics.la ../sort/libgslsort.la ../ieee-utils/libgslieeeutils.la ../randist/libgslrandist.la ../rng/libgslrng.la ../specfunc/libgslspecfunc.la ../complex/libgslcomplex.la ../err/libgslerr.la ../test/libgsltest.la ../sys/libgslsys.la ../utils/libutils.la ../vector/libgslvector.la +test_LDADD = libgslrstat.la ../statistics/libgslstatistics.la ../sort/libgslsort.la ../ieee-utils/libgslieeeutils.la ../randist/libgslrandist.la ../rng/libgslrng.la ../specfunc/libgslspecfunc.la ../complex/libgslcomplex.la ../err/libgslerr.la ../test/libgsltest.la ../sys/libgslsys.la ../utils/libutils.la ../vector/libgslvector.la ../cdf/libgslcdf.la -- 2.27.0 _______________________________________________________ Reply to this item at: <https://savannah.gnu.org/bugs/?59900> _______________________________________________ Message sent via Savannah https://savannah.gnu.org/