This is an automated email from the ASF dual-hosted git repository. aherbert pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/commons-statistics.git
The following commit(s) were added to refs/heads/master by this push: new 876b5ec STATISTICS-25: T-dist to use the complement of the regularized beta 876b5ec is described below commit 876b5ec9782a36887f08612bee5b0ee8917df6f2 Author: Alex Herbert <aherb...@apache.org> AuthorDate: Sat Jan 22 14:32:20 2022 +0000 STATISTICS-25: T-dist to use the complement of the regularized beta NUMBERS-181 improved the regularized beta function. This allows increasing the threshold for switching to the normal approximation. --- .../statistics/distribution/TDistribution.java | 56 +++++++++++----------- .../statistics/distribution/TDistributionTest.java | 48 +++++++++++-------- 2 files changed, 56 insertions(+), 48 deletions(-) diff --git a/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/TDistribution.java b/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/TDistribution.java index 7c93318..50fc554 100644 --- a/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/TDistribution.java +++ b/commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/TDistribution.java @@ -108,8 +108,9 @@ public abstract class TDistribution extends AbstractContinuousDistribution { /** 2. */ private static final double TWO = 2; /** Number of degrees of freedom above which to use the normal distribution. - * This is used to check the CDF when the degrees of freedom is large. */ - private static final double DOF_THRESHOLD_NORMAL = 2.99e6; + * This is used to check the CDF when the degrees of freedom is large. + * Set to 1 / machine epsilon, 2^52, or 4.50e15. */ + private static final double DOF_THRESHOLD_NORMAL = 0x1.0p52; /** degreesOfFreedom / 2. */ private final double dofOver2; @@ -167,36 +168,33 @@ public abstract class TDistribution extends AbstractContinuousDistribution { if (x == 0) { return 0.5; } - final double df = getDegreesOfFreedom(); - if (df > DOF_THRESHOLD_NORMAL) { + final double v = getDegreesOfFreedom(); + + // This threshold may no longer be required. + // See STATISTICS-25. + if (v > DOF_THRESHOLD_NORMAL) { return STANDARD_NORMAL.cumulativeProbability(x); } - final double x2 = x * x; - // z = 1 / (1 + x^2/df) - // Simplify by multiplication by df - final double z = df / (df + x2); - - // The RegularizedBeta has the complement: - // I(z, a, b) = 1 - I(1 - z, a, b) - // This is used when z > (a + 1) / (2 + b + a). - // Detect this condition and directly use the complement. - if (z > (dofOver2 + 1) / (2.5 + dofOver2)) { - // zc = 1 - z; pc = 1 - p - final double zc = x2 / (df + x2); - final double pc = RegularizedBeta.value(zc, 0.5, dofOver2); - - return x < 0 ? - // 0.5 * p == 0.5 * (1 - pc) = 0.5 - 0.5 * pc - 0.5 - 0.5 * pc : - // 1 - 0.5 * p == 1 - 0.5 * (1 - pc) = 0.5 + 0.5 * pc - 0.5 + 0.5 * pc; - } - - final double p = RegularizedBeta.value(z, dofOver2, 0.5); - return x < 0 ? - 0.5 * p : - 1 - 0.5 * p; + // cdf(t) = 1 - 0.5 * I_x(t)(v/2, 1/2) + // where x(t) = v / (v + t^2) + // + // When t^2 << v using the regularized beta results + // in loss of precision. Use the complement instead: + // I[x](a,b) = 1 - I[1-x](b,a) + // x = v / (v + t^2) + // 1-x = t^2 / (v + t^2) + // Use the threshold documented in the Boost t-distribution: + // https://www.boost.org/doc/libs/1_78_0/libs/math/doc/html/math_toolkit/dist_ref/dists/students_t_dist.html + + final double t2 = x * x; + double z; + if (v < 2 * t2) { + z = RegularizedBeta.value(v / (v + t2), dofOver2, 0.5) / 2; + } else { + z = RegularizedBeta.complement(t2 / (v + t2), 0.5, dofOver2) / 2; + } + return x > 0 ? 1 - z : z; } @Override diff --git a/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/TDistributionTest.java b/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/TDistributionTest.java index ab27303..106a1f3 100644 --- a/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/TDistributionTest.java +++ b/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/TDistributionTest.java @@ -106,31 +106,41 @@ class TDistributionTest extends BaseContinuousDistributionTest { void testStatistics25() { final double[] df = {1, 10, 1e2, 1e3, 1e4, 1e5, 1e6, 2e6, 2.98e6, 2.99e6, 3e6, 4e6, - 1e7, 1e8, 1e9, 1e10}; + 1e7, 1e8, 1e9, 1e10, 1e11, + 1e12, 1e13, 1e14, 1e15, 1e16, + 1e17, 1e18}; // Generated from Python. - final double[] expected = {0.507956089912, - 0.509726595102, - 0.509947608093, - 0.509970024339, - 0.509972268782, - 0.509972493254, - 0.509972515701, - 0.509972516948, - 0.509972517358, - 0.509972517361, - 0.509972517364, - 0.509972517572, - 0.509972517946, - 0.50997251817, - 0.509972518193, - 0.509972518195}; + final double[] expected = {0.50795608991202579, + 0.50972659510159002, + 0.50994760809308259, + 0.50997002433945715, + 0.50997226878155033, + 0.50997249325358851, + 0.50997251570107027, + 0.50997251694815415, + 0.50997251735826887, + 0.50997251736106808, + 0.50997251736384874, + 0.50997251757169604, + 0.50997251794582121, + 0.50997251817029643, + 0.50997251819274392, + 0.50997251819498857, + 0.50997251819521316, + 0.50997251819523559, + 0.50997251819523781, + // 1e-14 follows + 0.50997251819523803, + 0.50997251819523803, + 0.50997251819523814, + 0.50997251819523814, + 0.50997251819523803}; final double c = 0.025; - final double tol = 1e-9; for (int i = 0; i < df.length; i++) { final TDistribution dist = TDistribution.of(df[i]); final double x = dist.cumulativeProbability(c); - Assertions.assertEquals(expected[i], x, tol); + Assertions.assertEquals(expected[i], x, 6 * Math.ulp(expected[i])); } } }