This is an automated email from the ASF dual-hosted git repository. aherbert pushed a commit to branch dist-test2 in repository https://gitbox.apache.org/repos/asf/commons-statistics.git
commit 7244c42babd59a4d592bb53587d287bb46a995af Author: aherbert <[email protected]> AuthorDate: Wed Sep 22 18:59:36 2021 +0100 New abstract base class for testing distributions --- .../BaseContinuousDistributionTest.java | 676 ++++++++++++++++++ .../distribution/BaseDiscreteDistributionTest.java | 754 +++++++++++++++++++++ .../distribution/BaseDistributionTest.java | 379 +++++++++++ .../distribution/BinomialDistributionTest.java | 207 +----- .../distribution/DistributionTestData.java | 729 ++++++++++++++++++++ .../distribution/NakagamiDistributionTest.java | 182 +---- .../commons/statistics/distribution/TestUtils.java | 32 + .../distribution/test.binomial.0.properties | 36 + .../distribution/test.binomial.1.properties | 32 + .../distribution/test.binomial.2.properties | 32 + .../distribution/test.binomial.3.properties | 39 ++ .../distribution/test.binomial.4.properties | 25 + .../distribution/test.binomial.5.properties | 25 + .../distribution/test.binomial.6.properties | 25 + .../distribution/test.nakagami.0.properties | 41 ++ .../distribution/test.nakagami.1.properties | 37 + .../distribution/test.nakagami.2.properties | 33 + src/main/resources/checkstyle/checkstyle.xml | 1 + 18 files changed, 2922 insertions(+), 363 deletions(-) diff --git a/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/BaseContinuousDistributionTest.java b/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/BaseContinuousDistributionTest.java new file mode 100644 index 0000000..40ed2d8 --- /dev/null +++ b/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/BaseContinuousDistributionTest.java @@ -0,0 +1,676 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.statistics.distribution; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Properties; +import java.util.stream.Stream; +import java.util.stream.Stream.Builder; +import org.apache.commons.math3.analysis.UnivariateFunction; +import org.apache.commons.math3.analysis.integration.BaseAbstractUnivariateIntegrator; +import org.apache.commons.math3.analysis.integration.IterativeLegendreGaussIntegrator; +import org.apache.commons.math3.util.MathArrays; +import org.apache.commons.rng.simple.RandomSource; +import org.apache.commons.statistics.distribution.DistributionTestData.ContinuousDistributionTestData; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Assumptions; +import org.junit.jupiter.api.TestInstance; +import org.junit.jupiter.api.TestInstance.Lifecycle; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +/** + * Abstract base class for {@link ContinuousDistribution} tests. + * + * <p>This class uses parameterized tests that are repeated for instances of a + * distribution. The distribution, test input and expected values are generated + * dynamically from properties files loaded from resources. 
+ * + * <p>The class has a single instance (see {@link Lifecycle#PER_CLASS}) that loads properties + * files from resources on creation. Resource files are assumed to be in the corresponding package + * for the class and named sequentially from 0: + * <pre> + * test.distname.0.properties + * test.distname.1.properties + * test.distname.2.properties + * </pre> + * <p>Where {@code distname} is the name of the distribution. The name is dynamically + * created in {@link #getDistributionName()} and can be overridden by implementing classes. + * A single parameterization of a distribution is tested using a single properties file. + * + * <p>To test a distribution create a sub-class and override the following methods: + * <ul> + * <li>{@link #makeDistribution(Object...) makeDistribution(Object...)} - Creates the distribution from the parameters + * <li>{@link #makeInvalidParameters()} - Generate invalid parameters for the distribution + * <li>{@link #getParameterNames()} - Return the names of parameter accessors + * </ul> + * + * <p>The distribution is created using + * {@link #makeDistribution(Object...) makeDistribution(Object...)}. This should + * create an instance of the distribution using parameters defined in the properties file. + * The parameters are parsed from String values to the appropriate parameter object. Currently + * this supports Double and Integer; numbers can be unboxed and used to create the distribution. + * + * <p>Illegal arguments for the distribution are tested from all combinations provided by + * {@link #makeInvalidParameters()}. If there are no illegal arguments this method may return + * null to skip the test. + * + * <p>If the distribution provides parameter accessors then the child test class can return + * the accessor names using {@link #getParameterNames()}. The distribution method accessors + * will be detected and invoked using reflection. This method may return + * null to skip the test. 
+ * + * <p>The properties file must contain parameters for the distribution, properties of the + * distribution (moments and bounds) and points to test the CDF and PDF with the expected values. + * This information can be used to evaluate the distribution CDF and PDF but also the survival + * function, consistency of the probability computations and random sampling. + * + * <p>Optionally: + * <ul> + * <li>Points for the PDF (and log PDF) can be specified. The default will use CDF points. + * Note: It is not expected that evaluation of the PDF will require different points to the CDF. + * <li>Points and expected values for the inverse CDF can be specified. The default will use + * an inverse mapping of the expected CDF values to the test points. + * <li>Expected values for the log PDF can be specified. The default will use + * {@link Math#log(double)} on the PDF values. + * <li>Points and expected values for the survival function can be specified. The default will use + * the expected CDF values (SF = 1 - CDF). + * <li>A tolerance for equality assertions. The default is 1e-4. It is recommended to update + * this conservative threshold. + * </ul> + * + * <p>If the distribution provides higher precision implementations of + * cumulative probability and/or survival probability as the values approach zero, then test + * points and expected values can be provided with a tolerance for equality assertions of + * high-precision computations. The default is 1e-22. + * + * <p>Note: All properties files are read during test initialization. Any errors in a single + * property file will throw an exception, invalidating the initialization and no tests + * will be executed. + * + * <p>The parameterized tests in this class are inherited. The tests are final and cannot be + * changed. This ensures each instance of a distribution is tested for all functionality in + * the {@link ContinuousDistribution} interface. 
Arguments to the parameterized tests are + * generated dynamically using methods of the same name. These can be over-ridden in child + * classes to alter parameters. Throwing a + * {@link org.opentest4j.TestAbortedException TestAbortedException} in this method will + * skip the test as the arguments will not be generated. + * + * <p>Each parameterized test is effectively static; it uses no instance data. + * To implement additional test cases with a specific distribution instance and test + * data, create a test in the child class and call the relevant test case to verify + * results. Note that it is recommended to use the properties file as this ensures the + * entire functionality of the distribution is tested for that parameterization. + * + * <p>Test data should be validated against reference tables or other packages where + * possible, and the source of the reference data and/or validation should be documented + * in the properties file or additional test cases as appropriate. + * + * <p>The properties file uses {@code key=value} pairs loaded using a + * {@link java.util.Properties} object. Values will be read as String and then parsed to + * numeric data, and data arrays. Multi-line values can use a {@code \} character. + * Data in the properties file will be converted to numbers using standard parsing + * functions appropriate to the primitive type, e.g. {@link Double#parseDouble(String)}. + * Special double values should use NaN, Infinity and -Infinity. 
+ * + * <p>The following is a complete properties file for a distribution: + * <pre> + * parameters = 0.5 1.0 + * # Computed using XYZ + * mean = 1.0 + * variance = NaN + * # optional (default -Infinity) + * lower = 0 + * # optional (default Infinity) + * upper = Infinity + * # optional (default true) + * connected = false + * # optional (default 1e-4) + * tolerance = 1e-9 + * # optional (default 1e-22) + * tolerance.hp = 1e-30 + * cdf.points = 0, 0.2 + * cdf.values = 0.0, 0.5 + * # optional (default uses cdf.values) + * pdf.points = 0, 40000 + * pdf.values = 0.0,\ + * 0.0 + * # optional (default uses log pdf.values) + * logpdf.values = -1900.123, -Infinity + * # optional (default uses cdf.points and 1 - cdf.values) + * sf.points = 400 + * sf.values = 0.0 + * # optional high-precision CDF test + * cdf.hp.points = 1e-16 + * cdf.hp.values = 1.23e-17 + * # optional high-precision survival function test + * sf.hp.points = 9 + * sf.hp.values = 2.34e-18 + * # optional inverse CDF test (default maps cdf.values to cdf.points) + * icdf.values = 0.0, 0.5 + * ipdf.values = 0.0, 0.2 + * </pre> + * + * <p>See {@link NakagamiDistributionTest} for an example and the resource file {@code test.nakagami.0.properties}. + */ +@TestInstance(Lifecycle.PER_CLASS) +abstract class BaseContinuousDistributionTest + extends BaseDistributionTest<ContinuousDistribution, ContinuousDistributionTestData> { + + @Override + ContinuousDistributionTestData makeDistributionData(Properties properties) { + return new ContinuousDistributionTestData(properties); + } + + //------------------------ Methods to stream the test data ----------------------------- + + // The @MethodSource annotation will default to a no arguments method of the same name + // as the @ParameterizedTest method. These can be overridden by child classes to + // stream different arguments to the test case. + + /** + * Create a stream of arguments containing the distribution to test, the CDF test points and + * the test tolerance. 
+ * + * @return the stream + */ + Stream<Arguments> streamCdfTestPoints() { + final Builder<Arguments> b = Stream.builder(); + final int[] size = {0}; + data.forEach(d -> { + final double[] p = d.getCdfPoints(); + if (TestUtils.getLength(p) == 0) { + return; + } + size[0]++; + b.accept(Arguments.of(namedDistribution(d.getParameters()), + namedArray("points", p), + d.getTolerance())); + }); + Assumptions.assumeTrue(size[0] != 0, () -> "Distribution has no data for test points"); + return b.build(); + } + + /** + * Create a stream of arguments containing the distribution to test, the PDF test points + * and values, and the test tolerance. + * + * @return the stream + */ + Stream<Arguments> testDensity() { + return stream(ContinuousDistributionTestData::getPdfPoints, + ContinuousDistributionTestData::getPdfValues, + ContinuousDistributionTestData::getTolerance, "pdf"); + } + + /** + * Create a stream of arguments containing the distribution to test, the log PDF test points + * and values, and the test tolerance. + * + * @return the stream + */ + Stream<Arguments> testLogDensity() { + return stream(ContinuousDistributionTestData::getPdfPoints, + ContinuousDistributionTestData::getLogPdfValues, + ContinuousDistributionTestData::getTolerance, "logpdf"); + } + + /** + * Create a stream of arguments containing the distribution to test, the CDF test points + * and values, and the test tolerance. + * + * @return the stream + */ + Stream<Arguments> testCumulativeProbability() { + return stream(ContinuousDistributionTestData::getCdfPoints, + ContinuousDistributionTestData::getCdfValues, + ContinuousDistributionTestData::getTolerance, "cdf"); + } + + /** + * Create a stream of arguments containing the distribution to test, the survival function + * test points and values, and the test tolerance. + * + * <p>This defaults to using the CDF points. The survival function is tested as 1 - CDF. 
+ * + * @return the stream + */ + Stream<Arguments> testSurvivalProbability() { + return stream(ContinuousDistributionTestData::getSfPoints, + ContinuousDistributionTestData::getSfValues, + ContinuousDistributionTestData::getTolerance, "sf"); + } + + /** + * Create a stream of arguments containing the distribution to test, the CDF test points + * and values, and the test tolerance for high-precision computations. + * + * @return the stream + */ + Stream<Arguments> testCumulativeProbabilityHighPrecision() { + return stream(ContinuousDistributionTestData::getCdfHpPoints, + ContinuousDistributionTestData::getCdfHpValues, + ContinuousDistributionTestData::getHighPrecisionTolerance, "cdf.hp"); + } + + /** + * Create a stream of arguments containing the distribution to test, the survival function + * test points and values, and the test tolerance for high-precision computations. + * + * @return the stream + */ + Stream<Arguments> testSurvivalProbabilityHighPrecision() { + return stream(ContinuousDistributionTestData::getSfHpPoints, + ContinuousDistributionTestData::getSfHpValues, + ContinuousDistributionTestData::getHighPrecisionTolerance, "sf.hp"); + } + + /** + * Create a stream of arguments containing the distribution to test, the inverse CDF test points + * and values, and the test tolerance. + * + * @return the stream + */ + Stream<Arguments> testInverseCumulativeProbability() { + return stream(ContinuousDistributionTestData::getIcdfPoints, + ContinuousDistributionTestData::getIcdfValues, + ContinuousDistributionTestData::getTolerance, "icdf"); + } + + /** + * Create a stream of arguments containing the distribution to test, the test points + * to evaluate the CDF and survival function, and the test tolerance. CDF + SF must equal 1. 
+ * + * @return the stream + */ + Stream<Arguments> testSurvivalAndCumulativeProbabilityComplement() { + return streamCdfTestPoints(); + } + + /** + * Create a stream of arguments containing the distribution to test, the test points + * to evaluate the CDF and probability in a range, and the test tolerance. + * Used to test CDF(x1) - CDF(x0) = probability(x0, x1). + * + * @return the stream + */ + Stream<Arguments> testConsistency() { + return streamCdfTestPoints(); + } + + /** + * Stream the arguments to test the density integrals. The test + * integrates the density function between consecutive test points for the cumulative + * density function. The default tolerance is 1e-9. Override this method to change + * the tolerance. + * + * @return the stream + */ + Stream<Arguments> testDensityIntegrals() { + // Use a higher tolerance than the default of 1e-4 for the integrals + return stream(ContinuousDistributionTestData::getCdfPoints, ContinuousDistributionTestData::getCdfValues, d -> 1e-9, "cdf"); + } + + /** + * Create a stream of arguments containing the distribution to test, the support + * lower and upper bound, and the support connect flag. + * + * @return the stream + */ + Stream<Arguments> testSupport() { + return data.stream().map(d -> { + return Arguments.of(namedDistribution(d.getParameters()), d.getLower(), d.getUpper(), d.isConnected()); + }); + } + + /** + * Create a stream of arguments containing the distribution to test, the mean + * and variance, and the test tolerance. + * + * @return the stream + */ + Stream<Arguments> testMoments() { + return data.stream().map(d -> { + return Arguments.of(namedDistribution(d.getParameters()), d.getMean(), d.getVariance(), d.getTolerance()); + }); + } + + //------------------------ Tests ----------------------------- + + // Tests are final. 
It is expected that the test can be modified by overriding + // the method used to stream the arguments, for example to use a specific tolerance + // for a test in preference to the tolerance defined in the properties file. + + /** + * Test that density calculations match expected values. + */ + @ParameterizedTest + @MethodSource + final void testDensity(ContinuousDistribution distribution, + double[] points, + double[] values, + double tolerance) { + for (int i = 0; i < points.length; i++) { + final double x = points[i]; + Assertions.assertEquals(values[i], + distribution.density(x), tolerance, + () -> "Incorrect probability density value returned for " + x); + } + } + + /** + * Test that logarithmic density calculations match expected values. + */ + @ParameterizedTest + @MethodSource + final void testLogDensity(ContinuousDistribution distribution, + double[] points, + double[] values, + double tolerance) { + for (int i = 0; i < points.length; i++) { + final double x = points[i]; + Assertions.assertEquals(values[i], + distribution.logDensity(x), tolerance, + () -> "Incorrect probability density value returned for " + x); + } + } + + /** + * Test that cumulative probability density calculations match expected values. 
+ */ + @ParameterizedTest + @MethodSource + final void testCumulativeProbability(ContinuousDistribution distribution, + double[] points, + double[] values, + double tolerance) { + // verify cumulativeProbability(double) + for (int i = 0; i < points.length; i++) { + final double x = points[i]; + Assertions.assertEquals(values[i], + distribution.cumulativeProbability(x), + tolerance, + () -> "Incorrect cumulative probability value returned for " + x); + } + // verify probability(double, double) + for (int i = 0; i < points.length; i++) { + final double x0 = points[i]; + for (int j = 0; j < points.length; j++) { + final double x1 = points[j]; + if (x0 <= x1) { + Assertions.assertEquals( + values[j] - values[i], + distribution.probability(x0, x1), + tolerance); + } else { + Assertions.assertThrows(IllegalArgumentException.class, + () -> distribution.probability(x0, x1), + "distribution.probability(double, double) should have thrown an exception that first argument is too large"); + } + } + } + } + + /** + * Test that survival probability density calculations match expected values. + */ + @ParameterizedTest + @MethodSource + final void testSurvivalProbability(ContinuousDistribution distribution, + double[] points, + double[] values, + double tolerance) { + for (int i = 0; i < points.length; i++) { + final double x = points[i]; + Assertions.assertEquals( + values[i], + distribution.survivalProbability(points[i]), + tolerance, + () -> "Incorrect survival probability value returned for " + x); + } + } + + /** + * Test that CDF is simply not 1-survival function by testing values that would result + * with inaccurate results if simply calculating 1-survival function. 
+ */ + @ParameterizedTest + @MethodSource + final void testCumulativeProbabilityHighPrecision(ContinuousDistribution distribution, + double[] points, + double[] values, + double tolerance) { + for (int i = 0; i < points.length; i++) { + final double x = points[i]; + Assertions.assertEquals( + values[i], + distribution.cumulativeProbability(x), + tolerance, + () -> "cumulative probability is not precise for value " + x); + } + } + + /** + * Test that survival is simply not 1-cdf by testing calculations that would underflow + * that calculation and result in an inaccurate answer. + */ + @ParameterizedTest + @MethodSource + final void testSurvivalProbabilityHighPrecision(ContinuousDistribution distribution, + double[] points, + double[] values, + double tolerance) { + for (int i = 0; i < points.length; i++) { + final double x = points[i]; + Assertions.assertEquals( + values[i], + distribution.survivalProbability(x), + tolerance, + () -> "survival probability is not precise for value " + x); + } + } + + /** + * Test that inverse cumulative probability density calculations match expected values + */ + @ParameterizedTest + @MethodSource + final void testInverseCumulativeProbability(ContinuousDistribution distribution, + double[] points, + double[] values, + double tolerance) { + for (int i = 0; i < points.length; i++) { + final double x = points[i]; + Assertions.assertEquals( + values[i], + distribution.inverseCumulativeProbability(points[i]), + tolerance, + () -> "Incorrect inverse cumulative probability value returned for " + x); + } + } + + /** + * Test that cumulative probability density and survival probability calculations + * sum to approximately 1.0. 
+ */ + @ParameterizedTest + @MethodSource + final void testSurvivalAndCumulativeProbabilityComplement(ContinuousDistribution distribution, + double[] points, + double tolerance) { + for (final double x : points) { + Assertions.assertEquals( + 1.0, + distribution.survivalProbability(x) + distribution.cumulativeProbability(x), + tolerance, + () -> "survival + cumulative probability were not close to 1.0 for " + x); + } + } + + /** + * Test that probability computations are consistent. + * This checks CDF(x, x) = 0 and CDF(x1) - CDF(x0) = probability(x0, x1). + */ + @ParameterizedTest + @MethodSource + final void testConsistency(ContinuousDistribution distribution, + double[] points, + double tolerance) { + for (int i = 1; i < points.length; i++) { + + // check that cdf(x, x) = 0 + Assertions.assertEquals( + 0.0, + distribution.probability(points[i], points[i]), + tolerance); + + // check that P(a < X <= b) = P(X <= b) - P(X <= a) + final double upper = Math.max(points[i], points[i - 1]); + final double lower = Math.min(points[i], points[i - 1]); + final double diff = distribution.cumulativeProbability(upper) - + distribution.cumulativeProbability(lower); + final double direct = distribution.probability(lower, upper); + Assertions.assertEquals(diff, direct, tolerance, + () -> "Inconsistent probability for (" + lower + "," + upper + ")"); + } + } + + /** + * Test CDF and inverse CDF values at the edge of the support of the distribution return + * expected values and the CDF outside the support returns consistent values. + */ + @ParameterizedTest + @MethodSource(value = "streamDistrbution") + final void testOutsideSupport(ContinuousDistribution dist) { + // Test various quantities when the variable is outside the support. 
+ final double lo = dist.getSupportLowerBound(); + Assertions.assertEquals(lo, dist.inverseCumulativeProbability(0.0)); + + final double below = Math.nextDown(lo); + Assertions.assertEquals(0.0, dist.density(below)); + Assertions.assertEquals(Double.NEGATIVE_INFINITY, dist.logDensity(below)); + Assertions.assertEquals(0.0, dist.cumulativeProbability(below)); + Assertions.assertEquals(1.0, dist.survivalProbability(below)); + + final double hi = dist.getSupportUpperBound(); + Assertions.assertEquals(0.0, dist.survivalProbability(hi)); + Assertions.assertEquals(hi, dist.inverseCumulativeProbability(1.0)); + + final double above = Math.nextUp(hi); + Assertions.assertEquals(0.0, dist.density(above)); + Assertions.assertEquals(Double.NEGATIVE_INFINITY, dist.logDensity(above)); + Assertions.assertEquals(1.0, dist.cumulativeProbability(above)); + Assertions.assertEquals(0.0, dist.survivalProbability(above)); + } + + /** + * Test invalid probabilities passed to computations that require a p-value in {@code [0, 1]} + * or a range where {@code p1 <= p2}. + */ + @ParameterizedTest + @MethodSource(value = "streamDistrbution") + final void testInvalidProbabilities(ContinuousDistribution dist) { + Assertions.assertThrows(DistributionException.class, () -> dist.probability(1, 0)); + Assertions.assertThrows(DistributionException.class, () -> dist.inverseCumulativeProbability(-1)); + Assertions.assertThrows(DistributionException.class, () -> dist.inverseCumulativeProbability(2)); + } + + /** + * Test sampling from the distribution. + */ + @ParameterizedTest + @MethodSource(value = "streamDistrbution") + final void testSampling(ContinuousDistribution dist) { + final double[] quartiles = TestUtils.getDistributionQuartiles(dist); + final double[] expected = {0.25, 0.25, 0.25, 0.25}; + + final int sampleSize = 1000; + MathArrays.scaleInPlace(sampleSize, expected); + + // Use fixed seed. 
+ final ContinuousDistribution.Sampler sampler = + dist.createSampler(RandomSource.XO_SHI_RO_256_PP.create(123456789L)); + final double[] sample = TestUtils.sample(sampleSize, sampler); + + final long[] counts = new long[4]; + for (int i = 0; i < sampleSize; i++) { + TestUtils.updateCounts(sample[i], counts, quartiles); + } + + TestUtils.assertChiSquareAccept(expected, counts, 0.001); + } + + /** + * Test that density integrals match the distribution. + * The (filtered, sorted) points array is used to source + * integration limits. The integral of the density (estimated using a + * Legendre-Gauss integrator) is compared with the cdf over the same + * interval. Test points outside of the domain of the density function + * are discarded. + */ + @ParameterizedTest + @MethodSource + final void testDensityIntegrals(ContinuousDistribution distribution, + double[] points, + double[] values, + double tolerance) { + final BaseAbstractUnivariateIntegrator integrator = + new IterativeLegendreGaussIntegrator(5, 1e-12, 1e-10); + final UnivariateFunction d = distribution::density; + final ArrayList<Double> integrationTestPoints = new ArrayList<>(); + for (int i = 0; i < points.length; i++) { + if (Double.isNaN(values[i]) || + values[i] < 1e-5 || + values[i] > 1 - 1e-5) { + continue; // exclude integrals outside domain. + } + integrationTestPoints.add(points[i]); + } + Collections.sort(integrationTestPoints); + for (int i = 1; i < integrationTestPoints.size(); i++) { + final double x0 = integrationTestPoints.get(i - 1); + final double x1 = integrationTestPoints.get(i); + Assertions.assertEquals( + distribution.probability(x0, x1), + integrator.integrate(1000000, // Triangle integrals are very slow to converge + d, x0, x1), tolerance); + } + } + + /** + * Test the support of the distribution matches the expected values. 
+ */ + @ParameterizedTest + @MethodSource + final void testSupport(ContinuousDistribution dist, double lower, double upper, boolean connected) { + Assertions.assertEquals(lower, dist.getSupportLowerBound()); + Assertions.assertEquals(upper, dist.getSupportUpperBound()); + Assertions.assertEquals(connected, dist.isSupportConnected()); + } + + /** + * Test the moments of the distribution matches the expected values. + */ + @ParameterizedTest + @MethodSource + final void testMoments(ContinuousDistribution dist, double mean, double variance, double tolerance) { + Assertions.assertEquals(mean, dist.getMean(), tolerance); + Assertions.assertEquals(variance, dist.getVariance(), tolerance); + } +} diff --git a/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/BaseDiscreteDistributionTest.java b/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/BaseDiscreteDistributionTest.java new file mode 100644 index 0000000..f88ce00 --- /dev/null +++ b/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/BaseDiscreteDistributionTest.java @@ -0,0 +1,754 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.statistics.distribution; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Properties; +import java.util.stream.IntStream; +import java.util.stream.Stream; +import java.util.stream.Stream.Builder; +import org.apache.commons.math3.util.MathArrays; +import org.apache.commons.rng.simple.RandomSource; +import org.apache.commons.statistics.distribution.DistributionTestData.DiscreteDistributionTestData; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Assumptions; +import org.junit.jupiter.api.TestInstance; +import org.junit.jupiter.api.TestInstance.Lifecycle; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +/** + * Abstract base class for {@link DiscreteDistribution} tests. + * + * <p>This class uses parameterized tests that are repeated for instances of a + * distribution. The distribution, test input and expected values are generated + * dynamically from properties files loaded from resources. + * + * <p>The class has a single instance (see {@link Lifecycle#PER_CLASS}) that loads properties + * files from resources on creation. Resource files are assumed to be in the corresponding package + * for the class and named sequentially from 0: + * <pre> + * test.distname.0.properties + * test.distname.1.properties + * test.distname.2.properties + * </pre> + * <p>Where {@code distname} is the name of the distribution. The name is dynamically + * created in {@link #getDistributionName()} and can be overridden by implementing classes. + * A single parameterization of a distribution is tested using a single properties file. + * + * <p>To test a distribution create a sub-class and override the following methods: + * <ul> + * <li>{@link #makeDistribution(Object...) 
makeDistribution(Object...)} - Creates the distribution from the parameters + * <li>{@link #makeInvalidParameters()} - Generate invalid parameters for the distribution + * <li>{@link #getParameterNames()} - Return the names of parameter accessors + * </ul> + * + * <p>The distribution is created using + * {@link #makeDistribution(Object...) makeDistribution(Object...)}. This should + * create an instance of the distribution using parameters defined in the properties file. + * The parameters are parsed from String values to the appropriate parameter object. Currently + * this supports Double and Integer; numbers can be unboxed and used to create the distribution. + * + * <p>Illegal arguments for the distribution are tested from all combinations provided by + * {@link #makeInvalidParameters()}. If there are no illegal arguments this method may return + * null to skip the test. + * + * <p>If the distribution provides parameter accessors then the child test class can return + * the accessor names using {@link #getParameterNames()}. The distribution method accessors + * will be detected and invoked using reflection. This method may return + * null to skip the test. + * + * <p>The properties file must contain parameters for the distribution, properties of the + * distribution (moments and bounds) and points to test the CDF and PMF with the expected values. + * This information can be used to evaluate the distribution CDF and PMF but also the survival + * function, consistency of the probability computations and random sampling. + * + * <p>Optionally: + * <ul> + * <li>Points for the PMF (and log PMF) can be specified. The default will use CDF points. + * Note: It is not expected that evaluation of the PMF will require different points to the CDF. + * <li>Points and expected values for the inverse CDF can be specified. The default will use + * an inverse mapping of the expected CDF values to the test points. + * <li>Expected values for the log PMF can be specified. 
The default will use + * {@link Math#log(double)} on the PMF values. + * <li>Points and expected values for the survival function can be specified. The default will use + * the expected CDF values (SF = 1 - CDF). + * <li>A tolerance for equality assertions. The default is 1e-4. It is recommended to update + * this conservative threshold. + * </ul> + * + * <p>If the distribution provides higher precision implementations of + * cumulative probability and/or survival probability as the values approach zero, then test + * points and expected values can be provided with a tolerance for equality assertions of + * high-precision computations. The default is 1e-22. + * + * <p>Note: All properties files are read during test initialization. Any errors in a single + * property file will throw an exception, invalidating the initialization and no tests + * will be executed. + * + * <p>The parameterized tests in this class are inherited. The tests are final and cannot be + * changed. This ensures each instance of a distribution is tested for all functionality in + * the {@link DiscreteDistribution} interface. Arguments to the parameterized tests are + * generated dynamically using methods of the same name. These can be over-ridden in child + * classes to alter parameters. Throwing a + * {@link org.opentest4j.TestAbortedException TestAbortedException} in this method will + * skip the test as the arguments will not be generated. + * + * <p>Each parameterized test is effectively static; it uses no instance data. + * To implement additional test cases with a specific distribution instance and test + * data, create a test in the child class and call the relevant test case to verify + * results. Note that it is recommended to use the properties file as this ensures the + * entire functionality of the distribution is tested for that parameterization. 
+ * + * <p>Test data should be validated against reference tables or other packages where + * possible, and the source of the reference data and/or validation should be documented + * in the properties file or additional test cases as appropriate. + * + * <p>The properties file uses {@code key=value} pairs loaded using a + * {@link java.util.Properties} object. Values will be read as String and then parsed to + * numeric data, and data arrays. Multi-line values can use a {@code \} character. + * Data in the properties file will be converted to numbers using standard parsing + * functions appropriate to the primitive type, e.g. {@link Double#parseDouble(String)}. + * Special double values should use NaN, Infinity and -Infinity. + * + * <p>The following is a complete properties file for a distribution: + * <pre> + * parameters = 0.5 1.0 + * # Computed using XYZ + * mean = 1.0 + * variance = NaN + * # optional (default -Infinity) + * lower = 0 + * # optional (default Infinity) + * upper = Infinity + * # optional (default true) + * connected = false + * # optional (default 1e-4) + * tolerance = 1e-9 + * # optional (default 1e-22) + * tolerance.hp = 1e-30 + * cdf.points = 0, 0.2 + * cdf.values = 0.0, 0.5 + * # optional (default uses cdf.values) + * pmf.points = 0, 40000 + * pmf.values = 0.0,\ + * 0.0 + * # optional (default uses log pmf.values) + * logpmf.values = -1900.123, -Infinity + * # optional (default uses cdf.points and 1 - cdf.values) + * sf.points = 400 + * sf.values = 0.0 + * # optional high-precision CDF test + * cdf.hp.points = 1e-16 + * cdf.hp.values = 1.23e-17 + * # optional high-precision survival function test + * sf.hp.points = 9 + * sf.hp.values = 2.34e-18 + * # optional inverse CDF test (default maps cdf.values to cdf.points) + * icdf.values = 0.0, 0.5 + * ipmf.values = 0.0, 0.2 + * </pre> + * + * <p>See {@link BinomialDistributionTest} for an example and the resource file {@code test.binomial.0.properties}. 
+ */ +@TestInstance(Lifecycle.PER_CLASS) +abstract class BaseDiscreteDistributionTest + extends BaseDistributionTest<DiscreteDistribution, DiscreteDistributionTestData> { + + @Override + DiscreteDistributionTestData makeDistributionData(Properties properties) { + return new DiscreteDistributionTestData(properties); + } + + //------------------------ Methods to stream the test data ----------------------------- + + // The @MethodSource annotation will default to a no arguments method of the same name + // as the @ParameterizedTest method. These can be overridden by child classes to + // stream different arguments to the test case. + + /** + * Create a stream of arguments containing the distribution to test, the CDF test points and + * the test tolerance. + * + * @return the stream + */ + Stream<Arguments> streamCdfTestPoints() { + final Builder<Arguments> b = Stream.builder(); + final int[] size = {0}; + data.forEach(d -> { + final int[] p = d.getCdfPoints(); + if (TestUtils.getLength(p) == 0) { + return; + } + size[0]++; + b.accept(Arguments.of(namedDistribution(d.getParameters()), + namedArray("points", p), + d.getTolerance())); + }); + Assumptions.assumeTrue(size[0] != 0, () -> "Distribution has no data for test points"); + return b.build(); + } + + /** + * Create a stream of arguments containing the distribution to test, the PMF test points + * and values, and the test tolerance. + * + * @return the stream + */ + Stream<Arguments> testProbability() { + return stream(DiscreteDistributionTestData::getPmfPoints, + DiscreteDistributionTestData::getPmfValues, + DiscreteDistributionTestData::getTolerance, "pmf"); + } + + /** + * Create a stream of arguments containing the distribution to test, the log PMF test points + * and values, and the test tolerance. 
+ * + * @return the stream + */ + Stream<Arguments> testLogProbability() { + return stream(DiscreteDistributionTestData::getPmfPoints, + DiscreteDistributionTestData::getLogPmfValues, + DiscreteDistributionTestData::getTolerance, "logpmf"); + } + + /** + * Create a stream of arguments containing the distribution to test, the CDF test points + * and values, and the test tolerance. + * + * @return the stream + */ + Stream<Arguments> testCumulativeProbability() { + return stream(DiscreteDistributionTestData::getCdfPoints, + DiscreteDistributionTestData::getCdfValues, + DiscreteDistributionTestData::getTolerance, "cdf"); + } + + /** + * Create a stream of arguments containing the distribution to test, the survival function + * test points and values, and the test tolerance. + * + * <p>This defaults to using the CDF points. The survival function is tested as 1 - CDF. + * + * @return the stream + */ + Stream<Arguments> testSurvivalProbability() { + return stream(DiscreteDistributionTestData::getSfPoints, + DiscreteDistributionTestData::getSfValues, + DiscreteDistributionTestData::getTolerance, "sf"); + } + + /** + * Create a stream of arguments containing the distribution to test, the CDF test points + * and values, and the test tolerance for high-precision computations. + * + * @return the stream + */ + Stream<Arguments> testCumulativeProbabilityHighPrecision() { + return stream(DiscreteDistributionTestData::getCdfHpPoints, + DiscreteDistributionTestData::getCdfHpValues, + DiscreteDistributionTestData::getHighPrecisionTolerance, "cdf.hp"); + } + + /** + * Create a stream of arguments containing the distribution to test, the survival function + * test points and values, and the test tolerance for high-precision computations. 
+ * + * @return the stream + */ + Stream<Arguments> testSurvivalProbabilityHighPrecision() { + return stream(DiscreteDistributionTestData::getSfHpPoints, + DiscreteDistributionTestData::getSfHpValues, + DiscreteDistributionTestData::getHighPrecisionTolerance, "sf.hp"); + } + + /** + * Create a stream of arguments containing the distribution to test, the inverse CDF test points + * and values, and the test tolerance. + * + * @return the stream + */ + Stream<Arguments> testInverseCumulativeProbability() { + return stream(DiscreteDistributionTestData::getIcdfPoints, + DiscreteDistributionTestData::getIcdfValues, + DiscreteDistributionTestData::getTolerance, "icdf"); + } + + /** + * Create a stream of arguments containing the distribution to test, the test points + * to evaluate the CDF and survival function, and the test tolerance. CDF + SF must equal 1. + * + * @return the stream + */ + Stream<Arguments> testSurvivalAndCumulativeProbabilityComplement() { + return streamCdfTestPoints(); + } + + /** + * Create a stream of arguments containing the distribution to test, the test points + * to evaluate the CDF and probability in a range, and the test tolerance. + * Used to test CDF(x1) - CDF(x0) = probability(x0, x1). + * + * @return the stream + */ + Stream<Arguments> testConsistency() { + return streamCdfTestPoints(); + } + + /** + * Create a stream of arguments containing the distribution to test and the test tolerance. + * + * @return the stream + */ + Stream<Arguments> testOutsideSupport() { + return data.stream().map(d -> Arguments.of(namedDistribution(d.getParameters()), d.getTolerance())); + } + + /** + * Create a stream of arguments containing the distribution to test, the PMF test points + * and values. The sampled PMF should sum to more than 50% of the distribution. 
+ * + * @return the stream + */ + Stream<Arguments> testSampling() { + final Builder<Arguments> b = Stream.builder(); + final int[] size = {0}; + data.forEach(d -> { + final int[] p = d.getPmfPoints(); + final double[] v = d.getPmfValues(); + if (TestUtils.getLength(p) == 0 || TestUtils.getLength(v) == 0) { + return; + } + size[0]++; + b.accept(Arguments.of(namedDistribution(d.getParameters()), + namedArray("points", p), + namedArray("values", v))); + }); + Assumptions.assumeTrue(size[0] != 0, () -> "Distribution has no data for pmf"); + return b.build(); + } + + /** + * Stream the arguments to test the density integrals. The test + * sums the probability mass function between consecutive test points for the cumulative + * density function. The default tolerance is 1e-9. Override this method to change + * the tolerance. + * + * @return the stream + */ + Stream<Arguments> testDensityIntegrals() { + // Use a higher tolerance than the default of 1e-4 for the integrals + return stream(DiscreteDistributionTestData::getCdfPoints, DiscreteDistributionTestData::getCdfValues, d -> 1e-9, "cdf"); + } + + /** + * Create a stream of arguments containing the distribution to test, the support + * lower and upper bound, and the support connect flag. + * + * @return the stream + */ + Stream<Arguments> testSupport() { + return data.stream().map(d -> { + return Arguments.of(namedDistribution(d.getParameters()), d.getLower(), d.getUpper(), d.isConnected()); + }); + } + + /** + * Create a stream of arguments containing the distribution to test, the mean + * and variance, and the test tolerance. + * + * @return the stream + */ + Stream<Arguments> testMoments() { + return data.stream().map(d -> { + return Arguments.of(namedDistribution(d.getParameters()), d.getMean(), d.getVariance(), d.getTolerance()); + }); + } + + //------------------------ Tests ----------------------------- + + // Tests are final. 
It is expected that the test can be modified by overriding + // the method used to stream the arguments, for example to use a specific tolerance + // for a test in preference to the tolerance defined in the properties file. + + // Extract the tests from the previous abstract test + + /** + * Test that probability calculations match expected values. + */ + @ParameterizedTest + @MethodSource + final void testProbability(DiscreteDistribution distribution, + int[] points, + double[] values, + double tolerance) { + for (int i = 0; i < points.length; i++) { + final int x = points[i]; + Assertions.assertEquals(values[i], + distribution.probability(x), tolerance, + () -> "Incorrect probability mass value returned for " + x); + } + } + + /** + * Test that logarithmic probability calculations match expected values. + */ + @ParameterizedTest + @MethodSource + final void testLogProbability(DiscreteDistribution distribution, + int[] points, + double[] values, + double tolerance) { + for (int i = 0; i < points.length; i++) { + final int x = points[i]; + Assertions.assertEquals(values[i], + distribution.logProbability(x), tolerance, + () -> "Incorrect log probability mass value returned for " + x); + } + } + + /** + * Test that cumulative probability density calculations match expected values.
+ */ + @ParameterizedTest + @MethodSource + final void testCumulativeProbability(DiscreteDistribution distribution, + int[] points, + double[] values, + double tolerance) { + // verify cumulativeProbability(int) + for (int i = 0; i < points.length; i++) { + final int x = points[i]; + Assertions.assertEquals(values[i], + distribution.cumulativeProbability(x), + tolerance, + () -> "Incorrect cumulative probability value returned for " + x); + } + // verify probability(int, int) computes the probability in the range (x0, x1] + for (int i = 0; i < points.length; i++) { + final int x0 = points[i]; + for (int j = 0; j < points.length; j++) { + final int x1 = points[j]; + if (x0 <= x1) { + Assertions.assertEquals( + values[j] - values[i], + distribution.probability(x0, x1), + tolerance); + } else { + Assertions.assertThrows(IllegalArgumentException.class, + () -> distribution.probability(x0, x1), + "distribution.probability(int, int) should have thrown an exception that first argument is too large"); + } + } + } + } + + /** + * Test that survival probability density calculations match expected values. + */ + @ParameterizedTest + @MethodSource + final void testSurvivalProbability(DiscreteDistribution distribution, + int[] points, + double[] values, + double tolerance) { + for (int i = 0; i < points.length; i++) { + final double x = points[i]; + Assertions.assertEquals( + values[i], + distribution.survivalProbability(points[i]), + tolerance, + () -> "Incorrect survival probability value returned for " + x); + } + } + + /** + * Test that the CDF is not simply computed as 1 - survival function by testing values that + * would give inaccurate results if computed as 1 - survival function.
+ */ + @ParameterizedTest + @MethodSource + final void testCumulativeProbabilityHighPrecision(DiscreteDistribution distribution, + int[] points, + double[] values, + double tolerance) { + for (int i = 0; i < points.length; i++) { + final int x = points[i]; + Assertions.assertEquals( + values[i], + distribution.cumulativeProbability(x), + tolerance, + () -> "cumulative probability is not precise for value " + x); + } + } + + /** + * Test that the survival function is not simply computed as 1 - CDF by testing calculations + * that would underflow that calculation and result in an inaccurate answer. + */ + @ParameterizedTest + @MethodSource + final void testSurvivalProbabilityHighPrecision(DiscreteDistribution distribution, + int[] points, + double[] values, + double tolerance) { + for (int i = 0; i < points.length; i++) { + final int x = points[i]; + Assertions.assertEquals( + values[i], + distribution.survivalProbability(x), + tolerance, + () -> "survival probability is not precise for value " + x); + } + } + + /** + * Test that inverse cumulative probability density calculations match expected values. + */ + @ParameterizedTest + @MethodSource + final void testInverseCumulativeProbability(DiscreteDistribution distribution, + double[] points, + int[] values, + double tolerance) { + for (int i = 0; i < points.length; i++) { + final double x = points[i]; + Assertions.assertEquals( + values[i], + distribution.inverseCumulativeProbability(points[i]), + tolerance, + () -> "Incorrect inverse cumulative probability value returned for " + x); + } + } + + /** + * Test that cumulative probability density and survival probability calculations + * sum to approximately 1.0.
+ */ + @ParameterizedTest + @MethodSource + final void testSurvivalAndCumulativeProbabilityComplement(DiscreteDistribution distribution, + int[] points, + double tolerance) { + for (final int x : points) { + Assertions.assertEquals( + 1.0, + distribution.survivalProbability(x) + distribution.cumulativeProbability(x), + tolerance, + () -> "survival + cumulative probability were not close to 1.0 for " + x); + } + } + + /** + * Test that probability computations are consistent. + * This checks CDF(x, x) = 0 and CDF(x1) - CDF(x0) = probability(x0, x1). + */ + @ParameterizedTest + @MethodSource + final void testConsistency(DiscreteDistribution distribution, + int[] points, + double tolerance) { + for (int i = 1; i < points.length; i++) { + + // check that cdf(x, x) = 0 + Assertions.assertEquals( + 0.0, + distribution.probability(points[i], points[i]), + tolerance); + + // check that P(a < X <= b) = P(X <= b) - P(X <= a) + final int upper = Math.max(points[i], points[i - 1]); + final int lower = Math.min(points[i], points[i - 1]); + final double diff = distribution.cumulativeProbability(upper) - + distribution.cumulativeProbability(lower); + final double direct = distribution.probability(lower, upper); + Assertions.assertEquals(diff, direct, tolerance, + () -> "Inconsistent probability for (" + lower + "," + upper + ")"); + } + } + + /** + * Test CDF and inverse CDF values at the edge of the support of the distribution return + * expected values and the CDF outside the support returns consistent values. + */ + @ParameterizedTest + @MethodSource + final void testOutsideSupport(DiscreteDistribution dist, + double tolerance) { + // Test various quantities when the variable is outside the support. 
+ final int lo = dist.getSupportLowerBound(); + Assertions.assertEquals(dist.probability(lo), dist.cumulativeProbability(lo), tolerance); + Assertions.assertEquals(lo, dist.inverseCumulativeProbability(0.0)); + + if (lo != Integer.MIN_VALUE) { + final int below = lo - 1; + Assertions.assertEquals(0.0, dist.probability(below)); + Assertions.assertEquals(Double.NEGATIVE_INFINITY, dist.logProbability(below)); + Assertions.assertEquals(0.0, dist.cumulativeProbability(below)); + Assertions.assertEquals(1.0, dist.survivalProbability(below)); + } + + final int hi = dist.getSupportUpperBound(); + Assertions.assertEquals(0.0, dist.survivalProbability(hi)); + Assertions.assertEquals(dist.probability(hi), dist.survivalProbability(hi - 1), tolerance); + Assertions.assertEquals(hi, dist.inverseCumulativeProbability(1.0)); + if (hi != Integer.MAX_VALUE) { + final int above = hi + 1; + Assertions.assertEquals(0.0, dist.probability(above)); + Assertions.assertEquals(Double.NEGATIVE_INFINITY, dist.logProbability(above)); + Assertions.assertEquals(1.0, dist.cumulativeProbability(above)); + Assertions.assertEquals(0.0, dist.survivalProbability(above)); + } + + // Test the logProbability at the support bound. This hits edge case coverage for logProbability. + // It is assumed the log probability may support a value when the plain probability will be zero. + // So do not test Math.log(dist.probability(x)) == dist.logProbability(x) + Assertions.assertEquals(dist.probability(lo), Math.exp(dist.logProbability(lo)), tolerance); + Assertions.assertEquals(dist.probability(hi), Math.exp(dist.logProbability(hi)), tolerance); + } + + /** + * Test invalid probabilities passed to computations that require a p-value in {@code [0, 1]} + * or a range where {@code p1 <= p2}. 
+ */ + @ParameterizedTest + @MethodSource(value = "streamDistrbution") + final void testInvalidProbabilities(DiscreteDistribution dist) { + Assertions.assertThrows(DistributionException.class, () -> dist.probability(1, 0)); + Assertions.assertThrows(DistributionException.class, () -> dist.inverseCumulativeProbability(-1)); + Assertions.assertThrows(DistributionException.class, () -> dist.inverseCumulativeProbability(2)); + } + + /** + * Test sampling from the distribution. + */ + @ParameterizedTest + @MethodSource + final void testSampling(DiscreteDistribution dist, + int[] points, + double[] values) { + // This test uses the points that are used to test the distribution PMF. + // The sum of the probability values does not have to be 1 (or very close to 1). + // Any value generated by the sampler that is not an expected point will + // be ignored. If the sum of probabilities is above 0.5 then at least half + // of the samples should be counted and the test will verify these occur with + // the expected relative frequencies. Note: The expected values are normalised + // to 1 (i.e. relative frequencies) by the Chi-square test. + points = points.clone(); + values = values.clone(); + final int length = TestUtils.eliminateZeroMassPoints(points, values); + final double[] expected = Arrays.copyOf(values, length); + + // This test will not be valid if the points do not represent enough of the PMF. + // Require at least 50%. + final double sum = Arrays.stream(expected).sum(); + Assumptions.assumeTrue(sum > 0.5, + () -> "Not enough of the PMF is tested during sampling: " + sum); + + // Use fixed seed. 
+ final DiscreteDistribution.Sampler sampler = + dist.createSampler(RandomSource.XO_SHI_RO_256_PP.create(1234567890L)); + + // Edge case for distributions with all mass in a single point + if (length == 1) { + final int point = points[0]; + for (int i = 0; i < 20; i++) { + Assertions.assertEquals(point, sampler.sample()); + } + return; + } + + final int sampleSize = 1000; + MathArrays.scaleInPlace(sampleSize, expected); + + final int[] sample = TestUtils.sample(sampleSize, sampler); + + final long[] counts = new long[length]; + for (int i = 0; i < sampleSize; i++) { + final int x = sample[i]; + for (int j = 0; j < length; j++) { + if (x == points[j]) { + counts[j]++; + break; + } + } + } + + TestUtils.assertChiSquareAccept(points, expected, counts, 0.001); + } + + /** + * Test that density integrals match the distribution. + * The (filtered, sorted) points array is used to source + * integration limits. The integral of the probability mass function + * is compared with the cdf over the same interval. + * Test points outside of the domain of the density function + * are discarded. + */ + @ParameterizedTest + @MethodSource + final void testDensityIntegrals(DiscreteDistribution dist, + int[] points, + double[] values, + double tolerance) { + final ArrayList<Integer> integrationTestPoints = new ArrayList<>(); + for (int i = 0; i < points.length; i++) { + if (Double.isNaN(values[i]) || + values[i] < 1e-5 || + values[i] > 1 - 1e-5) { + continue; // exclude integrals outside domain. 
+ } + integrationTestPoints.add(points[i]); + } + Collections.sort(integrationTestPoints); + for (int i = 1; i < integrationTestPoints.size(); i++) { + final int x0 = integrationTestPoints.get(i - 1); + final int x1 = integrationTestPoints.get(i); + // Ignore large ranges + if (x1 - x0 > 50) { + continue; + } + final double sum = IntStream.rangeClosed(x0 + 1, x1).mapToDouble(dist::probability).sum(); + Assertions.assertEquals(dist.probability(x0, x1), sum, tolerance); + } + } + + /** + * Test the support of the distribution matches the expected values. + */ + @ParameterizedTest + @MethodSource + final void testSupport(DiscreteDistribution dist, double lower, double upper, boolean connected) { + Assertions.assertEquals(lower, dist.getSupportLowerBound()); + Assertions.assertEquals(upper, dist.getSupportUpperBound()); + Assertions.assertEquals(connected, dist.isSupportConnected()); + } + + /** + * Test the moments of the distribution matches the expected values. + */ + @ParameterizedTest + @MethodSource + final void testMoments(DiscreteDistribution dist, double mean, double variance, double tolerance) { + Assertions.assertEquals(mean, dist.getMean(), tolerance); + Assertions.assertEquals(variance, dist.getVariance(), tolerance); + } +} diff --git a/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/BaseDistributionTest.java b/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/BaseDistributionTest.java new file mode 100644 index 0000000..f6ef794 --- /dev/null +++ b/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/BaseDistributionTest.java @@ -0,0 +1,379 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.statistics.distribution; + +import java.io.IOException; +import java.io.InputStream; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Locale; +import java.util.Properties; +import java.util.function.Function; +import java.util.function.ToDoubleFunction; +import java.util.stream.Stream; +import java.util.stream.Stream.Builder; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Assumptions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Named; +import org.junit.jupiter.api.TestInstance; +import org.junit.jupiter.api.TestInstance.Lifecycle; +import org.junit.jupiter.api.extension.ParameterContext; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.aggregator.AggregateWith; +import org.junit.jupiter.params.aggregator.ArgumentsAccessor; +import org.junit.jupiter.params.aggregator.ArgumentsAggregationException; +import org.junit.jupiter.params.aggregator.ArgumentsAggregator; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +/** + * Abstract base class for distribution tests. + * + * <p>This class uses parameterized tests that are repeated for instances of a + * distribution. 
The distribution, test input and expected values are generated + * dynamically from properties files loaded from resources. + * + * <p>The class has two specializations for testing {@link ContinuousDistribution} and + * {@link DiscreteDistribution}. It is not intended to extend this class when creating + * a test for a new distribution. This class exists for the sole purpose of containing + * common functionality to search for and load properties files containing the distribution + * data. + * + * <p>To test a new distribution extend the specialized classes: + * <ul> + * <li>{@link BaseContinuousDistributionTest} + * <li>{@link BaseDiscreteDistributionTest} + * </ul> + * + * @param <T> Distribution type + * @param <D> Distribution data type + */ +@TestInstance(Lifecycle.PER_CLASS) +abstract class BaseDistributionTest<T, D extends DistributionTestData> { + /** The test data. Protected to allow use in sub-classes. */ + protected final List<D> data = new ArrayList<>(); + + /** + * Set up the test using data loaded from resource files. + * Resource files are assumed to be named sequentially from 0: + * <pre> + * test.distname.0.properties + * test.distname.1.properties + * test.distname.2.properties + * </pre> + * <p>Where {@code distname} is the name of the distribution. The name + * is dynamically created in {@link #getDistributionName()} and can be + * overridden by implementing classes.
+ */ + @BeforeAll + void setup() { + final String key = getDistributionName().toLowerCase(Locale.ROOT); + for (int i = 0; ; i++) { + final String filename = String.format("test.%s.%d.properties", key, i); + try (InputStream resource = this.getClass().getResourceAsStream( + filename)) { + if (resource == null) { + break; + } + // Load properties file + final Properties prop = new Properties(); + prop.load(resource); + // Convert the properties to a D instance + data.add(makeDistributionData(prop)); + } catch (IOException | NullPointerException | IllegalArgumentException e) { + Assertions.fail("Failed to load test data: " + filename, e); + } + } + } + + /** + * Gets the distribution name. This is used to search for test case resource files. + * + * <p>The default implementation removes the text {@code DistributionTest} from the + * simple class name. + * + * @return the distribution name + * @see Class#getSimpleName() + */ + String getDistributionName() { + return getClass().getSimpleName().replace("DistributionTest", ""); + } + + /** + * Create a new distribution data instance from the properties. + * + * @param properties Properties + * @return the distribution data + */ + abstract D makeDistributionData(Properties properties); + + /** + * Create a new distribution instance from the parameters. + * It is assumed the parameters match the order of the parameter constructor. + * + * @param parameters Parameters of the distribution. + * @return the distribution + */ + abstract T makeDistribution(Object... parameters); + + /** Creates invalid parameters that are expected to throw an exception when passed to + * the {@link #makeDistribution(Object...)} method. + * + * <p>This may return as many inner parameter arrays as is required to test all permutations + * of invalid parameters to the distribution. + * @return Array of invalid parameter arrays + */ + abstract Object[][] makeInvalidParameters(); + + /** + * Gets the parameter names. 
+ * The names will be used with reflection to identify a parameter accessor in the distribution + * with the name {@code getX()} where {@code X} is the parameter name. + * The names should use the same order as {@link #makeDistribution(Object...)}. + * + * <p>Return {@code null} to ignore this test. Return {@code null} for an element of the + * returned array to ignore that parameter. + * + * @return the parameter names + */ + abstract String[] getParameterNames(); + + + //------------------------ Methods to stream the test data ----------------------------- + + // The @MethodSource annotation will default to a no arguments method of the same name + // as the @ParameterizedTest method. These can be overridden by child classes to + // stream different arguments to the test case. + + /** + * Create a named argument for the distribution from the parameters. + * This is a convenience method to present the distribution with a short name in a test report. + * + * <p>This is used to create a new instance of the distribution for a test. + * + * @param parameters Parameters of the distribution. + * @return the distribution argument + */ + Named<T> namedDistribution(Object... parameters) { + final T dist = makeDistribution(parameters); + final String name = dist.getClass().getSimpleName() + " " + Arrays.toString(parameters); + return Named.of(name, dist); + } + + /** + * Create a named argument for the array. + * This is a convenience method to present arrays with a short name in a test report. + * May be overridden for example to output more array details. + * + * @param name Name + * @param array Array + * @return the named argument + */ + Named<?> namedArray(String name, Object array) { + if (array instanceof double[]) { + return namedArray(name, (double[]) array); + } + if (array instanceof int[]) { + return namedArray(name, (int[]) array); + } + return Named.of(name, array); + } + + /** + * Create a named argument for the array. 
+ * This is a convenience method to present arrays with a short name in a test report. + * May be overridden for example to output more array details. + * + * @param name Name + * @param array Array + * @return the named argument + */ + Named<double[]> namedArray(String name, double[] array) { + // Create the name using the first 3 elements + final StringBuilder sb = new StringBuilder(75); + sb.append(name); + // Assume length is non-zero length + int i = 0; + sb.append(" ["); + sb.append(array[i++]); + while (i < Math.min(3, array.length)) { + sb.append(", "); + sb.append(array[i++]); + } + if (i < array.length) { + sb.append(", ... "); + } + sb.append(']'); + return Named.of(sb.toString(), array); + } + + /** + * Create a named argument for the array. + * This is a convenience method to present arrays with a short name in a test report. + * May be overridden for example to output more array details. + * + * @param name Name + * @param array Array + * @return the named argument + */ + Named<int[]> namedArray(String name, int[] array) { + // Create the name using the first 3 elements + final StringBuilder sb = new StringBuilder(75); + sb.append(name); + // Assume length is non-zero length + int i = 0; + sb.append(" ["); + sb.append(array[i++]); + while (i < Math.min(3, array.length)) { + sb.append(", "); + sb.append(array[i++]); + } + if (i < array.length) { + sb.append(", ... "); + } + sb.append(']'); + return Named.of(sb.toString(), array); + } + /** + * Create a stream of arguments containing the distribution to test. + * + * @return the stream + */ + Stream<Arguments> streamDistrbution() { + return data.stream().map(d -> Arguments.of(namedDistribution(d.getParameters()))); + } + + /** + * Create a stream of arguments containing the distribution to test, the test + * points, test values and the test tolerance. The points, values and tolerance + * are identified using functions on the test instance data. 
+ * + * <p>If the length of the points or values is zero then a + * {@link org.opentest4j.TestAbortedException TestAbortedException} is raised. + * + * @param points Function to create the points + * @param values Function to create the values + * @param tolerance Function to create the tolerance + * @param name Name function under test + * @return the stream + */ + <P, V> Stream<Arguments> stream(Function<D, P> points, + Function<D, V> values, + ToDoubleFunction<D> tolerance, + String name) { + final Builder<Arguments> b = Stream.builder(); + final int[] size = {0}; + data.forEach(d -> { + final P p = points.apply(d); + final V v = values.apply(d); + if (TestUtils.getLength(p) == 0 || TestUtils.getLength(v) == 0) { + return; + } + size[0]++; + b.accept(Arguments.of(namedDistribution(d.getParameters()), + namedArray("points", p), + namedArray("values", v), + tolerance.applyAsDouble(d))); + }); + Assumptions.assumeTrue(size[0] != 0, () -> "Distribution has no data for " + name); + return b.build(); + } + + /** + * Create arguments to test invalid parameters of the distribution. Each Object[] + * will be expected to raise an exception when passed to the {@link #makeDistribution(Object...)} + * method. + * + * @return the arguments + */ + Object[][] testInvalidParameters() { + final Object[][] params = makeInvalidParameters(); + Assumptions.assumeTrue(params != null, "Distribution has no invalid parameters"); + return params; + } + + /** + * Create a stream of arguments containing the parameters used to construct a distribution + * using {@link #makeDistribution(Object...)}. + * + * @return the stream + */ + Stream<Arguments> testParameterAccessors() { + return data.stream().map(d -> Arguments.of(d.getParameters())); + } + + //------------------------ Tests ----------------------------- + + // Tests are final. 
It is expected that the test can be modified by overriding
+    // the method used to stream the arguments, for example to use a specific tolerance
+    // for a test in preference to the tolerance defined in the properties file.
+
+    /**
+     * Test invalid parameters will raise an exception when used to construct a distribution.
+     */
+    @ParameterizedTest
+    @MethodSource
+    final void testInvalidParameters(@AggregateWith(value = ArrayAggregator.class) Object[] parameters) {
+        Assertions.assertThrows(DistributionException.class, () -> makeDistribution(parameters));
+    }
+
+    /**
+     * Test the parameter accessors using the reflection API.
+     */
+    @ParameterizedTest
+    @MethodSource
+    final void testParameterAccessors(@AggregateWith(value = ArrayAggregator.class) Object[] parameters) {
+        final String[] names = getParameterNames();
+        Assumptions.assumeTrue(names != null, "No parameter accessors");
+        Assertions.assertEquals(parameters.length, names.length, "Parameter <-> names length mismatch");
+
+        final T dist = makeDistribution(parameters);
+        for (int i = 0; i < names.length; i++) {
+            final String name = names[i];
+            if (name == null) {
+                continue;
+            }
+            try {
+                final Method method = dist.getClass().getMethod("get" + name);
+                final Object o = method.invoke(dist);
+                Assertions.assertEquals(parameters[i], o, () -> "Invalid parameter for " + name);
+            } catch (NoSuchMethodException | SecurityException | IllegalAccessException |
+                     IllegalArgumentException | InvocationTargetException e) {
+                Assertions.fail("Failed to find method accessor: " + name, e);
+            }
+        }
+    }
+
+    /**
+     * Aggregate all arguments as a single {@code Object[]} array.
+     *
+     * <p>Note: The default JUnit 5 behaviour for an Argument containing an {@code Object[]} is
+     * to use each element of the Object array as an indexed argument. This aggregator changes
+     * the behaviour to pass the Object[] as argument index 0.
+ */ + static class ArrayAggregator implements ArgumentsAggregator { + @Override + public Object aggregateArguments(ArgumentsAccessor accessor, ParameterContext context) + throws ArgumentsAggregationException { + return accessor.toArray(); + } + } +} diff --git a/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/BinomialDistributionTest.java b/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/BinomialDistributionTest.java index 685691b..eb5bc10 100644 --- a/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/BinomialDistributionTest.java +++ b/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/BinomialDistributionTest.java @@ -17,198 +17,37 @@ package org.apache.commons.statistics.distribution; import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.CsvSource; /** - * Test cases for BinomialDistribution. Extends DiscreteDistributionAbstractTest. - * See class javadoc for DiscreteDistributionAbstractTest for details. + * Test for the {@link BinomialDistribution}. 
*/ -class BinomialDistributionTest extends DiscreteDistributionAbstractTest { - - //---------------------- Override tolerance -------------------------------- - - @BeforeEach - void customSetUp() { - setTolerance(1e-12); - } - - //-------------- Implementations for abstract methods ---------------------- - - @Override - public DiscreteDistribution makeDistribution() { - return new BinomialDistribution(10, 0.70); - } - - @Override - public int[] makeProbabilityTestPoints() { - return new int[] {-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; - } - +class BinomialDistributionTest extends BaseDiscreteDistributionTest { @Override - public double[] makeProbabilityTestValues() { - // Reference values are from R, version 2.15.3. - return new double[] {0d, 0.0000059049d, 0.000137781d, 0.0014467005, - 0.009001692, 0.036756909, 0.1029193452, 0.200120949, 0.266827932, - 0.2334744405, 0.121060821, 0.0282475249, 0d}; + DiscreteDistribution makeDistribution(Object... parameters) { + return new BinomialDistribution((Integer) parameters[0], (Double) parameters[1]); } @Override - public int[] makeCumulativeTestPoints() { - return makeProbabilityTestPoints(); + Object[][] makeInvalidParameters() { + return new Object[][] { + {-1, 0.1}, + {10, -0.1}, + {10, 1.1}, + }; } @Override - public double[] makeCumulativeTestValues() { - // Reference values are from R, version 2.15.3. 
- return new double[] {0d, 5.9049e-06, 0.0001436859, 0.0015903864, 0.0105920784, 0.0473489874, - 0.1502683326, 0.3503892816, 0.6172172136, 0.8506916541, 0.9717524751, 1d, 1d}; - } - - @Override - public double[] makeInverseCumulativeTestPoints() { - return new double[] {0, 0.001d, 0.010d, 0.025d, 0.050d, 0.100d, - 0.999d, 0.990d, 0.975d, 0.950d, 0.900d, 1d}; - } - - @Override - public int[] makeInverseCumulativeTestValues() { - return new int[] {0, 2, 3, 4, 5, 5, 10, 10, 10, 9, 9, 10}; + String[] getParameterNames() { + return new String[] {"NumberOfTrials", "ProbabilityOfSuccess"}; } //-------------------- Additional test cases ------------------------------- - /** Test case n = 10, p = 0.3. */ - @Test - void testSmallPValue() { - final BinomialDistribution dist = new BinomialDistribution(10, 0.3); - setDistribution(dist); - setCumulativeTestPoints(makeCumulativeTestPoints()); - // computed using R version 3.4.4 - setCumulativeTestValues(new double[] {0.00000000000000000000, 0.02824752489999998728, 0.14930834590000002793, - 0.38278278639999974153, 0.64961071840000017552, 0.84973166740000016794, 0.95265101260000006889, - 0.98940792160000001765, 0.99840961360000002323, 0.99985631409999997654, 0.99999409509999992451, - 1.00000000000000000000, 1.00000000000000000000}); - setProbabilityTestPoints(makeProbabilityTestPoints()); - setProbabilityTestValues(new double[] {0.0000000000000000000e+00, 2.8247524899999980341e-02, - 1.2106082099999991575e-01, 2.3347444049999999116e-01, 2.6682793199999993439e-01, 2.0012094900000007569e-01, - 1.0291934520000002584e-01, 3.6756909000000004273e-02, 9.0016919999999864960e-03, 1.4467005000000008035e-03, - 1.3778099999999990615e-04, 5.9048999999999949131e-06, 0.0000000000000000000e+00}); - setInverseCumulativeTestPoints(makeInverseCumulativeTestPoints()); - setInverseCumulativeTestValues(new int[] {0, 0, 0, 0, 1, 1, 8, 7, 6, 5, 5, 10}); - verifyProbabilities(); - verifyLogProbabilities(); - verifyCumulativeProbabilities(); - 
verifySurvivalProbability(); - verifySurvivalAndCumulativeProbabilityComplement(); - verifyInverseCumulativeProbabilities(); - } - - /** Test degenerate case p = 0 */ - @Test - void testDegenerate0() { - final BinomialDistribution dist = new BinomialDistribution(5, 0.0d); - setDistribution(dist); - setCumulativeTestPoints(new int[] {-1, 0, 1, 5, 10}); - setCumulativeTestValues(new double[] {0d, 1d, 1d, 1d, 1d}); - setProbabilityTestPoints(new int[] {-1, 0, 1, 10, 11}); - setProbabilityTestValues(new double[] {0d, 1d, 0d, 0d, 0d}); - setInverseCumulativeTestPoints(new double[] {0.1d, 0.5d}); - setInverseCumulativeTestValues(new int[] {0, 0}); - verifyProbabilities(); - verifyLogProbabilities(); - verifyCumulativeProbabilities(); - verifySurvivalProbability(); - verifySurvivalAndCumulativeProbabilityComplement(); - verifyInverseCumulativeProbabilities(); - Assertions.assertEquals(0, dist.getSupportLowerBound()); - Assertions.assertEquals(0, dist.getSupportUpperBound()); - } - - /** Test degenerate case p = 1 */ - @Test - void testDegenerate1() { - final BinomialDistribution dist = new BinomialDistribution(5, 1.0d); - setDistribution(dist); - setCumulativeTestPoints(new int[] {-1, 0, 1, 2, 5, 10}); - setCumulativeTestValues(new double[] {0d, 0d, 0d, 0d, 1d, 1d}); - setProbabilityTestPoints(new int[] {-1, 0, 1, 2, 5, 10}); - setProbabilityTestValues(new double[] {0d, 0d, 0d, 0d, 1d, 0d}); - setInverseCumulativeTestPoints(new double[] {0.1d, 0.5d}); - setInverseCumulativeTestValues(new int[] {5, 5}); - verifyProbabilities(); - verifyLogProbabilities(); - verifyCumulativeProbabilities(); - verifySurvivalProbability(); - verifySurvivalAndCumulativeProbabilityComplement(); - verifyInverseCumulativeProbabilities(); - Assertions.assertEquals(5, dist.getSupportLowerBound()); - Assertions.assertEquals(5, dist.getSupportUpperBound()); - } - - /** Test degenerate case n = 0 */ - @Test - void testDegenerate2() { - final BinomialDistribution dist = new BinomialDistribution(0, 
0.01d); - setDistribution(dist); - setCumulativeTestPoints(new int[] {-1, 0, 1, 2, 5, 10}); - setCumulativeTestValues(new double[] {0d, 1d, 1d, 1d, 1d, 1d}); - setProbabilityTestPoints(new int[] {-1, 0, 1, 2, 5, 10}); - setProbabilityTestValues(new double[] {0d, 1d, 0d, 0d, 0d, 0d}); - setInverseCumulativeTestPoints(new double[] {0.1d, 0.5d}); - setInverseCumulativeTestValues(new int[] {0, 0}); - verifyProbabilities(); - verifyLogProbabilities(); - verifyCumulativeProbabilities(); - verifySurvivalProbability(); - verifySurvivalAndCumulativeProbabilityComplement(); - verifyInverseCumulativeProbabilities(); - Assertions.assertEquals(0, dist.getSupportLowerBound()); - Assertions.assertEquals(0, dist.getSupportUpperBound()); - } - - @ParameterizedTest - @CsvSource({ - "11, 0.1", - "42, 0.456", - "999, 0.999", - }) - void testParameterAccessors(int trials, double p) { - final BinomialDistribution dist = new BinomialDistribution(trials, p); - Assertions.assertEquals(trials, dist.getNumberOfTrials()); - Assertions.assertEquals(p, dist.getProbabilityOfSuccess()); - } - - @ParameterizedTest - @CsvSource({ - "-1, 0.1", - "10, -0.1", - "10, 1.1", - }) - void testConstructorPreconditions(int trials, double p) { - Assertions.assertThrows(DistributionException.class, () -> new BinomialDistribution(trials, p)); - } - - @Test - void testMoments() { - final double tol = 1e-9; - BinomialDistribution dist; - - dist = new BinomialDistribution(10, 0.5); - Assertions.assertEquals(10d * 0.5d, dist.getMean(), tol); - Assertions.assertEquals(10d * 0.5d * 0.5d, dist.getVariance(), tol); - - dist = new BinomialDistribution(30, 0.3); - Assertions.assertEquals(30d * 0.3d, dist.getMean(), tol); - Assertions.assertEquals(30d * 0.3d * (1d - 0.3d), dist.getVariance(), tol); - } - @Test void testMath718() { - // for large trials the evaluation of ContinuedFraction was inaccurate - // do a sweep over several large trials to test if the current implementation is + // For large trials the evaluation 
of ContinuedFraction was inaccurate. + // Do a sweep over several large trials to test if the current implementation is // numerically stable. for (int trials = 500000; trials < 20000000; trials += 100000) { @@ -217,22 +56,4 @@ class BinomialDistributionTest extends DiscreteDistributionAbstractTest { Assertions.assertEquals(trials / 2, p); } } - - @Test - void testHighPrecisionCumulativeProbabilities() { - // computed using R version 3.4.4 - setDistribution(new BinomialDistribution(100, 0.99)); - setCumulativePrecisionTestPoints(new int[] {82, 81}); - setCumulativePrecisionTestValues(new double[] {1.4061271955993513664e-17, 6.1128083336354843707e-19}); - verifyCumulativeProbabilityPrecision(); - } - - @Test - void testHighPrecisionSurvivalProbabilities() { - // computed using R version 3.4.4 - setDistribution(new BinomialDistribution(100, 0.01)); - setSurvivalPrecisionTestPoints(new int[] {18, 19}); - setSurvivalPrecisionTestValues(new double[] {6.1128083336353977038e-19, 2.4944165604029235392e-20}); - verifySurvivalProbabilityPrecision(); - } } diff --git a/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/DistributionTestData.java b/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/DistributionTestData.java new file mode 100644 index 0000000..02a41e4 --- /dev/null +++ b/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/DistributionTestData.java @@ -0,0 +1,729 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.statistics.distribution;
+
+import java.util.Arrays;
+import java.util.Objects;
+import java.util.Properties;
+import java.util.regex.Pattern;
+
+/**
+ * Contains the data for the distribution parameters, the expected properties
+ * of the distribution (moments and support bounds) and test points to evaluate
+ * with expected values.
+ */
+abstract class DistributionTestData {
+    /** Regex to split delimited text data (e.g. arrays of numbers). */
+    private static final Pattern PATTERN = Pattern.compile("[ ,]+");
+
+    /** Distribution parameters. */
+    private final Object[] parameters;
+    /** Mean. */
+    private final double mean;
+    /** Variance. */
+    private final double variance;
+    /** Support connected flag. */
+    private final boolean connected;
+    /** Test tolerance for calculations. */
+    private final double tolerance;
+    /** Test tolerance for high-precision calculations. */
+    private final double hpTolerance;
+    /** Expected CDF values. */
+    private final double[] cdfValues;
+    /** Expected PDF values. */
+    private final double[] pdfValues;
+    /** Expected log PDF values. */
+    private final double[] logPdfValues;
+    /** Expected SF values for the survival function test points. */
+    private final double[] sfValues;
+    /** Expected CDF values for the high-precision CDF test points. */
+    private final double[] cdfHpValues;
+    /** Expected SF values for the high-precision survival function test points. 
*/ + private final double[] sfHpValues; + + /** + * Contains the data for the continuous distribution parameters, the expected properties + * of the distribution (moments and support bounds) and test points to evaluate + * with expected values. + */ + static class ContinuousDistributionTestData extends DistributionTestData { + /** Support lower bound. */ + private final double lower; + /** Support upper bound. */ + private final double upper; + /** Test points to evaluate the CDF. */ + private final double[] cdfPoints; + /** Test points to evaluate the PDF. */ + private final double[] pdfPoints; + /** Test points to evaluate survival function computations. */ + private final double[] sfPoints; + /** Test points to evaluate high-precision CDF computations. */ + private final double[] cdfHpPoints; + /** Test points to evaluate high-precision survival function computations. */ + private final double[] sfHpPoints; + /** Test points to evaluate the inverse CDF. */ + private final double[] icdfPoints; + /** Expected inverse CDF values. 
*/ + private final double[] icdfValues; + + /** + * @param props Properties containing the test data + */ + ContinuousDistributionTestData(Properties props) { + super(props); + // Load all the data + lower = getAsDouble(props, "lower", Double.NEGATIVE_INFINITY); + upper = getAsDouble(props, "upper", Double.POSITIVE_INFINITY); + // Required + cdfPoints = getAsDoubleArray(props, "cdf.points"); + pdfPoints = getAsDoubleArray(props, "pdf.points", cdfPoints); + sfPoints = getAsDoubleArray(props, "sf.points", cdfPoints); + cdfHpPoints = getAsDoubleArray(props, "cdf.hp.points", null); + sfHpPoints = getAsDoubleArray(props, "sf.hp.points", null); + // Default to inverse mapping + icdfPoints = getAsDoubleArray(props, "icdf.points", getCdfValues()); + icdfValues = getAsDoubleArray(props, "icdf.values", cdfPoints); + // Validation + validatePair(cdfPoints, getCdfValues(), "cdf"); + validatePair(pdfPoints, getPdfValues(), "pdf"); + validatePair(pdfPoints, getLogPdfValues(), "logpdf"); + validatePair(sfPoints, getSfValues(), "sf"); + validatePair(cdfHpPoints, getCdfHpValues(), "cdf.hp"); + validatePair(sfHpPoints, getSfHpValues(), "sf.hp"); + validatePair(icdfPoints, icdfValues, "icdf"); + } + + @Override + String getProbabilityFunctionName() { + return "pdf"; + } + + /** + * Gets the support lower bound of the distribution. + * + * @return the lower bound + */ + double getLower() { + return lower; + } + + /** + * Gets the support upper bound of the distribution. + * + * @return the upper bound + */ + double getUpper() { + return upper; + } + + /** + * Gets the points to evaluate the CDF. + * + * @return the points + */ + double[] getCdfPoints() { + return cdfPoints; + } + + /** + * Gets the points to evaluate the PDF. + * + * @return the points + */ + double[] getPdfPoints() { + return pdfPoints; + } + + /** + * Gets the points to evaluate for survival function. 
+         *
+         * @return the SF points
+         */
+        double[] getSfPoints() {
+            return sfPoints;
+        }
+
+        /**
+         * Gets the points to evaluate the cumulative probability where the result
+         * is expected to be approaching zero and requires a high-precision computation.
+         *
+         * @return the CDF high-precision points
+         */
+        double[] getCdfHpPoints() {
+            return cdfHpPoints;
+        }
+
+        /**
+         * Gets the points to evaluate the survival probability where the result
+         * is expected to be approaching zero and requires a high-precision computation.
+         *
+         * @return the survival function high-precision points
+         */
+        double[] getSfHpPoints() {
+            return sfHpPoints;
+        }
+
+        @Override
+        double[] getIcdfPoints() {
+            return icdfPoints;
+        }
+
+        /**
+         * Gets the expected inverse cumulative probability values for the test inverse CDF points.
+         *
+         * @return the inverse CDF values
+         */
+        double[] getIcdfValues() {
+            return icdfValues;
+        }
+    }
+
+    /**
+     * Contains the data for the discrete distribution parameters, the expected properties
+     * of the distribution (moments and support bounds) and test points to evaluate
+     * with expected values.
+     */
+    static class DiscreteDistributionTestData extends DistributionTestData {
+        /** Support lower bound. */
+        private final int lower;
+        /** Support upper bound. */
+        private final int upper;
+        /** Test points to evaluate the CDF. */
+        private final int[] cdfPoints;
+        /** Test points to evaluate the PMF. */
+        private final int[] pmfPoints;
+        /** Test points to evaluate survival function computations. */
+        private final int[] sfPoints;
+        /** Test points to evaluate high-precision CDF computations. */
+        private final int[] cdfHpPoints;
+        /** Test points to evaluate high-precision survival function computations. */
+        private final int[] sfHpPoints;
+        /** Test points to evaluate the inverse CDF. */
+        private final double[] icdfPoints;
+        /** Expected inverse CDF values. 
*/ + private final int[] icdfValues; + + /** + * @param props Properties containing the test data + */ + DiscreteDistributionTestData(Properties props) { + super(props); + // Load all the data + lower = getAsInt(props, "lower", Integer.MIN_VALUE); + upper = getAsInt(props, "upper", Integer.MAX_VALUE); + // Required + cdfPoints = getAsIntArray(props, "cdf.points"); + pmfPoints = getAsIntArray(props, "pmf.points", cdfPoints); + sfPoints = getAsIntArray(props, "sf.points", cdfPoints); + cdfHpPoints = getAsIntArray(props, "cdf.hp.points", null); + sfHpPoints = getAsIntArray(props, "sf.hp.points", null); + // Default to inverse mapping + icdfPoints = getAsDoubleArray(props, "icdf.points", getCdfValues()); + icdfValues = getAsIntArray(props, "icdf.values", cdfPoints); + // Validation + validatePair(cdfPoints, getCdfValues(), "cdf"); + validatePair(pmfPoints, getPdfValues(), "pdf"); + validatePair(pmfPoints, getLogPdfValues(), "logpdf"); + validatePair(sfPoints, getSfValues(), "sf"); + validatePair(cdfHpPoints, getCdfHpValues(), "cdf.hp"); + validatePair(sfHpPoints, getSfHpValues(), "sf.hp"); + validatePair(icdfPoints, icdfValues, "icdf"); + } + + @Override + String getProbabilityFunctionName() { + return "pmf"; + } + + /** + * Gets the support lower bound of the distribution. + * + * @return the lower bound + */ + int getLower() { + return lower; + } + + /** + * Gets the support upper bound of the distribution. + * + * @return the upper bound + */ + int getUpper() { + return upper; + } + + /** + * Gets the points to evaluate the CDF. + * + * @return the points + */ + int[] getCdfPoints() { + return cdfPoints; + } + + /** + * Gets the points to evaluate the PMF. + * + * @return the points + */ + int[] getPmfPoints() { + return pmfPoints; + } + + /** + * Gets the expected density values for the PMF test points. 
+ * + * @return the PDF values + */ + double[] getPmfValues() { + return getPdfValues(); + } + + /** + * Gets the expected log density values for the PMF test points. + * + * @return the log PDF values + */ + double[] getLogPmfValues() { + return getLogPdfValues(); + } + + /** + * Gets the points to evaluate for survival function. + * + * @return the SF points + */ + int[] getSfPoints() { + return sfPoints; + } + + /** + * Gets the points to evaluate the cumulative probability where the result + * is expected to be approaching zero and requires a high-precision computation. + * + * @return the CDF high-precision points + */ + int[] getCdfHpPoints() { + return cdfHpPoints; + } + + /** + * Gets the points to evaluate the survival probability where the result + * is expected to be approaching zero and requires a high-precision computation. + * + * @return the survival function high-precision points + */ + int[] getSfHpPoints() { + return sfHpPoints; + } + + @Override + double[] getIcdfPoints() { + return icdfPoints; + } + + /** + * Gets the expected inverse cumulative probability values for the test inverse CDF points. 
+ * + * @return the inverse CDF values + */ + int[] getIcdfValues() { + return icdfValues; + } + } + + /** + * @param props Properties containing the test data + */ + DistributionTestData(Properties props) { + // Load all the data + parameters = PATTERN.splitAsStream(get(props, "parameters")) + .map(DistributionTestData::parseParameter).toArray(); + mean = getAsDouble(props, "mean"); + variance = getAsDouble(props, "variance"); + connected = getAsBoolean(props, "connected", true); + tolerance = getAsDouble(props, "tolerance", 1e-4); + hpTolerance = getAsDouble(props, "tolerance.hp", 1e-22); + // Required + cdfValues = getAsDoubleArray(props, "cdf.values"); + final String pf = getProbabilityFunctionName(); + pdfValues = getAsDoubleArray(props, pf + ".values"); + // Optional + double[] tmp = getAsDoubleArray(props, "log" + pf + ".values", null); + if (tmp == null && pdfValues != null) { + tmp = Arrays.stream(pdfValues).map(Math::log).toArray(); + } + logPdfValues = tmp; + tmp = getAsDoubleArray(props, "sf.values", null); + if (tmp == null && cdfValues != null) { + tmp = Arrays.stream(cdfValues).map(d -> 1 - d).toArray(); + } + sfValues = tmp; + cdfHpValues = getAsDoubleArray(props, "cdf.hp.values", null); + sfHpValues = getAsDoubleArray(props, "sf.hp.values", null); + } + + /** + * Gets the name of the probability density function. + * For continuous distributions this is PDF and discrete distributions is PMF. + * + * @return the PDF name + */ + abstract String getProbabilityFunctionName(); + + /** + * Parses the String parameter to an appropriate object. Supports Double and Integer. + * + * @param value Value + * @return the object + * @throws IllegalArgumentException if the parameter type is unknown + */ + private static Object parseParameter(String value) { + // Only support int or double parameters. + // This uses inefficient parsing which will relies on catching parse exceptions. 
+ try { + return Integer.parseInt(value); + } catch (NumberFormatException ex) { /* ignore */ } + try { + return Double.parseDouble(value); + } catch (NumberFormatException ex) { + throw new IllegalArgumentException("Unknown parameter type: " + value, ex); + } + } + + /** + * Gets the property. + * + * @param props Properties + * @param key Key + * @return the value + * @throws NullPointerException if the parameter is missing + */ + private static String get(Properties props, String key) { + return Objects.requireNonNull(props.getProperty(key), () -> "Missing test data: " + key); + } + + /** + * Gets the property as a double. + * + * @param props Properties + * @param key Key + * @return the value + * @throws NullPointerException if the parameter is missing. + * @throws IllegalArgumentException if the parameter is not a double. + */ + private static double getAsDouble(Properties props, String key) { + try { + return Double.parseDouble(get(props, key)); + } catch (NumberFormatException ex) { + throw new IllegalArgumentException("Invalid double: " + key); + } + } + + /** + * Gets the property as a double, or a default value if the property is missing. + * + * @param props Properties + * @param key Key + * @param defaultValue Default value + * @return the value + * @throws IllegalArgumentException if the parameter is not a double. + */ + private static double getAsDouble(Properties props, String key, double defaultValue) { + try { + final String s = props.getProperty(key); + return s == null ? defaultValue : Double.parseDouble(s); + } catch (NumberFormatException ex) { + throw new IllegalArgumentException("Invalid double: " + key); + } + } + + /** + * Gets the property as a double, or a default value if the property is missing. + * + * @param props Properties + * @param key Key + * @param defaultValue Default value + * @return the value + * @throws IllegalArgumentException if the parameter is not a double. 
+ */ + private static int getAsInt(Properties props, String key, int defaultValue) { + try { + final String s = props.getProperty(key); + return s == null ? defaultValue : Integer.parseInt(s); + } catch (NumberFormatException ex) { + throw new IllegalArgumentException("Invalid double: " + key); + } + } + + /** + * Gets the property as a boolean, or a default value if the property is missing. + * + * @param props Properties + * @param key Key + * @param defaultValue Default value + * @return the value + * @throws IllegalArgumentException if the parameter is not a boolean. + */ + private static boolean getAsBoolean(Properties props, String key, boolean defaultValue) { + try { + final String s = props.getProperty(key); + return s == null ? defaultValue : Boolean.parseBoolean(s); + } catch (NumberFormatException ex) { + throw new IllegalArgumentException("Invalid boolean: " + key); + } + } + + /** + * Gets the property as a double array. + * + * @param props Properties + * @param key Key + * @return the value + * @throws NullPointerException if the parameter is missing. + * @throws IllegalArgumentException if the parameter is not a double array. + */ + private static double[] getAsDoubleArray(Properties props, String key) { + try { + return PATTERN.splitAsStream(get(props, key)).mapToDouble(Double::parseDouble).toArray(); + } catch (NumberFormatException ex) { + throw new IllegalArgumentException("Invalid double: " + key); + } + } + + /** + * Gets the property as a double array, or a default value if the property is missing. + * + * @param props Properties + * @param key Key + * @param defaultValue Default value + * @return the value + * @throws IllegalArgumentException if the parameter is not a double array. + */ + private static double[] getAsDoubleArray(Properties props, String key, double[] defaultValue) { + try { + final String s = props.getProperty(key); + return s == null ? 
defaultValue : + PATTERN.splitAsStream(s).mapToDouble(Double::parseDouble).toArray(); + } catch (NumberFormatException ex) { + throw new IllegalArgumentException("Invalid double: " + key); + } + } + /** + * Gets the property as a double array. + * + * @param props Properties + * @param key Key + * @return the value + * @throws NullPointerException if the parameter is missing. + * @throws IllegalArgumentException if the parameter is not a double array. + */ + private static int[] getAsIntArray(Properties props, String key) { + try { + return PATTERN.splitAsStream(get(props, key)).mapToInt(Integer::parseInt).toArray(); + } catch (NumberFormatException ex) { + throw new IllegalArgumentException("Invalid double: " + key); + } + } + + /** + * Gets the property as a double array, or a default value if the property is missing. + * + * @param props Properties + * @param key Key + * @param defaultValue Default value + * @return the value + * @throws IllegalArgumentException if the parameter is not a double array. + */ + private static int[] getAsIntArray(Properties props, String key, int[] defaultValue) { + try { + final String s = props.getProperty(key); + return s == null ? defaultValue : + PATTERN.splitAsStream(s).mapToInt(Integer::parseInt).toArray(); + } catch (NumberFormatException ex) { + throw new IllegalArgumentException("Invalid double: " + key); + } + } + + /** + * Validate a pair of point-value arrays have the same length if they are both non-zero length. + * + * @param p Array 1 + * @param v Array 2 + * @param name Name of the pair + */ + private static void validatePair(double[] p, double[] v, String name) { + validatePair(TestUtils.getLength(p), TestUtils.getLength(v), name); + } + + /** + * Validate a pair of point-value arrays have the same length if they are both non-zero length. 
+ * + * @param p Array 1 + * @param v Array 2 + * @param name Name of the pair + */ + private static void validatePair(int[] p, double[] v, String name) { + validatePair(TestUtils.getLength(p), TestUtils.getLength(v), name); + } + + /** + * Validate a pair of point-value arrays have the same length if they are both non-zero length. + * + * @param p Array 1 + * @param v Array 2 + * @param name Name of the pair + */ + private static void validatePair(double[] p, int[] v, String name) { + validatePair(TestUtils.getLength(p), TestUtils.getLength(v), name); + } + + /** + * Validate a pair of point-value arrays have the same length if they are both non-zero length. + * + * @param p Length 1 + * @param v Length 2 + * @param name Name of the pair + */ + private static void validatePair(int l1, int l2, String name) { + // Arrays are used when non-zero in length. The lengths must match. + if (l1 != 0 && l2 != 0 && l1 != l2) { + throw new IllegalArgumentException( + String.format("Points-Values length mismatch for %s: %d != %d", name, l1, l2)); + } + } + + /** + * Gets the parameters used to create the distribution. + * + * @return the parameters + */ + Object[] getParameters() { + return parameters; + } + + /** + * Gets the mean of the distribution. + * + * @return the mean + */ + double getMean() { + return mean; + } + + /** + * Gets the variance of the distribution. + * + * @return the variance + */ + double getVariance() { + return variance; + } + + /** + * Checks if the support is connected (continuous from lower to upper bound). + * + * @return true if the support is connected + */ + boolean isConnected() { + return connected; + } + + /** + * Gets the tolerance used when comparing expected and actual results. + * + * @return the tolerance + */ + double getTolerance() { + return tolerance; + } + + /** + * Gets the tolerance used when comparing expected and actual results + * of high-precision computations. 
+ * + * @return the tolerance + */ + double getHighPrecisionTolerance() { + return hpTolerance; + } + + /** + * Gets the expected cumulative probability values for the CDF test points. + * + * @return the CDF values + */ + double[] getCdfValues() { + return cdfValues; + } + + /** + * Gets the expected density values for the PDF test points. + * For a discrete distribution this represents the Probability Mass Function (PMF). + * + * @return the PDF values + */ + double[] getPdfValues() { + return pdfValues; + } + + /** + * Gets the expected log density values for the PDF test points. + * For a discrete distribution this represents the log of the Probability Mass Function (PMF). + * + * @return the log PDF values + */ + double[] getLogPdfValues() { + return logPdfValues; + } + + /** + * Gets the expected survival function values for the survival function test points. + * + * @return the SF values + */ + double[] getSfValues() { + return sfValues; + } + + /** + * Gets the expected cumulative probability values for the CDF high-precision test points. + * + * @return the CDF high-precision values + */ + double[] getCdfHpValues() { + return cdfHpValues; + } + + /** + * Gets the expected survival probability values for the survival function high-precision test points. + * + * @return the survival function high-precision values + */ + double[] getSfHpValues() { + return sfHpValues; + } + + /** + * Gets the points to evaluate the inverse CDF. 
+ * + * @return the inverse CDF points + */ + abstract double[] getIcdfPoints(); +} + diff --git a/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/NakagamiDistributionTest.java b/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/NakagamiDistributionTest.java index e27b4b9..0999b84 100644 --- a/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/NakagamiDistributionTest.java +++ b/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/NakagamiDistributionTest.java @@ -18,140 +18,34 @@ package org.apache.commons.statistics.distribution; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.CsvSource; /** - * Test cases for NakagamiDistribution. + * Test for the {@link NakagamiDistribution}. */ -class NakagamiDistributionTest extends ContinuousDistributionAbstractTest { - - //-------------- Implementations for abstract methods ---------------------- - - // Test values created using scipy.stats nakagami - // The distribution is not defined for x=0. - // Some implementations compute the formula and return the natural limit as x -> 0. - // This implementation returns zero for any x outside the domain. 
- - @Override - public NakagamiDistribution makeDistribution() { - return new NakagamiDistribution(0.5, 1); - } - - @Override - public double[] makeCumulativeTestPoints() { - return new double[] { - 0, 1e-3, 0.2, 0.4, 0.6, 0.8, 1, 1.2, 1.4, 1.6, 1.8, 2 - }; - } - +class NakagamiDistributionTest extends BaseContinuousDistributionTest { @Override - public double[] makeDensityTestValues() { - return new double[] { - 0.0, 0.79788416186068489, 0.78208538795091187, - 0.73654028060664678, 0.66644920578359934, 0.57938310552296557, - 0.48394144903828679, 0.38837210996642596, 0.29945493127148981, - 0.22184166935891111, 0.15790031660178833, 0.10798193302637614, - }; + ContinuousDistribution makeDistribution(Object... parameters) { + return new NakagamiDistribution((Double) parameters[0], (Double) parameters[1]); } @Override - public double[] makeCumulativeTestValues() { - return new double[] { - 0.0, 7.97884427822125389e-04, - 1.58519418878206031e-01, 3.10843483220648364e-01, - 4.51493764499852956e-01, 5.76289202833206615e-01, - 6.82689492137085852e-01, 7.69860659556583560e-01, - 8.38486681532458089e-01, 8.90401416600884343e-01, - 9.28139361774148575e-01, 9.54499736103641472e-01, + Object[][] makeInvalidParameters() { + return new Object[][] { + {0.0, 1.0}, + {-0.1, 1.0}, + {0.5, 0.0}, + {0.5, -0.1} }; } @Override - public double[] makeCumulativePrecisionTestPoints() { - return new double[] {1e-16, 4e-17}; - } - - @Override - public double[] makeCumulativePrecisionTestValues() { - // These were created using WolframAlpha - return new double[] {7.978845608028653e-17, 3.1915382432114614e-17}; - } - - @Override - public double[] makeSurvivalPrecisionTestPoints() { - return new double[] {9, 8.7}; - } - - @Override - public double[] makeSurvivalPrecisionTestValues() { - // These were created using WolframAlpha - return new double[] {2.2571768119076845e-19, 3.318841739929575e-18}; + String[] getParameterNames() { + return new String[] {"Shape", "Scale"}; } //-------------------- 
Additional test cases ------------------------------- @Test - void testAdditionalDistribution1() { - final NakagamiDistribution dist = new NakagamiDistribution(1.0 / 3, 1); - setDistribution(dist); - setCumulativeTestPoints(makeCumulativeTestPoints()); - // Computed using scipy.stats nakagami - setCumulativeTestValues(new double[] { - 0., 0.00776458146673576, 0.26466318463713673, - 0.41599060641445568, 0.53633771818837206, 0.63551561797542433, - 0.71746556659624028, 0.7845448997061909, 0.83861986211366601, - 0.88141004735798412, 0.91458032800205946, 0.93973541101651015 - }); - setDensityTestValues(new double[] { - 0, 5.17638635039373352, 0.8734262427029803, - 0.66605658341650675, 0.54432849968092045, 0.45048535438453824, - 0.3709044132031733, 0.30141976583757241, 0.24075672187548078, - 0.18853365020699897, 0.14451001716499515, 0.10829893529327907 - }); - setInverseCumulativeTestPoints(getCumulativeTestValues()); - setInverseCumulativeTestValues(getCumulativeTestPoints()); - verifyDensities(); - verifyLogDensities(); - verifyCumulativeProbabilities(); - verifySurvivalProbability(); - verifySurvivalAndCumulativeProbabilityComplement(); - verifyInverseCumulativeProbabilities(); - } - - @Test - void testAdditionalDistribution2() { - final NakagamiDistribution dist = new NakagamiDistribution(1.5, 2); - setDistribution(dist); - setCumulativeTestPoints(makeCumulativeTestPoints()); - // Computed using matlab (scipy.stats does not support the omega parameter) - setCumulativeTestValues(new double[] { - 0, 0.000000000488602, - 0.003839209349952, 0.029112642643164, - 0.089980307387723, 0.189070530913232, - 0.317729669663787, 0.460129965238200, - 0.599031192110653, 0.720732382881390, - 0.817659600745483, 0.888389774905287, - }); - setDensityTestValues(new double[] { - 0, 0.000001465806436, - 0.056899455042812, 0.208008745554258, - 0.402828269545621, 0.580491109555755, - 0.692398452624549, 0.716805620039994, - 0.660571957322857, 0.550137830087772, - 0.418105970486118, 
0.291913039977849, - }); - setInverseCumulativeTestPoints(getCumulativeTestValues()); - setInverseCumulativeTestValues(getCumulativeTestPoints()); - verifyDensities(); - verifyLogDensities(); - verifyCumulativeProbabilities(); - verifySurvivalProbability(); - verifySurvivalAndCumulativeProbabilityComplement(); - verifyInverseCumulativeProbabilities(); - } - - @Test void testExtremeLogDensity() { // XXX: Verify with more test data from a reference distribution final NakagamiDistribution dist = new NakagamiDistribution(0.5, 1); @@ -159,56 +53,4 @@ class NakagamiDistributionTest extends ContinuousDistributionAbstractTest { Assertions.assertEquals(0.0, dist.density(x)); Assertions.assertEquals(-1250.22579, dist.logDensity(x), 1e-4); } - - @ParameterizedTest - @CsvSource({ - "1.2, 2.1", - "0.5, 1", - }) - void testParameterAccessors(double shape, double scale) { - final NakagamiDistribution dist = new NakagamiDistribution(shape, scale); - Assertions.assertEquals(shape, dist.getShape()); - Assertions.assertEquals(scale, dist.getScale()); - } - - @ParameterizedTest - @CsvSource({ - "0.0, 1.0", - "-0.1, 1.0", - "0.5, 0.0", - "0.5, -0.1", - }) - void testConstructorPreconditions(double shape, double scale) { - Assertions.assertThrows(DistributionException.class, () -> new NakagamiDistribution(shape, scale)); - } - - @Test - void testMoments() { - // Values obtained using Matlab, e.g. 
- // format long; - // pd = makedist('Nakagami','mu',0.5,'omega',1.0); - // disp([pd.mean, pd.var]) - NakagamiDistribution dist; - final double eps = 1e-9; - - dist = new NakagamiDistribution(0.5, 1.0); - Assertions.assertEquals(0.797884560802866, dist.getMean(), eps); - Assertions.assertEquals(0.363380227632418, dist.getVariance(), eps); - - dist = new NakagamiDistribution(1.23, 2.5); - Assertions.assertEquals(1.431786259006201, dist.getMean(), eps); - Assertions.assertEquals(0.449988108521028, dist.getVariance(), eps); - - dist = new NakagamiDistribution(1.0 / 3, 2.0); - Assertions.assertEquals(1.032107387207478, dist.getMean(), eps); - Assertions.assertEquals(0.934754341271753, dist.getVariance(), eps); - } - - @Test - void testSupport() { - final NakagamiDistribution dist = makeDistribution(); - Assertions.assertEquals(0, dist.getSupportLowerBound()); - Assertions.assertEquals(Double.POSITIVE_INFINITY, dist.getSupportUpperBound()); - Assertions.assertTrue(dist.isSupportConnected()); - } } diff --git a/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/TestUtils.java b/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/TestUtils.java index ca6dfc3..1ba3811 100644 --- a/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/TestUtils.java +++ b/commons-statistics-distribution/src/test/java/org/apache/commons/statistics/distribution/TestUtils.java @@ -17,6 +17,7 @@ package org.apache.commons.statistics.distribution; +import java.lang.reflect.Array; import java.text.DecimalFormat; import java.util.function.Supplier; import org.apache.commons.math3.stat.inference.ChiSquareTest; @@ -304,4 +305,35 @@ final class TestUtils { } return samples; } + + /** + * Gets the length of the array. + * + * @param array Array + * @return the length (or 0 for null array) + */ + static int getLength(double[] array) { + return array == null ? 
0 : array.length; + } + + /** + * Gets the length of the array. + * + * @param array Array + * @return the length (or 0 for null array) + */ + static int getLength(int[] array) { + return array == null ? 0 : array.length; + } + + /** + * Gets the length of the array. + * + * @param array Array + * @return the length (or 0 for null array) + * @throws IllegalArgumentException if the object is not an array + */ + static int getLength(Object array) { + return array == null ? 0 : Array.getLength(array); + } } diff --git a/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.binomial.0.properties b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.binomial.0.properties new file mode 100644 index 0000000..b0e405f --- /dev/null +++ b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.binomial.0.properties @@ -0,0 +1,36 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +parameters = 10 0.7 +# 10 * 0.7 +mean = 7 +# 10 * 0.7 * 0.3 +variance = 2.1 +lower = 0 +upper = 10 +connected = true +tolerance = 1e-12 +cdf.points = -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 +# Reference values are from R, version 2.15.3. 
+cdf.values = \ + 0d, 5.9049e-06, 0.0001436859, 0.0015903864, 0.0105920784, 0.0473489874,\ + 0.1502683326, 0.3503892816, 0.6172172136, 0.8506916541, 0.9717524751, 1d, 1d +pmf.values = \ + 0d, 0.0000059049d, 0.000137781d, 0.0014467005,\ + 0.009001692, 0.036756909, 0.1029193452, 0.200120949, 0.266827932,\ + 0.2334744405, 0.121060821, 0.0282475249, 0d +icdf.points = \ + 0, 0.001d, 0.010d, 0.025d, 0.050d, 0.100d,\ + 0.999d, 0.990d, 0.975d, 0.950d, 0.900d, 1d +icdf.values = 0, 2, 3, 4, 5, 5, 10, 10, 10, 9, 9, 10 diff --git a/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.binomial.1.properties b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.binomial.1.properties new file mode 100644 index 0000000..1a50c4b --- /dev/null +++ b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.binomial.1.properties @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +parameters = 100 0.99 +# 100 * 0.99 +mean = 99 +# 100 * 0.99 * 0.01 +variance = 0.99 +lower = 0 +upper = 100 +# Set values for the required fields. +# Most tests are skipped if not enough of the PMF is covered by points. 
+cdf.points = 100 +cdf.values = 1.0 +pmf.points = 0 +pmf.values = 1e-200 +icdf.points = 0 +icdf.values = 0 +# computed using R version 3.4.4 +cdf.hp.points = 82, 81 +cdf.hp.values = 1.4061271955993513664e-17, 6.1128083336354843707e-19 diff --git a/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.binomial.2.properties b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.binomial.2.properties new file mode 100644 index 0000000..a109431 --- /dev/null +++ b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.binomial.2.properties @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +parameters = 100 0.01 +# 100 * 0.01 +mean = 1 +# 100 * 0.01 * 0.99 +variance = 0.99 +lower = 0 +upper = 100 +# Set values for the required fields. +# Most tests are skipped if not enough of the PMF is covered by points. 
+cdf.points = 100 +cdf.values = 1.0 +pmf.points = 100 +pmf.values = 1e-200 +icdf.points = 0 +icdf.values = 0 +# computed using R version 3.4.4 +sf.hp.points = 18, 19 +sf.hp.values = 6.1128083336353977038e-19, 2.4944165604029235392e-20 diff --git a/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.binomial.3.properties b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.binomial.3.properties new file mode 100644 index 0000000..804c8cb --- /dev/null +++ b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.binomial.3.properties @@ -0,0 +1,39 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+parameters = 10 0.3 +# 10 * 0.3 +mean = 3 +# 10 * 0.3 * 0.7 +variance = 2.1 +lower = 0 +upper = 10 +connected = true +tolerance = 1e-12 +cdf.points = -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 +# Reference values are from R version 3.4.4 +cdf.values = \ + 0.00000000000000000000, 0.02824752489999998728, 0.14930834590000002793,\ + 0.38278278639999974153, 0.64961071840000017552, 0.84973166740000016794, 0.95265101260000006889,\ + 0.98940792160000001765, 0.99840961360000002323, 0.99985631409999997654, 0.99999409509999992451,\ + 1.00000000000000000000, 1.00000000000000000000 +pmf.values = \ + 0.0000000000000000000e+00, 2.8247524899999980341e-02,\ + 1.2106082099999991575e-01, 2.3347444049999999116e-01, 2.6682793199999993439e-01, 2.0012094900000007569e-01,\ + 1.0291934520000002584e-01, 3.6756909000000004273e-02, 9.0016919999999864960e-03, 1.4467005000000008035e-03,\ + 1.3778099999999990615e-04, 5.9048999999999949131e-06, 0.0000000000000000000e+00 +icdf.points = \ + 0, 0.001d, 0.010d, 0.025d, 0.050d, 0.100d,\ + 0.999d, 0.990d, 0.975d, 0.950d, 0.900d, 1d +icdf.values = 0, 0, 0, 0, 1, 1, 8, 7, 6, 5, 5, 10 diff --git a/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.binomial.4.properties b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.binomial.4.properties new file mode 100644 index 0000000..000b25a --- /dev/null +++ b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.binomial.4.properties @@ -0,0 +1,25 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# Degenerate p=1 case +parameters = 5 1.0 +mean = 5 +variance = 0 +lower = 5 +upper = 5 +cdf.points = -1, 0, 1, 2, 5, 10 +cdf.values = 0, 0, 0, 0, 1, 1 +pmf.values = 0, 0, 0, 0, 1, 0 +icdf.points = 0.1, 0.5 +icdf.values = 5, 5 diff --git a/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.binomial.5.properties b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.binomial.5.properties new file mode 100644 index 0000000..238db21 --- /dev/null +++ b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.binomial.5.properties @@ -0,0 +1,25 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# Degenerate n=0 case +parameters = 0 0.01 +mean = 0 +variance = 0 +lower = 0 +upper = 0 +cdf.points = -1, 0, 1, 2, 5, 10 +cdf.values = 0, 1, 1, 1, 1, 1 +pmf.values = 0, 1, 0, 0, 0, 0 +icdf.points = 0.1, 0.5 +icdf.values = 0, 0 diff --git a/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.binomial.6.properties b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.binomial.6.properties new file mode 100644 index 0000000..30ff87e --- /dev/null +++ b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.binomial.6.properties @@ -0,0 +1,25 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# Degenerate p=0 case +parameters = 5 0.0 +mean = 0 +variance = 0 +lower = 0 +upper = 0 +cdf.points = -1, 0, 1, 2, 5, 10 +cdf.values = 0, 1, 1, 1, 1, 1 +pmf.values = 0, 1, 0, 0, 0, 0 +icdf.points = 0.1, 0.5 +icdf.values = 0, 0 diff --git a/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.nakagami.0.properties b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.nakagami.0.properties new file mode 100644 index 0000000..eb96e66 --- /dev/null +++ b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.nakagami.0.properties @@ -0,0 +1,41 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+parameters = 0.5 1.0 +# Computed using matlab +mean = 0.797884560802866 +variance = 0.363380227632418 +lower = 0 +upper = Infinity +connected = true +tolerance = 1e-9 +cdf.points = 0, 1e-3, 0.2, 0.4, 0.6, 0.8, 1, 1.2, 1.4, 1.6, 1.8, 2 +# Computed using scipy +cdf.values = \ + 0.0, 7.97884427822125389e-04,\ + 1.58519418878206031e-01, 3.10843483220648364e-01,\ + 4.51493764499852956e-01, 5.76289202833206615e-01,\ + 6.82689492137085852e-01, 7.69860659556583560e-01,\ + 8.38486681532458089e-01, 8.90401416600884343e-01,\ + 9.28139361774148575e-01, 9.54499736103641472e-01 +pdf.values = \ + 0.0, 0.79788416186068489, 0.78208538795091187,\ + 0.73654028060664678, 0.66644920578359934, 0.57938310552296557,\ + 0.48394144903828679, 0.38837210996642596, 0.29945493127148981,\ + 0.22184166935891111, 0.15790031660178833, 0.10798193302637614, +# Computed using WolframAlpha +cdf.hp.points = 1e-16, 4e-17 +cdf.hp.values = 7.978845608028653e-17, 3.1915382432114614e-17 +sf.hp.points = 9, 8.7 +sf.hp.values = 2.2571768119076845e-19, 3.318841739929575e-18 diff --git a/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.nakagami.1.properties b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.nakagami.1.properties new file mode 100644 index 0000000..2917afc --- /dev/null +++ b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.nakagami.1.properties @@ -0,0 +1,37 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +parameters = 1.5 2.0 +# Computed using matlab +mean = 1.302940031741120 +variance = 0.302347273686450 +lower = 0 +upper = Infinity +connected = true +tolerance = 1e-9 +cdf.points = 0, 1e-3, 0.2, 0.4, 0.6, 0.8, 1, 1.2, 1.4, 1.6, 1.8, 2 +cdf.values = \ + 0.0, 0.000000000488602,\ + 0.003839209349952, 0.029112642643164,\ + 0.089980307387723, 0.189070530913232,\ + 0.317729669663787, 0.460129965238200,\ + 0.599031192110653, 0.720732382881390,\ + 0.817659600745483, 0.888389774905287, +pdf.values = \ + 0.0, 0.000001465806436,\ + 0.056899455042812, 0.208008745554258,\ + 0.402828269545621, 0.580491109555755,\ + 0.692398452624549, 0.716805620039994,\ + 0.660571957322857, 0.550137830087772,\ + 0.418105970486118, 0.291913039977849, diff --git a/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.nakagami.2.properties b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.nakagami.2.properties new file mode 100644 index 0000000..fe7c3be --- /dev/null +++ b/commons-statistics-distribution/src/test/resources/org/apache/commons/statistics/distribution/test.nakagami.2.properties @@ -0,0 +1,33 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +parameters = 0.3333333333333333333 1.0 +# Computed using matlab +mean = 0.729810132407137 +variance = 0.467377170635876 +lower = 0 +upper = Infinity +tolerance = 1e-9 +cdf.points = 0, 1e-3, 0.2, 0.4, 0.6, 0.8, 1, 1.2, 1.4, 1.6, 1.8, 2 +# Computed using scipy +cdf.values = \ + 0.0, 0.00776458146673576, 0.26466318463713673,\ + 0.41599060641445568, 0.53633771818837206, 0.63551561797542433,\ + 0.71746556659624028, 0.7845448997061909, 0.83861986211366601,\ + 0.88141004735798412, 0.91458032800205946, 0.93973541101651015 +pdf.values = \ + 0.0, 5.17638635039373352, 0.8734262427029803,\ + 0.66605658341650675, 0.54432849968092045, 0.45048535438453824,\ + 0.3709044132031733, 0.30141976583757241, 0.24075672187548078,\ + 0.18853365020699897, 0.14451001716499515, 0.10829893529327907 diff --git a/src/main/resources/checkstyle/checkstyle.xml b/src/main/resources/checkstyle/checkstyle.xml index c96e678..1ea69b4 100644 --- a/src/main/resources/checkstyle/checkstyle.xml +++ b/src/main/resources/checkstyle/checkstyle.xml @@ -75,6 +75,7 @@ <!-- See http://checkstyle.sourceforge.net/config_header.html --> <module name="Header"> <property name="headerFile" value="${checkstyle.header.file}"/> + <property name="fileExtensions" value="java"/> </module> <module name="TreeWalker">
