Author: psteitz
Date: Tue Jan 8 21:16:00 2008
New Revision: 610274
URL: http://svn.apache.org/viewvc?rev=610274&view=rev
Log:
Added check and rescaling of expected counts to sum to sum of observed
counts if necessary in ChiSquare test.
JIRA: MATH-175
Reported and patched by Carl Anderson.
Modified:
commons/proper/math/trunk/src/java/org/apache/commons/math/stat/inference/ChiSquareTestImpl.java
commons/proper/math/trunk/src/test/R/chiSquareTestCases
commons/proper/math/trunk/src/test/org/apache/commons/math/stat/inference/ChiSquareTestTest.java
commons/proper/math/trunk/src/test/org/apache/commons/math/stat/inference/TestUtilsTest.java
commons/proper/math/trunk/xdocs/changes.xml
Modified:
commons/proper/math/trunk/src/java/org/apache/commons/math/stat/inference/ChiSquareTestImpl.java
URL:
http://svn.apache.org/viewvc/commons/proper/math/trunk/src/java/org/apache/commons/math/stat/inference/ChiSquareTestImpl.java?rev=610274&r1=610273&r2=610274&view=diff
==============================================================================
---
commons/proper/math/trunk/src/java/org/apache/commons/math/stat/inference/ChiSquareTestImpl.java
(original)
+++
commons/proper/math/trunk/src/java/org/apache/commons/math/stat/inference/ChiSquareTestImpl.java
Tue Jan 8 21:16:00 2008
@@ -50,6 +50,11 @@
setDistribution(x);
}
/**
+ * {@inheritDoc}
+ * <p><strong>Note: </strong>This implementation rescales the
+ * <code>expected</code> array if necessary to ensure that the sum of the
+ * expected and observed counts are equal.</p>
+ *
* @param observed array of observed frequency counts
* @param expected array of expected frequency counts
* @return chi-square test statistic
@@ -58,8 +63,6 @@
*/
public double chiSquare(double[] expected, long[] observed)
throws IllegalArgumentException {
- double sumSq = 0.0d;
- double dev = 0.0d;
if ((expected.length < 2) || (expected.length != observed.length)) {
throw new IllegalArgumentException(
"observed, expected array lengths incorrect");
@@ -68,14 +71,38 @@
throw new IllegalArgumentException(
"observed counts must be non-negative and expected counts must
be postive");
}
+ double sumExpected = 0d;
+ double sumObserved = 0d;
+ for (int i = 0; i < observed.length; i++) {
+ sumExpected += expected[i];
+ sumObserved += observed[i];
+ }
+ double ratio = 1.0d;
+ boolean rescale = false;
+ if (Math.abs(sumExpected - sumObserved) > 10E-6) {
+ ratio = sumObserved / sumExpected;
+ rescale = true;
+ }
+ double sumSq = 0.0d;
+ double dev = 0.0d;
for (int i = 0; i < observed.length; i++) {
- dev = ((double) observed[i] - expected[i]);
- sumSq += dev * dev / expected[i];
+ if (rescale) {
+ dev = ((double) observed[i] - ratio * expected[i]);
+ sumSq += dev * dev / (ratio * expected[i]);
+ } else {
+ dev = ((double) observed[i] - expected[i]);
+ sumSq += dev * dev / expected[i];
+ }
}
return sumSq;
}
/**
+ * {@inheritDoc}
+ * <p><strong>Note: </strong>This implementation rescales the
+ * <code>expected</code> array if necessary to ensure that the sum of the
+ * expected and observed counts are equal.</p>
+ *
* @param observed array of observed frequency counts
* @param expected array of expected frequency counts
* @return p-value
@@ -90,6 +117,11 @@
}
/**
+ * {@inheritDoc}
+ * <p><strong>Note: </strong>This implementation rescales the
+ * <code>expected</code> array if necessary to ensure that the sum of the
+ * expected and observed counts are equal.</p>
+ *
* @param observed array of observed frequency counts
* @param expected array of expected frequency counts
* @param alpha significance level of the test
Modified: commons/proper/math/trunk/src/test/R/chiSquareTestCases
URL:
http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/R/chiSquareTestCases?rev=610274&r1=610273&r2=610274&view=diff
==============================================================================
--- commons/proper/math/trunk/src/test/R/chiSquareTestCases (original)
+++ commons/proper/math/trunk/src/test/R/chiSquareTestCases Tue Jan 8 21:16:00
2008
@@ -49,8 +49,9 @@
verifyHomogeneity <- function(obs, exp, expectedP, expectedStat,
tol, desc) {
- chi <- sum((obs - exp)^2/exp)
- p <- 1 - pchisq(sum((obs - exp)^2/exp), length(obs) - 1)
+ results <- chisq.test(obs,p=exp,rescale.p=TRUE)
+ chi <- results$statistic
+ p <- results$p.value
if (assertEquals(expectedP, p, tol, "p-value")) {
displayPadded(c(desc, " p-value test"), SUCCEEDED, WIDTH)
} else {
@@ -73,14 +74,14 @@
observed <- c(500, 623, 72, 70, 31)
expected <- c(485, 541, 82, 61, 37)
-verifyHomogeneity(observed, expected, 0.002512096, 16.4131070362, tol,
- "testChiSquare2")
+verifyHomogeneity(observed, expected, 0.06051952647453607, 9.023307936427388,
+ tol, "testChiSquare2")
observed <- c(2372383, 584222, 257170, 17750155, 7903832, 489265,
209628, 393899)
expected <- c(3389119.5, 649136.6, 285745.4, 25357364.76, 11291189.78,
543628.0, 232921.0, 437665.75)
-verifyHomogeneity(observed, expected, 0, 3624883.342907764, tol,
+verifyHomogeneity(observed, expected, 0, 114875.90421929007, tol,
"testChiSquareLargeTestStatistic")
counts <- matrix(c(40, 22, 43, 91, 21, 28, 60, 10, 22), nc = 3);
Modified:
commons/proper/math/trunk/src/test/org/apache/commons/math/stat/inference/ChiSquareTestTest.java
URL:
http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/org/apache/commons/math/stat/inference/ChiSquareTestTest.java?rev=610274&r1=610273&r2=610274&view=diff
==============================================================================
---
commons/proper/math/trunk/src/test/org/apache/commons/math/stat/inference/ChiSquareTestTest.java
(original)
+++
commons/proper/math/trunk/src/test/org/apache/commons/math/stat/inference/ChiSquareTestTest.java
Tue Jan 8 21:16:00 2008
@@ -57,10 +57,10 @@
long[] observed1 = { 500, 623, 72, 70, 31 };
double[] expected1 = { 485, 541, 82, 61, 37 };
- assertEquals( "chi-square test statistic", 16.4131070362,
testStatistic.chiSquare(expected1, observed1), 1E-10);
- assertEquals("chi-square p-value", 0.002512096,
testStatistic.chiSquareTest(expected1, observed1), 1E-9);
- assertTrue("chi-square test reject",
testStatistic.chiSquareTest(expected1, observed1, 0.003));
- assertTrue("chi-square test accept",
!testStatistic.chiSquareTest(expected1, observed1, 0.002));
+ assertEquals( "chi-square test statistic", 9.023307936427388,
testStatistic.chiSquare(expected1, observed1), 1E-10);
+ assertEquals("chi-square p-value", 0.06051952647453607,
testStatistic.chiSquareTest(expected1, observed1), 1E-9);
+ assertTrue("chi-square test reject",
testStatistic.chiSquareTest(expected1, observed1, 0.08));
+ assertTrue("chi-square test accept",
!testStatistic.chiSquareTest(expected1, observed1, 0.05));
try {
testStatistic.chiSquareTest(expected1, observed1, 95);
@@ -181,7 +181,7 @@
double cst = csti.chiSquareTest(exp, obs);
assertEquals("chi-square p-value", 0.0, cst, 1E-3);
assertEquals( "chi-square test statistic",
- 3624883.342907764, testStatistic.chiSquare(exp, obs), 1E-9);
+ 114875.90421929007, testStatistic.chiSquare(exp, obs), 1E-9);
}
/** Contingency table containing zeros - PR # 32531 */
Modified:
commons/proper/math/trunk/src/test/org/apache/commons/math/stat/inference/TestUtilsTest.java
URL:
http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/org/apache/commons/math/stat/inference/TestUtilsTest.java?rev=610274&r1=610273&r2=610274&view=diff
==============================================================================
---
commons/proper/math/trunk/src/test/org/apache/commons/math/stat/inference/TestUtilsTest.java
(original)
+++
commons/proper/math/trunk/src/test/org/apache/commons/math/stat/inference/TestUtilsTest.java
Tue Jan 8 21:16:00 2008
@@ -55,10 +55,10 @@
long[] observed1 = { 500, 623, 72, 70, 31 };
double[] expected1 = { 485, 541, 82, 61, 37 };
- assertEquals( "chi-square test statistic", 16.4131070362,
TestUtils.chiSquare(expected1, observed1), 1E-10);
- assertEquals("chi-square p-value", 0.002512096,
TestUtils.chiSquareTest(expected1, observed1), 1E-9);
- assertTrue("chi-square test reject",
TestUtils.chiSquareTest(expected1, observed1, 0.003));
- assertTrue("chi-square test accept",
!TestUtils.chiSquareTest(expected1, observed1, 0.002));
+ assertEquals( "chi-square test statistic", 9.023307936427388,
TestUtils.chiSquare(expected1, observed1), 1E-10);
+ assertEquals("chi-square p-value", 0.06051952647453607,
TestUtils.chiSquareTest(expected1, observed1), 1E-9);
+ assertTrue("chi-square test reject",
TestUtils.chiSquareTest(expected1, observed1, 0.07));
+ assertTrue("chi-square test accept",
!TestUtils.chiSquareTest(expected1, observed1, 0.05));
try {
TestUtils.chiSquareTest(expected1, observed1, 95);
@@ -179,7 +179,7 @@
double cst = csti.chiSquareTest(exp, obs);
assertEquals("chi-square p-value", 0.0, cst, 1E-3);
assertEquals( "chi-square test statistic",
- 3624883.342907764, TestUtils.chiSquare(exp, obs), 1E-9);
+ 114875.90421929007, TestUtils.chiSquare(exp, obs), 1E-9);
}
/** Contingency table containing zeros - PR # 32531 */
Modified: commons/proper/math/trunk/xdocs/changes.xml
URL:
http://svn.apache.org/viewvc/commons/proper/math/trunk/xdocs/changes.xml?rev=610274&r1=610273&r2=610274&view=diff
==============================================================================
--- commons/proper/math/trunk/xdocs/changes.xml (original)
+++ commons/proper/math/trunk/xdocs/changes.xml Tue Jan 8 21:16:00 2008
@@ -116,6 +116,10 @@
Changed Mean.evaluate() to use a two-pass algorithm, improving accuracy
by exploiting the fact that this method has access to the full
array of data values.
+ </action>
+ <action dev="psteitz" type="fix" issue="MATH-175" due-to="Carl Anderson">
+ Added check and rescaling of expected counts to sum to sum of observed
+ counts if necessary in ChiSquare test.
</action>
</release>
<release version="1.1" date="2005-12-17"