Author: jbaldrid
Date: Wed Jun 8 04:39:32 2011
New Revision: 1133246
URL: http://svn.apache.org/viewvc?rev=1133246&view=rev
Log:
OPENNLP-155 Fixed the perceptron; most importantly, it now uses the standard
update, the stepsize is gradually diminished to ensure stability, and
averaging is simplified and improved. Also added the prepositional phrase
attachment dataset and a perceptron unit test on that data.
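For orientation, here is a compact standalone sketch of the training scheme this
commit moves to: the standard perceptron update (boost the parameters of the
target outcome, penalize those of the predicted outcome), a stepsize divided by
1.05 at the start of each iteration, and averaging over parameters summed on
selected iterations. This is an illustration only, not code from the commit; the
class and variable names are hypothetical, only binary (presence/absence)
features are handled, and repeated events are not weighted.

  public class PerceptronUpdateSketch {

    // contexts[ei] holds the active predicate indices of event ei,
    // outcomes[ei] its gold outcome index. Returns averaged parameters.
    static double[][] train(int[][] contexts, int[] outcomes,
                            int numPreds, int numOutcomes, int iterations) {
      double[][] params = new double[numPreds][numOutcomes];
      double[][] summed = new double[numPreds][numOutcomes];
      int numTimesSummed = 0;
      double stepsize = 1.05;
      for (int iter = 1; iter <= iterations; iter++) {
        stepsize /= 1.05;  // gradually diminish the stepsize for stability
        for (int ei = 0; ei < contexts.length; ei++) {
          // Score each outcome by summing the parameters of the active predicates.
          double[] scores = new double[numOutcomes];
          for (int pi : contexts[ei])
            for (int oi = 0; oi < numOutcomes; oi++)
              scores[oi] += params[pi][oi];
          int predicted = 0;
          for (int oi = 1; oi < numOutcomes; oi++)
            if (scores[oi] > scores[predicted]) predicted = oi;
          // Standard update: reward the target, penalize the wrong prediction.
          if (predicted != outcomes[ei]) {
            for (int pi : contexts[ei]) {
              params[pi][outcomes[ei]] += stepsize;
              params[pi][predicted] -= stepsize;
            }
          }
        }
        // Sum parameters on the first 19 and on perfect-square iterations.
        int root = (int) Math.sqrt(iter);
        if (iter < 20 || root * root == iter) {
          numTimesSummed++;
          for (int pi = 0; pi < numPreds; pi++)
            for (int oi = 0; oi < numOutcomes; oi++)
              summed[pi][oi] += params[pi][oi];
        }
      }
      // Averaged parameters: divide each sum by the number of summations.
      for (int pi = 0; pi < numPreds; pi++)
        for (int oi = 0; oi < numOutcomes; oi++)
          summed[pi][oi] /= numTimesSummed;
      return summed;
    }
  }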
Added:
incubator/opennlp/trunk/opennlp-maxent/src/main/java/opennlp/model/ListEventStream.java
incubator/opennlp/trunk/opennlp-maxent/src/test/java/opennlp/perceptron/
incubator/opennlp/trunk/opennlp-maxent/src/test/java/opennlp/perceptron/PerceptronPrepAttachTest.java
incubator/opennlp/trunk/opennlp-maxent/src/test/resources/data/ppa/
incubator/opennlp/trunk/opennlp-maxent/src/test/resources/data/ppa/README
incubator/opennlp/trunk/opennlp-maxent/src/test/resources/data/ppa/bitstrings
incubator/opennlp/trunk/opennlp-maxent/src/test/resources/data/ppa/devset
incubator/opennlp/trunk/opennlp-maxent/src/test/resources/data/ppa/test
incubator/opennlp/trunk/opennlp-maxent/src/test/resources/data/ppa/training
Modified:
incubator/opennlp/trunk/opennlp-maxent/src/main/java/opennlp/perceptron/PerceptronTrainer.java
Added: incubator/opennlp/trunk/opennlp-maxent/src/main/java/opennlp/model/ListEventStream.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-maxent/src/main/java/opennlp/model/ListEventStream.java?rev=1133246&view=auto
==============================================================================
--- incubator/opennlp/trunk/opennlp-maxent/src/main/java/opennlp/model/ListEventStream.java (added)
+++ incubator/opennlp/trunk/opennlp-maxent/src/main/java/opennlp/model/ListEventStream.java Wed Jun 8 04:39:32 2011
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package opennlp.model;
+
+import java.util.List;
+
+public class ListEventStream implements EventStream {
+ List<Event> events;
+ int currentIndex = 0;
+ int numEvents;
+
+ public ListEventStream (List<Event> events) {
+ this.events = events;
+ numEvents = events.size();
+ }
+
+ public Event next () {
+ return events.get(currentIndex++);
+ }
+
+ public boolean hasNext () {
+ return currentIndex < numEvents;
+ }
+
+}
\ No newline at end of file
Modified: incubator/opennlp/trunk/opennlp-maxent/src/main/java/opennlp/perceptron/PerceptronTrainer.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-maxent/src/main/java/opennlp/perceptron/PerceptronTrainer.java?rev=1133246&r1=1133245&r2=1133246&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-maxent/src/main/java/opennlp/perceptron/PerceptronTrainer.java (original)
+++ incubator/opennlp/trunk/opennlp-maxent/src/main/java/opennlp/perceptron/PerceptronTrainer.java Wed Jun 8 04:39:32 2011
@@ -67,48 +67,26 @@ public class PerceptronTrainer {
understandable terms. */
private String[] predLabels;
- /** Stores the estimated parameter value of each predicate during iteration. */
- private MutableContext[] params;
-
- private int[][][] updates;
- private int VALUE = 0;
- private int ITER = 1;
- private int EVENT = 2;
-
- /** Stores the average parameter values of each predicate during iteration. */
- private MutableContext[] averageParams;
-
- private EvalParameters evalParams;
-
private boolean printMessages = true;
- double[] modelDistribution;
-
- private int iterations;
- private boolean useAverage;
-
public AbstractModel trainModel(int iterations, DataIndexer di, int cutoff) {
- this.iterations = iterations;
return trainModel(iterations,di,cutoff,true);
}
public AbstractModel trainModel(int iterations, DataIndexer di, int cutoff, boolean useAverage) {
display("Incorporating indexed data for training... \n");
- this.useAverage = useAverage;
contexts = di.getContexts();
values = di.getValues();
numTimesEventsSeen = di.getNumTimesEventsSeen();
numEvents = di.getNumEvents();
numUniqueEvents = contexts.length;
- this.iterations = iterations;
outcomeLabels = di.getOutcomeLabels();
outcomeList = di.getOutcomeList();
predLabels = di.getPredLabels();
numPreds = predLabels.length;
numOutcomes = outcomeLabels.length;
- if (useAverage) updates = new int[numPreds][numOutcomes][3];
display("done.\n");
@@ -116,178 +94,214 @@ public class PerceptronTrainer {
display("\t Number of Outcomes: " + numOutcomes + "\n");
display("\t Number of Predicates: " + numPreds + "\n");
+ display("Computing model parameters...\n");
+
+ MutableContext[] finalParameters = findParameters(iterations, useAverage);
+
+ display("...done.\n");
+
+ /*************** Create and return the model ******************/
+ return new PerceptronModel(finalParameters, predLabels, outcomeLabels);
+ }
+
+ private MutableContext[] findParameters (int iterations, boolean useAverage) {
+
+ display("Performing " + iterations + " iterations.\n");
- params = new MutableContext[numPreds];
- if (useAverage) averageParams = new MutableContext[numPreds];
- evalParams = new EvalParameters(params,numOutcomes);
-
int[] allOutcomesPattern = new int[numOutcomes];
- for (int oi = 0; oi < numOutcomes; oi++) {
+ for (int oi = 0; oi < numOutcomes; oi++)
allOutcomesPattern[oi] = oi;
- }
-
+
+ /** Stores the estimated parameter value of each predicate during iteration. */
+ MutableContext[] params = new MutableContext[numPreds];
for (int pi = 0; pi < numPreds; pi++) {
params[pi] = new MutableContext(allOutcomesPattern,new double[numOutcomes]);
- if (useAverage)
- averageParams[pi] = new MutableContext(allOutcomesPattern,new double[numOutcomes]);
- for (int aoi=0;aoi<numOutcomes;aoi++) {
- params[pi].setParameter(aoi, 0.0);
- if (useAverage)
- averageParams[pi].setParameter(aoi, 0.0);
- }
+ for (int aoi=0;aoi<numOutcomes;aoi++)
+ params[pi].setParameter(aoi, 0.0);
}
- modelDistribution = new double[numOutcomes];
- display("Computing model parameters...\n");
- findParameters(iterations);
- display("...done.\n");
+ EvalParameters evalParams = new EvalParameters(params,numOutcomes);
+
+ /** Stores the sum of parameter values of each predicate over many iterations. */
+ MutableContext[] summedParams = new MutableContext[numPreds];
+ if (useAverage) {
+ for (int pi = 0; pi < numPreds; pi++) {
+ summedParams[pi] = new MutableContext(allOutcomesPattern,new double[numOutcomes]);
+ for (int aoi=0;aoi<numOutcomes;aoi++)
+ summedParams[pi].setParameter(aoi, 0.0);
+ }
+ }
- /*************** Create and return the model ******************/
- if (useAverage)
- return new PerceptronModel(averageParams, predLabels, outcomeLabels);
- else
- return new PerceptronModel(params, predLabels, outcomeLabels);
- }
+ // If the change in training set accuracy is less than this, stop iterating.
+ double tolerance = .00001;
- private void display(String s) {
- if (printMessages)
- System.out.print(s);
- }
-
- private void findParameters(int iterations) {
+ // Keep track of the previous three accuracies. The difference of
+ // the mean of these and the current training set accuracy is used
+ // with tolerance to decide whether to stop.
+ double prevAccuracy1 = 0.0;
+ double prevAccuracy2 = 0.0;
+ double prevAccuracy3 = 0.0;
- display("Performing " + iterations + " iterations.\n");
+ // A counter for the denominator for averaging.
+ int numTimesSummed = 0;
- int numTimesSameAccuracy = 0;
- double prevAccuracy = 0.0;
+ double stepsize = 1.05;
for (int i = 1; i <= iterations; i++) {
- if (i < 10)
- display(" " + i + ": ");
- else if (i < 100)
- display(" " + i + ": ");
- else
- display(i + ": ");
- nextIteration(i);
-
- // Need to do this for the full set to get a representative
- // accuracy -- doing it while training is biased because the
- // events are ordered according to their outcomes.
- double currAccuracy = trainingStats(averageParams);
+
+ // Decrease the stepsize by a small amount.
+ stepsize /= 1.05;
- if (currAccuracy == prevAccuracy) {
- numTimesSameAccuracy++;
- } else {
- prevAccuracy = currAccuracy;
- numTimesSameAccuracy = 0;
+ displayIteration(i);
+
+ int numCorrect = 0;
+ int total = 0;
+
+ for (int ei = 0; ei < numUniqueEvents; ei++) {
+ int targetOutcome = outcomeList[ei];
+
+ for (int ni=0; ni<this.numTimesEventsSeen[ei]; ni++) {
+
+ // Compute the model's prediction according to the current parameters.
+ double[] modelDistribution = new double[numOutcomes];
+ if (values != null)
+ PerceptronModel.eval(contexts[ei], values[ei], modelDistribution, evalParams, false);
+ else
+ PerceptronModel.eval(contexts[ei], null, modelDistribution, evalParams, false);
+
+ int maxOutcome = maxIndex(modelDistribution);
+
+ // If the predicted outcome is different from the target
+ // outcome, do the standard update: boost the parameters
+ // associated with the target and reduce those associated
+ // with the incorrect predicted outcome.
+ if (maxOutcome != targetOutcome) {
+ for (int ci = 0; ci < contexts[ei].length; ci++) {
+ int pi = contexts[ei][ci];
+ if (values == null) {
+ params[pi].updateParameter(targetOutcome, stepsize);
+ params[pi].updateParameter(maxOutcome, -stepsize);
+ } else {
+ params[pi].updateParameter(targetOutcome, stepsize*values[ei][ci]);
+ params[pi].updateParameter(maxOutcome, -stepsize*values[ei][ci]);
+ }
+ }
+ }
+
+ // Update the counts for accuracy.
+ total++;
+ if (maxOutcome == targetOutcome)
+ numCorrect++;
+ }
+ }
+
+ // Calculate the training accuracy and display.
+ double trainingAccuracy = (double) numCorrect / numEvents;
+ if (i < 10 || (i%10) == 0)
+ display(". (" + numCorrect + "/" + numEvents+") " + trainingAccuracy + "\n");
+
+ // If we are doing averaging, and the current iteration is one
+ // of the first 20 or it is a perfect square, then update the
+ // summed parameters. The reason we don't take all of them is
+ // that the parameters change less toward the end of training,
+ // so they drown out the contributions of the more volatile
+ // early iterations. The use of perfect squares allows us to
+ // sample from successively farther apart iterations.
+ if (useAverage && (i < 20 || isPerfectSquare(i))) {
+ numTimesSummed++;
+ for (int pi = 0; pi < numPreds; pi++)
+ for (int aoi=0;aoi<numOutcomes;aoi++)
+ summedParams[pi].updateParameter(aoi, params[pi].getParameters()[aoi]);
}
- // If the accuracy hasn't changed for four iterations, stop training.
- if (numTimesSameAccuracy == 4) {
- display("Accuracy repeated 4 times, stopping training.\n");
+ // If the tolerance is greater than the difference between the
+ // current training accuracy and all of the previous three
+ // training accuracies, stop training.
+ if (Math.abs(prevAccuracy1-trainingAccuracy) < tolerance
+ && Math.abs(prevAccuracy2-trainingAccuracy) < tolerance
+ && Math.abs(prevAccuracy3-trainingAccuracy) < tolerance) {
+ display("Stopping: change in training set accuracy less than " + tolerance + "\n");
break;
}
+
+ // Update the previous training accuracies.
+ prevAccuracy1 = prevAccuracy2;
+ prevAccuracy2 = prevAccuracy3;
+ prevAccuracy3 = trainingAccuracy;
}
- if (useAverage)
- trainingStats(averageParams);
- else
- trainingStats(params);
- // kill a bunch of these big objects now that we don't need them
- numTimesEventsSeen = null;
- contexts = null;
- }
-
- /* Compute one iteration of Perceptron.*/
- private void nextIteration(int iteration) {
- iteration--; //move to 0-based index
- int oei = 0;
- for (int ei = 0; ei < numUniqueEvents; ei++, oei++) {
- for (int ni=0;ni<this.numTimesEventsSeen[ei];ni++) {
+ // Output the final training stats.
+ trainingStats(evalParams);
- for (int oi = 0; oi < numOutcomes; oi++)
- modelDistribution[oi] = 0;
+ // Create averaged parameters
+ if (useAverage) {
+ for (int pi = 0; pi < numPreds; pi++)
+ for (int aoi=0;aoi<numOutcomes;aoi++)
+ summedParams[pi].setParameter(aoi, summedParams[pi].getParameters()[aoi]/numTimesSummed);
- if (values != null)
- PerceptronModel.eval(contexts[ei], values[ei], modelDistribution, evalParams,false);
- else
- PerceptronModel.eval(contexts[ei], null, modelDistribution, evalParams, false);
+ return summedParams;
- int max = 0;
- for (int oi = 1; oi < numOutcomes; oi++)
- if (modelDistribution[oi] > modelDistribution[max])
- max = oi;
-
- for (int oi = 0;oi<numOutcomes;oi++) {
- int updateValue = -1;
- if (oi == outcomeList[oei])
- updateValue = 1;
-
- if (modelDistribution[oi]*updateValue <= 0) {
- for (int ci = 0; ci < contexts[ei].length; ci++) {
- int pi = contexts[ei][ci];
- if (values == null)
- params[pi].updateParameter(oi, updateValue);
- else
- params[pi].updateParameter(oi, updateValue*values[ei][ci]);
-
- if (useAverage) {
-
- if (updates[pi][oi][VALUE] != 0)
- averageParams[pi].updateParameter(oi, updates[pi][oi][VALUE] *
- (numEvents * (iteration-updates[pi][oi][ITER])
- + (ei-updates[pi][oi][EVENT])));
-
- updates[pi][oi][VALUE] = (int) params[pi].getParameters()[oi];
- updates[pi][oi][ITER] = iteration;
- updates[pi][oi][EVENT] = ei;
- }
- }
- }
- }
- }
- }
+ } else {
+
+ return params;
- //finish average computation
- double totIterations = (double) iterations*numEvents;
- if (useAverage && iteration == iterations-1) {
- for (int pi = 0; pi < numPreds; pi++) {
- double[] predParams = averageParams[pi].getParameters();
- for (int oi = 0;oi<numOutcomes;oi++) {
- if (updates[pi][oi][VALUE] != 0)
- predParams[oi] += updates[pi][oi][VALUE] *
- (numEvents * (iterations-updates[pi][oi][ITER])
- - updates[pi][oi][EVENT]);
-
- if (predParams[oi] != 0) {
- predParams[oi] /=totIterations;
- averageParams[pi].setParameter(oi, predParams[oi]);
- }
- }
- }
}
- }
+
+ }
- private double trainingStats(MutableContext[] params) {
+ private double trainingStats (EvalParameters evalParams) {
int numCorrect = 0;
- int oei = 0;
- for (int ei = 0; ei < numUniqueEvents; ei++, oei++) {
+
+ for (int ei = 0; ei < numUniqueEvents; ei++) {
for (int ni=0;ni<this.numTimesEventsSeen[ei];ni++) {
- for (int oi = 0; oi < numOutcomes; oi++)
- modelDistribution[oi] = 0;
+
+ double[] modelDistribution = new double[numOutcomes];
+
if (values != null)
PerceptronModel.eval(contexts[ei], values[ei], modelDistribution, evalParams,false);
else
PerceptronModel.eval(contexts[ei], null, modelDistribution, evalParams, false);
- int max = 0;
- for (int oi = 1; oi < numOutcomes; oi++)
- if (modelDistribution[oi] > modelDistribution[max])
- max = oi;
- if (max == outcomeList[oei])
+
+ int max = maxIndex(modelDistribution);
+ if (max == outcomeList[ei])
numCorrect++;
}
}
double trainingAccuracy = (double) numCorrect / numEvents;
- display(". (" + numCorrect + "/" + numEvents+") " + trainingAccuracy +
"\n");
+ display("Stats: (" + numCorrect + "/" + numEvents+") " + trainingAccuracy +
"\n");
return trainingAccuracy;
}
+
+
+ private int maxIndex (double[] values) {
+ int max = 0;
+ for (int i = 1; i < values.length; i++)
+ if (values[i] > values[max])
+ max = i;
+ return max;
+ }
+
+ private void display (String s) {
+ if (printMessages)
+ System.out.print(s);
+ }
+
+ private void displayIteration (int i) {
+ if (i > 10 && (i%10) != 0)
+ return;
+
+ if (i < 10)
+ display(" " + i + ": ");
+ else if (i < 100)
+ display(" " + i + ": ");
+ else
+ display(i + ": ");
+ }
+
+ // See whether a number is a perfect square. Inefficient, but fine
+ // for our purposes.
+ private final static boolean isPerfectSquare (int n) {
+ int root = (int)Math.sqrt(n);
+ return root*root == n;
+ }
+
}
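A brief aside on the averaging schedule added above: parameters are summed on
the first 19 iterations (i < 20) and afterwards only on perfect-square
iterations, so the later, more slowly changing parameter vectors are sampled at
increasingly wide intervals. The standalone snippet below is not part of this
commit (the class name is hypothetical); it simply mirrors that condition and
prints which iterations of a 100-iteration run would contribute to the average.

  public class AveragingScheduleSketch {
    public static void main(String[] args) {
      // Mirror the condition in findParameters: first 19 iterations,
      // then perfect squares only.
      for (int i = 1; i <= 100; i++) {
        int root = (int) Math.sqrt(i);
        if (i < 20 || root * root == i)
          System.out.print(i + " ");
      }
      System.out.println();
      // Prints 1 through 19, then 25 36 49 64 81 100.
    }
  }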
Added: incubator/opennlp/trunk/opennlp-maxent/src/test/java/opennlp/perceptron/PerceptronPrepAttachTest.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-maxent/src/test/java/opennlp/perceptron/PerceptronPrepAttachTest.java?rev=1133246&view=auto
==============================================================================
--- incubator/opennlp/trunk/opennlp-maxent/src/test/java/opennlp/perceptron/PerceptronPrepAttachTest.java (added)
+++ incubator/opennlp/trunk/opennlp-maxent/src/test/java/opennlp/perceptron/PerceptronPrepAttachTest.java Wed Jun 8 04:39:32 2011
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.perceptron;
+
+import opennlp.model.*;
+
+import java.io.*;
+import java.util.*;
+import junit.framework.TestCase;
+
+// Test for perceptron training and use.
+public class PerceptronPrepAttachTest extends TestCase {
+
+ public void testPerceptronOnPrepAttachData() throws IOException {
+ List<Event> trainingEvents = readPpaFile("src/test/resources/data/ppa/training");
+
+ EventStream trainingStream = new ListEventStream(trainingEvents);
+
+ AbstractModel model =
+ new PerceptronTrainer().trainModel(5000, new TwoPassDataIndexer(trainingStream, 1, false), 1);
+
+ List<Event> devEvents = readPpaFile("src/test/resources/data/ppa/devset");
+
+ int total = 0;
+ int correct = 0;
+ for (Event ev: devEvents) {
+ String targetLabel = ev.getOutcome();
+ double[] ocs = model.eval(ev.getContext());
+
+ int best = 0;
+ for (int i=1; i<ocs.length; i++)
+ if (ocs[i] > ocs[best])
+ best = i;
+
+ String predictedLabel = model.getOutcome(best);
+
+ if (targetLabel.equals(predictedLabel))
+ correct++;
+ total++;
+ }
+
+ double accuracy = correct/(double)total;
+ System.out.println("Accuracy on PPA devset: (" + correct + "/" + total + ")
" + accuracy);
+
+ assertEquals(accuracy, 0.7813815300817034);
+ }
+
+ private static List<Event> readPpaFile (String filename) throws IOException {
+
+ List<Event> events = new ArrayList<Event>();
+
+ BufferedReader in = new BufferedReader(new FileReader(filename));
+ String line;
+
+ while ( (line = in.readLine()) != null ) {
+ String[] items = line.split("\\s+");
+ String label = items[5];
+ String[] context = { "verb="+items[1], "noun="+items[2], "prep="+items[3],
"prep_obj="+items[4] };
+ events.add(new Event(label, context));
+ }
+ in.close();
+ return events;
+ }
+
+}
+
+
Added: incubator/opennlp/trunk/opennlp-maxent/src/test/resources/data/ppa/README
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-maxent/src/test/resources/data/ppa/README?rev=1133246&view=auto
==============================================================================
--- incubator/opennlp/trunk/opennlp-maxent/src/test/resources/data/ppa/README (added)
+++ incubator/opennlp/trunk/opennlp-maxent/src/test/resources/data/ppa/README Wed Jun 8 04:39:32 2011
@@ -0,0 +1,28 @@
+This directory contains the data used for the model described in:
+
+ Ratnaparkhi, Adwait, Jeff Reynar, and Salim Roukos (1994). A Maximum
+ Entropy Model for Prepositional Phrase Attachment. Proceedings of
+ the ARPA Human Language Technology Conference.
+ [http://aclweb.org/anthology-new/H/H94/H94-1048.pdf]
+
+Description of Files:
+
+training : training data
+
+devset : development set, used for debugging and algorithm
+ development.
+
+test : used to report results
+
+bitstrings : word classes derived from Mutual Information Clustering
+ for the Wall St. Journal.
+
+
+training, devset, and test are in the format:
+
+<source sentence#> V N1 P N2 <attachment>
+
+The data is distributed with the permission of Adwait Ratnaparkhi. The
+data may also be downloaded from:
+
+ http://sites.google.com/site/adwaitratnaparkhi/publications/ppa.tar.gz