I fixed the issue you pointed out: the new train method was not placing the descriptor in the model. Thank you for reviewing.
On Mon, Jun 6, 2011 at 5:27 AM, Jörn Kottmann <[email protected]> wrote: > Hi, > > I might be mistaken, but the train method you added also needs > to place the descriptor in the model. Very similar to the train method > which takes the descriptor, cutoff and iterations. > > Jörn > > > On 6/3/11 7:34 AM, [email protected] wrote: > >> Author: colen >> Date: Fri Jun 3 05:34:34 2011 >> New Revision: 1130898 >> >> URL: http://svn.apache.org/viewvc?rev=1130898&view=rev >> Log: >> OPENNLP-195 Added train method that takes params argument and the >> generatorDescriptor and resourceMap >> >> Modified: >> >> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java >> >> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java >> >> Modified: >> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java >> URL: >> http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java?rev=1130898&r1=1130897&r2=1130898&view=diff >> >> ============================================================================== >> --- >> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java >> (original) >> +++ >> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java >> Fri Jun 3 05:34:34 2011 >> @@ -22,11 +22,9 @@ import java.io.FileInputStream; >> import java.io.IOException; >> import java.io.InputStream; >> import java.nio.charset.Charset; >> -import java.util.Collections; >> import java.util.HashMap; >> import java.util.Map; >> >> -import opennlp.model.TrainUtil; >> import opennlp.tools.cmdline.CLI; >> import opennlp.tools.cmdline.CmdLineTool; >> import opennlp.tools.cmdline.CmdLineUtil; >> @@ -187,8 +185,9 @@ public final class TokenNameFinderTraine 
>> parameters.getCutoff()); >> } >> else { >> - model = >> opennlp.tools.namefind.NameFinderME.train(parameters.getLanguage(), >> parameters.getType(), sampleStream, mlParams, null, >> - Collections.<String, Object>emptyMap()); >> + model = opennlp.tools.namefind.NameFinderME.train( >> + parameters.getLanguage(), parameters.getType(), sampleStream, >> + mlParams, featureGeneratorBytes, resources); >> } >> } >> catch (IOException e) { >> >> Modified: >> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java >> URL: >> http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java?rev=1130898&r1=1130897&r2=1130898&view=diff >> >> ============================================================================== >> --- >> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java >> (original) >> +++ >> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java >> Fri Jun 3 05:34:34 2011 >> @@ -19,10 +19,7 @@ >> package opennlp.tools.namefind; >> >> import java.io.ByteArrayInputStream; >> -import java.io.FileInputStream; >> -import java.io.FileOutputStream; >> import java.io.IOException; >> -import java.io.InputStreamReader; >> import java.io.ObjectStreamException; >> import java.util.ArrayList; >> import java.util.Collections; >> @@ -40,11 +37,8 @@ import opennlp.model.EventStream; >> import opennlp.model.MaxentModel; >> import opennlp.model.TrainUtil; >> import opennlp.model.TwoPassDataIndexer; >> -import opennlp.tools.postag.POSSampleSequenceStream; >> import opennlp.tools.util.BeamSearch; >> -import opennlp.tools.util.HashSumEventStream; >> import opennlp.tools.util.ObjectStream; >> -import opennlp.tools.util.PlainTextByLineStream; >> import opennlp.tools.util.Sequence; >> import opennlp.tools.util.SequenceValidator; >> import opennlp.tools.util.Span; >> @@ -61,8 +55,6 @@ import 
opennlp.tools.util.featuregen.Sen >> import opennlp.tools.util.featuregen.TokenClassFeatureGenerator; >> import opennlp.tools.util.featuregen.TokenFeatureGenerator; >> import opennlp.tools.util.featuregen.WindowFeatureGenerator; >> -import opennlp.tools.util.model.BaseModel; >> -import opennlp.tools.util.model.ModelUtil; >> >> /** >> * Class for creating a maximum-entropy-based name finder. >> @@ -210,6 +202,26 @@ public class NameFinderME implements Tok >> }); >> } >> >> + private static AdaptiveFeatureGenerator createFeatureGenerator( >> + byte[] generatorDescriptor, final Map<String, Object> resources) >> + throws IOException { >> + AdaptiveFeatureGenerator featureGenerator; >> + >> + if (generatorDescriptor != null) { >> + featureGenerator = GeneratorFactory.create(new >> ByteArrayInputStream( >> + generatorDescriptor), new FeatureGeneratorResourceProvider() { >> + >> + public Object getResource(String key) { >> + return resources.get(key); >> + } >> + }); >> + } else { >> + featureGenerator = null; >> + } >> + >> + return featureGenerator; >> + } >> + >> public Span[] find(String[] tokens) { >> return find(tokens, EMPTY); >> } >> @@ -328,6 +340,26 @@ public class NameFinderME implements Tok >> return sprobs; >> } >> >> + /** >> + * Trains a name finder model. 
>> + * >> + * @param languageCode >> + * the language of the training data >> + * @param type >> + * null or an override type for all types in the training >> data >> + * @param samples >> + * the training data >> + * @param trainParams >> + * machine learning train parameters >> + * @param generator >> + * null or the feature generator >> + * @param resources >> + * the resources for the name finder or null if none >> + * >> + * @return the newly trained model >> + * >> + * @throws IOException >> + */ >> public static TokenNameFinderModel train(String languageCode, String >> type, ObjectStream<NameSample> samples, >> TrainingParameters trainParams, AdaptiveFeatureGenerator >> generator, final Map<String, Object> resources) throws IOException { >> >> @@ -358,6 +390,34 @@ public class NameFinderME implements Tok >> resources, manifestInfoEntries); >> } >> >> + /** >> + * Trains a name finder model. >> + * >> + * @param languageCode >> + * the language of the training data >> + * @param type >> + * null or an override type for all types in the training data >> + * @param samples >> + * the training data >> + * @param trainParams >> + * machine learning train parameters >> + * @param featureGeneratorBytes >> + * descriptor to configure the feature generation or null >> + * @param resources >> + * the resources for the name finder or null if none >> + * >> + * @return the newly trained model >> + * >> + * @throws IOException >> + */ >> + public static TokenNameFinderModel train(String languageCode, String >> type, >> + ObjectStream<NameSample> samples, TrainingParameters trainParams, >> + byte[] featureGeneratorBytes, final Map<String, Object> resources) >> + throws IOException { >> + return train(languageCode, type, samples, trainParams, >> + createFeatureGenerator(featureGeneratorBytes, resources), >> resources); >> + } >> + >> /** >> * Trains a name finder model. 
>> * >> @@ -403,19 +463,7 @@ public class NameFinderME implements Tok >> >> // TODO: Pass in resource manager ... >> >> - AdaptiveFeatureGenerator featureGenerator; >> - >> - if (generatorDescriptor != null) { >> - featureGenerator = GeneratorFactory.create(new >> ByteArrayInputStream(generatorDescriptor), new >> FeatureGeneratorResourceProvider() { >> - >> - public Object getResource(String key) { >> - return resources.get(key); >> - } >> - }); >> - } >> - else { >> - featureGenerator = null; >> - } >> + AdaptiveFeatureGenerator featureGenerator = >> createFeatureGenerator(generatorDescriptor, resources); >> >> TokenNameFinderModel model = train(languageCode, type, samples, >> featureGenerator, >> resources, iterations, cutoff); >> @@ -427,7 +475,6 @@ public class NameFinderME implements Tok >> return model; >> } >> >> - >> @Deprecated >> public static GISModel train(EventStream es, int iterations, int cut) >> throws IOException { >> return GIS.trainModel(iterations, new TwoPassDataIndexer(es, cut)); >> >> >> >
