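Hello, sorry for the late response. I reviewed your change to the ExtensionLoader and it looks good to me. Maybe you could add a comment to the Javadoc of that method explaining that it now also accepts singletons (a private constructor together with an INSTANCE field).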
Jörn On Wed, 2014-04-16 at 12:37 -0300, William Colen wrote: > Jörn, > > Can you please review my change to the ExtensionLoader? I modified it to > accept singletons (private constructor and the field INSTANCE). > > Thank you, > William > > > 2014-04-16 12:26 GMT-03:00 <co...@apache.org>: > > > Author: colen > > Date: Wed Apr 16 15:26:24 2014 > > New Revision: 1587944 > > > > URL: http://svn.apache.org/r1587944 > > Log: > > OPENNLP-674 Added factory to Doccat > > > > Added: > > > > opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatFactory.java > > (with props) > > > > opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/doccat/DoccatFactoryTest.java > > (with props) > > opennlp/trunk/opennlp-tools/src/test/resources/opennlp/tools/doccat/ > > > > opennlp/trunk/opennlp-tools/src/test/resources/opennlp/tools/doccat/DoccatSample.txt > > (with props) > > Modified: > > > > opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatCrossValidatorTool.java > > > > opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java > > > > opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/TrainingParams.java > > > > opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatCrossValidator.java > > > > opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatModel.java > > > > opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java > > > > opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorFactory.java > > > > opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/ext/ExtensionLoader.java > > > > Modified: > > opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatCrossValidatorTool.java > > URL: > > http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatCrossValidatorTool.java?rev=1587944&r1=1587943&r2=1587944&view=diff > > 
> > ============================================================================== > > --- > > opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatCrossValidatorTool.java > > (original) > > +++ > > opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatCrossValidatorTool.java > > Wed Apr 16 15:26:24 2014 > > @@ -34,8 +34,10 @@ import opennlp.tools.cmdline.doccat.Docc > > import opennlp.tools.cmdline.params.CVParams; > > import opennlp.tools.doccat.DoccatCrossValidator; > > import opennlp.tools.doccat.DoccatEvaluationMonitor; > > +import opennlp.tools.doccat.DoccatFactory; > > import opennlp.tools.doccat.DocumentSample; > > import opennlp.tools.doccat.FeatureGenerator; > > +import opennlp.tools.tokenize.Tokenizer; > > import opennlp.tools.util.eval.EvaluationMonitor; > > import opennlp.tools.util.model.ModelUtil; > > > > @@ -88,13 +90,18 @@ public final class DoccatCrossValidatorT > > FeatureGenerator[] featureGenerators = DoccatTrainerTool > > .createFeatureGenerators(params.getFeatureGenerators()); > > > > + Tokenizer tokenizer = DoccatTrainerTool.createTokenizer(params > > + .getTokenizer()); > > + > > DoccatEvaluationMonitor[] listenersArr = listeners > > .toArray(new DoccatEvaluationMonitor[listeners.size()]); > > > > DoccatCrossValidator validator; > > try { > > + DoccatFactory factory = DoccatFactory.create(params.getFactory(), > > + tokenizer, featureGenerators); > > validator = new DoccatCrossValidator(params.getLang(), mlParams, > > - featureGenerators, listenersArr); > > + factory, listenersArr); > > > > validator.evaluate(sampleStream, params.getFolds()); > > } catch (IOException e) { > > > > Modified: > > opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java > > URL: > > http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java?rev=1587944&r1=1587943&r2=1587944&view=diff > > > > 
============================================================================== > > --- > > opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java > > (original) > > +++ > > opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java > > Wed Apr 16 15:26:24 2014 > > @@ -26,16 +26,19 @@ import opennlp.tools.cmdline.TerminateTo > > import opennlp.tools.cmdline.doccat.DoccatTrainerTool.TrainerToolParams; > > import opennlp.tools.cmdline.params.TrainingToolParams; > > import opennlp.tools.doccat.BagOfWordsFeatureGenerator; > > +import opennlp.tools.doccat.DoccatFactory; > > import opennlp.tools.doccat.DoccatModel; > > import opennlp.tools.doccat.DocumentCategorizerME; > > import opennlp.tools.doccat.DocumentSample; > > import opennlp.tools.doccat.FeatureGenerator; > > +import opennlp.tools.tokenize.Tokenizer; > > +import opennlp.tools.tokenize.WhitespaceTokenizer; > > import opennlp.tools.util.ext.ExtensionLoader; > > import opennlp.tools.util.model.ModelUtil; > > > > public class DoccatTrainerTool > > extends AbstractTrainerTool<DocumentSample, TrainerToolParams> { > > - > > + > > interface TrainerToolParams extends TrainingParams, TrainingToolParams { > > } > > > > @@ -47,7 +50,7 @@ public class DoccatTrainerTool > > public String getShortDescription() { > > return "trainer for the learnable document categorizer"; > > } > > - > > + > > @Override > > public void run(String format, String[] args) { > > super.run(format, args); > > @@ -64,10 +67,14 @@ public class DoccatTrainerTool > > FeatureGenerator[] featureGenerators = createFeatureGenerators(params > > .getFeatureGenerators()); > > > > + Tokenizer tokenizer = createTokenizer(params.getTokenizer()); > > + > > DoccatModel model; > > try { > > + DoccatFactory factory = DoccatFactory.create(params.getFactory(), > > + tokenizer, featureGenerators); > > model = DocumentCategorizerME.train(params.getLang(), sampleStream, > > - mlParams, 
featureGenerators); > > + mlParams, factory); > > } catch (IOException e) { > > throw new TerminateToolException(-1, "IO error while reading > > training data or indexing data: " + > > e.getMessage(), e); > > @@ -79,10 +86,17 @@ public class DoccatTrainerTool > > // sorry that this can fail > > } > > } > > - > > + > > CmdLineUtil.writeModel("document categorizer", modelOutFile, model); > > } > > > > + static Tokenizer createTokenizer(String tokenizer) { > > + if(tokenizer != null) { > > + return ExtensionLoader.instantiateExtension(Tokenizer.class, > > tokenizer); > > + } > > + return WhitespaceTokenizer.INSTANCE; > > + } > > + > > static FeatureGenerator[] createFeatureGenerators(String > > featureGeneratorsNames) { > > if(featureGeneratorsNames == null) { > > FeatureGenerator[] def = {new BagOfWordsFeatureGenerator()}; > > > > Modified: > > opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/TrainingParams.java > > URL: > > http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/TrainingParams.java?rev=1587944&r1=1587943&r2=1587944&view=diff > > > > ============================================================================== > > --- > > opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/TrainingParams.java > > (original) > > +++ > > opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/TrainingParams.java > > Wed Apr 16 15:26:24 2014 > > @@ -32,4 +32,12 @@ interface TrainingParams extends BasicTr > > @OptionalParameter > > String getFeatureGenerators(); > > > > + @ParameterDescription(valueName = "tokenizer", description = "Tokenizer > > implementation. 
WhitespaceTokenizer is used if not specified.") > > + @OptionalParameter > > + String getTokenizer(); > > + > > + @ParameterDescription(valueName = "factoryName", description = "A > > sub-class of DoccatFactory where to get implementation and resources.") > > + @OptionalParameter > > + String getFactory(); > > + > > } > > > > Modified: > > opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatCrossValidator.java > > URL: > > http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatCrossValidator.java?rev=1587944&r1=1587943&r2=1587944&view=diff > > > > ============================================================================== > > --- > > opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatCrossValidator.java > > (original) > > +++ > > opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatCrossValidator.java > > Wed Apr 16 15:26:24 2014 > > @@ -34,18 +34,19 @@ public class DoccatCrossValidator { > > > > private DoccatEvaluationMonitor[] listeners; > > > > - private FeatureGenerator[] featureGenarators; > > + private DoccatFactory factory; > > + > > > > /** > > * Creates a {@link DoccatCrossValidator} with the given > > * {@link FeatureGenerator}s. > > */ > > public DoccatCrossValidator(String languageCode, TrainingParameters > > mlParams, > > - FeatureGenerator[] featureGenerators, DoccatEvaluationMonitor[] > > listeners) { > > + DoccatFactory factory, DoccatEvaluationMonitor ... 
listeners) { > > this.languageCode = languageCode; > > this.params = mlParams; > > this.listeners = listeners; > > - this.featureGenarators = featureGenerators; > > + this.factory = factory; > > } > > > > /** > > @@ -70,7 +71,7 @@ public class DoccatCrossValidator { > > .next(); > > > > DoccatModel model = DocumentCategorizerME.train(languageCode, > > - trainingSampleStream, params, featureGenarators); > > + trainingSampleStream, params, factory); > > > > DocumentCategorizerEvaluator evaluator = new > > DocumentCategorizerEvaluator( > > new DocumentCategorizerME(model), listeners); > > > > Added: > > opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatFactory.java > > URL: > > http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatFactory.java?rev=1587944&view=auto > > > > ============================================================================== > > --- > > opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatFactory.java > > (added) > > +++ > > opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatFactory.java > > Wed Apr 16 15:26:24 2014 > > @@ -0,0 +1,174 @@ > > +/* > > + * Licensed to the Apache Software Foundation (ASF) under one or more > > + * contributor license agreements. See the NOTICE file distributed with > > + * this work for additional information regarding copyright ownership. > > + * The ASF licenses this file to You under the Apache License, Version 2.0 > > + * (the "License"); you may not use this file except in compliance with > > + * the License. You may obtain a copy of the License at > > + * > > + * http://www.apache.org/licenses/LICENSE-2.0 > > + * > > + * Unless required by applicable law or agreed to in writing, software > > + * distributed under the License is distributed on an "AS IS" BASIS, > > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or > > implied. 
> > + * See the License for the specific language governing permissions and > > + * limitations under the License. > > + */ > > + > > +package opennlp.tools.doccat; > > + > > +import java.util.Arrays; > > +import java.util.Iterator; > > +import java.util.List; > > +import java.util.Map; > > + > > +import opennlp.tools.tokenize.Tokenizer; > > +import opennlp.tools.tokenize.WhitespaceTokenizer; > > +import opennlp.tools.util.BaseToolFactory; > > +import opennlp.tools.util.InvalidFormatException; > > +import opennlp.tools.util.ext.ExtensionLoader; > > + > > +/** > > + * The factory that provides Doccat default implementations and resources > > + */ > > +public class DoccatFactory extends BaseToolFactory { > > + > > + private static final String FEATURE_GENERATORS = > > "doccat.featureGenerators"; > > + private static final String TOKENIZER_NAME = "doccat.tokenizer"; > > + > > + private FeatureGenerator[] featureGenerators; > > + private Tokenizer tokenizer; > > + > > + /** > > + * Creates a {@link DoccatFactory} that provides the default > > implementation of > > + * the resources. > > + */ > > + public DoccatFactory() { > > + } > > + > > + /** > > + * Creates a {@link DoccatFactory}. Use this constructor to > > programmatically > > + * create a factory. 
> > + * > > + * @param tokenizer > > + * @param featureGenerators > > + */ > > + public DoccatFactory(Tokenizer tokenizer, FeatureGenerator[] > > featureGenerators) { > > + this.init(tokenizer, featureGenerators); > > + } > > + > > + protected void init(Tokenizer tokenizer, FeatureGenerator[] > > featureGenerators) { > > + > > + this.featureGenerators = featureGenerators; > > + this.tokenizer = tokenizer; > > + } > > + > > + @Override > > + public Map<String, String> createManifestEntries() { > > + Map<String, String> manifestEntries = super.createManifestEntries(); > > + > > + if (getTokenizer() != null) { > > + manifestEntries.put(TOKENIZER_NAME, getTokenizer().getClass() > > + .getCanonicalName()); > > + } > > + > > + if (getFeatureGenerators() != null) { > > + manifestEntries.put(FEATURE_GENERATORS, > > featureGeneratorsAsString()); > > + } > > + > > + return manifestEntries; > > + } > > + > > + private String featureGeneratorsAsString() { > > + List<FeatureGenerator> fgs = Arrays.asList(getFeatureGenerators()); > > + Iterator<FeatureGenerator> iter = fgs.iterator(); > > + StringBuilder sb = new StringBuilder(); > > + if (iter.hasNext()) { > > + sb.append(iter.next().getClass().getCanonicalName()); > > + while (iter.hasNext()) { > > + sb.append(',').append(iter.next().getClass().getCanonicalName()); > > + } > > + } > > + return sb.toString(); > > + } > > + > > + @Override > > + public void validateArtifactMap() throws InvalidFormatException { > > + // nothing to validate > > + } > > + > > + public static DoccatFactory create(String subclassName, Tokenizer > > tokenizer, > > + FeatureGenerator[] featureGenerators) throws InvalidFormatException > > { > > + if (subclassName == null) { > > + // will create the default factory > > + return new DoccatFactory(tokenizer, featureGenerators); > > + } > > + try { > > + DoccatFactory theFactory = ExtensionLoader.instantiateExtension( > > + DoccatFactory.class, subclassName); > > + theFactory.init(tokenizer, 
featureGenerators); > > + return theFactory; > > + } catch (Exception e) { > > + String msg = "Could not instantiate the " + subclassName > > + + ". The initialization throw an exception."; > > + System.err.println(msg); > > + e.printStackTrace(); > > + throw new InvalidFormatException(msg, e); > > + } > > + > > + } > > + > > + private FeatureGenerator[] loadFeatureGenerators(String classNames) { > > + String[] classes = classNames.split(","); > > + FeatureGenerator[] fgs = new FeatureGenerator[classes.length]; > > + > > + for (int i = 0; i < classes.length; i++) { > > + fgs[i] = > > ExtensionLoader.instantiateExtension(FeatureGenerator.class, > > + classes[i]); > > + } > > + return fgs; > > + } > > + > > + public FeatureGenerator[] getFeatureGenerators() { > > + if (featureGenerators == null) { > > + if (artifactProvider != null) { > > + String classNames = artifactProvider > > + .getManifestProperty(FEATURE_GENERATORS); > > + if (classNames != null) { > > + this.featureGenerators = loadFeatureGenerators(classNames); > > + } > > + } > > + if (featureGenerators == null) { // could not load using artifact > > provider > > + // load bag of words as default > > + FeatureGenerator[] bow = { new BagOfWordsFeatureGenerator() }; > > + this.featureGenerators = bow; > > + } > > + } > > + return featureGenerators; > > + } > > + > > + public void setFeatureGenerators(FeatureGenerator[] featureGenerators) { > > + this.featureGenerators = featureGenerators; > > + } > > + > > + public Tokenizer getTokenizer() { > > + if (this.tokenizer == null) { > > + if (artifactProvider != null) { > > + String className = > > artifactProvider.getManifestProperty(TOKENIZER_NAME); > > + if (className != null) { > > + this.tokenizer = ExtensionLoader.instantiateExtension( > > + Tokenizer.class, className); > > + } > > + } > > + if (this.tokenizer == null) { // could not load using artifact > > provider > > + this.tokenizer = WhitespaceTokenizer.INSTANCE; > > + } > > + } > > + return tokenizer; > 
> + } > > + > > + public void setTokenizer(Tokenizer tokenizer) { > > + this.tokenizer = tokenizer; > > + } > > + > > +} > > > > Propchange: > > opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatFactory.java > > > > ------------------------------------------------------------------------------ > > svn:mime-type = text/plain > > > > Modified: > > opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatModel.java > > URL: > > http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatModel.java?rev=1587944&r1=1587943&r2=1587944&view=diff > > > > ============================================================================== > > --- > > opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatModel.java > > (original) > > +++ > > opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatModel.java > > Wed Apr 16 15:26:24 2014 > > @@ -25,34 +25,50 @@ import java.util.Map; > > > > import opennlp.tools.ml.model.AbstractModel; > > import opennlp.tools.ml.model.MaxentModel; > > +import opennlp.tools.util.BaseToolFactory; > > import opennlp.tools.util.InvalidFormatException; > > import opennlp.tools.util.model.BaseModel; > > > > public class DoccatModel extends BaseModel { > > - > > + > > private static final String COMPONENT_NAME = "DocumentCategorizerME"; > > private static final String DOCCAT_MODEL_ENTRY_NAME = "doccat.model"; > > - > > - protected DoccatModel(String languageCode, MaxentModel doccatModel, > > - Map<String, String> manifestInfoEntries) { > > - super(COMPONENT_NAME, languageCode, manifestInfoEntries); > > - > > + > > + public DoccatModel(String languageCode, MaxentModel doccatModel, > > + Map<String, String> manifestInfoEntries, DoccatFactory factory) { > > + super(COMPONENT_NAME, languageCode, manifestInfoEntries, factory); > > + > > artifactMap.put(DOCCAT_MODEL_ENTRY_NAME, doccatModel); > > checkArtifactMap(); > > } > > - > > + > > + /** > > + * @deprecated Use > 
> + * {@link #DoccatModel(String, MaxentModel, Map, > > DoccatFactory)} > > + * instead and pass in a {@link DoccatFactory} > > + */ > > + protected DoccatModel(String languageCode, MaxentModel doccatModel, > > + Map<String, String> manifestInfoEntries) { > > + this(languageCode, doccatModel, manifestInfoEntries, new > > DoccatFactory()); > > + } > > + > > + /** > > + * @deprecated Use > > + * {@link #DoccatModel(String, MaxentModel, Map, > > DoccatFactory)} > > + * instead and pass in a {@link DoccatFactory} > > + */ > > public DoccatModel(String languageCode, MaxentModel doccatModel) { > > this(languageCode, doccatModel, null); > > } > > - > > + > > public DoccatModel(InputStream in) throws IOException, > > InvalidFormatException { > > super(COMPONENT_NAME, in); > > } > > - > > + > > public DoccatModel(File modelFile) throws IOException, > > InvalidFormatException { > > super(COMPONENT_NAME, modelFile); > > } > > - > > + > > public DoccatModel(URL modelURL) throws IOException, > > InvalidFormatException { > > super(COMPONENT_NAME, modelURL); > > } > > @@ -66,7 +82,23 @@ public class DoccatModel extends BaseMod > > } > > } > > > > + public DoccatFactory getFactory() { > > + return (DoccatFactory) this.toolFactory; > > + } > > + > > + @Override > > + protected Class<? extends BaseToolFactory> getDefaultFactory() { > > + return DoccatFactory.class; > > + } > > + > > + /** > > + * @deprecated Use {@link #getMaxentModel()} instead. 
> > + */ > > public MaxentModel getChunkerModel() { > > return (MaxentModel) artifactMap.get(DOCCAT_MODEL_ENTRY_NAME); > > } > > + > > + public MaxentModel getMaxentModel() { > > + return (MaxentModel) artifactMap.get(DOCCAT_MODEL_ENTRY_NAME); > > + } > > } > > > > Modified: > > opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java > > URL: > > http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java?rev=1587944&r1=1587943&r2=1587944&view=diff > > > > ============================================================================== > > --- > > opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java > > (original) > > +++ > > opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java > > Wed Apr 16 15:26:24 2014 > > @@ -25,7 +25,6 @@ import java.util.Map; > > > > import opennlp.tools.ml.model.MaxentModel; > > import opennlp.tools.ml.model.TrainUtil; > > -import opennlp.tools.tokenize.SimpleTokenizer; > > import opennlp.tools.tokenize.Tokenizer; > > import opennlp.tools.util.ObjectStream; > > import opennlp.tools.util.TrainingParameters; > > @@ -40,29 +39,35 @@ public class DocumentCategorizerME imple > > * Shared default thread safe feature generator. > > */ > > private static FeatureGenerator defaultFeatureGenerator = new > > BagOfWordsFeatureGenerator(); > > - > > - private MaxentModel model; > > + > > + private DoccatModel model; > > private DocumentCategorizerContextGenerator mContextGenerator; > > > > /** > > - * Initializes a the current instance with a doccat model and custom > > feature generation. > > - * The feature generation must be identical to the configuration at > > training time. > > - * > > + * Initializes a the current instance with a doccat model and custom > > feature > > + * generation. The feature generation must be identical to the > > configuration > > + * at training time. 
> > + * > > * @param model > > * @param featureGenerators > > + * > > + * @deprecated train a {@link DoccatModel} with a specific > > + * {@link DoccatFactory} to customize the {@link > > FeatureGenerator}s > > */ > > public DocumentCategorizerME(DoccatModel model, FeatureGenerator... > > featureGenerators) { > > - this.model = model.getChunkerModel(); > > + this.model = model; > > this.mContextGenerator = new > > DocumentCategorizerContextGenerator(featureGenerators); > > } > > - > > + > > /** > > * Initializes the current instance with a doccat model. Default > > feature generation is used. > > - * > > + * > > * @param model > > */ > > public DocumentCategorizerME(DoccatModel model) { > > - this(model, defaultFeatureGenerator); > > + this.model = model; > > + this.mContextGenerator = new > > DocumentCategorizerContextGenerator(this.model > > + .getFactory().getFeatureGenerators()); > > } > > > > /** > > @@ -71,7 +76,7 @@ public class DocumentCategorizerME imple > > * @param text > > */ > > public double[] categorize(String text[]) { > > - return model.eval(mContextGenerator.getContext(text)); > > + return > > model.getMaxentModel().eval(mContextGenerator.getContext(text)); > > } > > > > /** > > @@ -79,57 +84,79 @@ public class DocumentCategorizerME imple > > * is passed to the feature generation. 
> > */ > > public double[] categorize(String documentText) { > > - Tokenizer tokenizer = SimpleTokenizer.INSTANCE; > > + Tokenizer tokenizer = model.getFactory().getTokenizer(); > > return categorize(tokenizer.tokenize(documentText)); > > } > > > > public String getBestCategory(double[] outcome) { > > - return model.getBestOutcome(outcome); > > + return model.getMaxentModel().getBestOutcome(outcome); > > } > > > > public int getIndex(String category) { > > - return model.getIndex(category); > > + return model.getMaxentModel().getIndex(category); > > } > > > > public String getCategory(int index) { > > - return model.getOutcome(index); > > + return model.getMaxentModel().getOutcome(index); > > } > > > > public int getNumberOfCategories() { > > - return model.getNumOutcomes(); > > + return model.getMaxentModel().getNumOutcomes(); > > } > > > > public String getAllResults(double results[]) { > > - return model.getAllOutcomes(results); > > + return model.getMaxentModel().getAllOutcomes(results); > > } > > > > + /** > > + * @deprecated Use > > + * {@link #train(String, ObjectStream, TrainingParameters, > > DoccatFactory)} > > + * instead. > > + */ > > public static DoccatModel train(String languageCode, > > ObjectStream<DocumentSample> samples, > > TrainingParameters mlParams, FeatureGenerator... 
featureGenerators) > > throws IOException { > > - > > + > > if (featureGenerators.length == 0) { > > featureGenerators = new > > FeatureGenerator[]{defaultFeatureGenerator}; > > } > > - > > + > > Map<String, String> manifestInfoEntries = new HashMap<String, > > String>(); > > - > > + > > MaxentModel model = TrainUtil.train( > > new DocumentCategorizerEventStream(samples, featureGenerators), > > mlParams.getSettings(), manifestInfoEntries); > > - > > + > > return new DoccatModel(languageCode, model, manifestInfoEntries); > > } > > - > > + > > + public static DoccatModel train(String languageCode, > > ObjectStream<DocumentSample> samples, > > + TrainingParameters mlParams, DoccatFactory factory) > > + throws IOException { > > + > > + Map<String, String> manifestInfoEntries = new HashMap<String, > > String>(); > > + > > + MaxentModel model = TrainUtil.train( > > + new DocumentCategorizerEventStream(samples, > > factory.getFeatureGenerators()), > > + mlParams.getSettings(), manifestInfoEntries); > > + > > + return new DoccatModel(languageCode, model, manifestInfoEntries, > > factory); > > + } > > + > > /** > > * Trains a doccat model with default feature generation. > > - * > > + * > > * @param languageCode > > * @param samples > > - * > > + * > > * @return the trained doccat model > > - * > > + * > > * @throws IOException > > - * @throws ObjectStreamException > > + * @throws ObjectStreamException > > + * > > + * @deprecated Use > > + * {@link #train(String, ObjectStream, TrainingParameters, > > DoccatFactory)} > > + * instead. 
> > */ > > public static DoccatModel train(String languageCode, > > ObjectStream<DocumentSample> samples) throws IOException { > > return train(languageCode, samples, > > ModelUtil.createDefaultTrainingParameters(), defaultFeatureGenerator); > > > > Modified: > > opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorFactory.java > > URL: > > http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorFactory.java?rev=1587944&r1=1587943&r2=1587944&view=diff > > > > ============================================================================== > > --- > > opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorFactory.java > > (original) > > +++ > > opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorFactory.java > > Wed Apr 16 15:26:24 2014 > > @@ -52,7 +52,7 @@ public class SentenceDetectorFactory ext > > /** > > * Creates a {@link SentenceDetectorFactory}. Use this constructor to > > * programmatically create a factory. 
> > - * > > + * > > * @param languageCode > > * @param abbreviationDictionary > > * @param eosCharacters > > @@ -61,7 +61,7 @@ public class SentenceDetectorFactory ext > > Dictionary abbreviationDictionary, char[] eosCharacters) { > > this.init(languageCode, useTokenEnd, abbreviationDictionary, > > eosCharacters); > > } > > - > > + > > protected void init(String languageCode, boolean useTokenEnd, > > Dictionary abbreviationDictionary, char[] eosCharacters) { > > this.languageCode = languageCode; > > > > Modified: > > opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/ext/ExtensionLoader.java > > URL: > > http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/ext/ExtensionLoader.java?rev=1587944&r1=1587943&r2=1587944&view=diff > > > > ============================================================================== > > --- > > opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/ext/ExtensionLoader.java > > (original) > > +++ > > opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/ext/ExtensionLoader.java > > Wed Apr 16 15:26:24 2014 > > @@ -17,6 +17,8 @@ > > > > package opennlp.tools.util.ext; > > > > +import java.lang.reflect.Field; > > + > > /** > > * The {@link ExtensionLoader} is responsible to load extensions to the > > OpenNLP library. > > * <p> > > @@ -64,6 +66,24 @@ public class ExtensionLoader { > > } catch (InstantiationException e) { > > throw new ExtensionNotLoadedException(e); > > } catch (IllegalAccessException e) { > > + // constructor is private. 
Try to load using INSTANCE > > + Field instanceField; > > + try { > > + instanceField = extClazz.getDeclaredField("INSTANCE"); > > + } catch (NoSuchFieldException e1) { > > + throw new ExtensionNotLoadedException(e1); > > + } catch (SecurityException e1) { > > + throw new ExtensionNotLoadedException(e1); > > + } > > + if(instanceField != null) { > > + try { > > + return (T) instanceField.get(null); > > + } catch (IllegalArgumentException e1) { > > + throw new ExtensionNotLoadedException(e1); > > + } catch (IllegalAccessException e1) { > > + throw new ExtensionNotLoadedException(e1); > > + } > > + } > > throw new ExtensionNotLoadedException(e); > > } > > } > > > > Added: > > opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/doccat/DoccatFactoryTest.java > > URL: > > http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/doccat/DoccatFactoryTest.java?rev=1587944&view=auto > > > > ============================================================================== > > --- > > opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/doccat/DoccatFactoryTest.java > > (added) > > +++ > > opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/doccat/DoccatFactoryTest.java > > Wed Apr 16 15:26:24 2014 > > @@ -0,0 +1,100 @@ > > +package opennlp.tools.doccat; > > + > > +import static org.junit.Assert.assertEquals; > > +import static org.junit.Assert.assertNotNull; > > + > > +import java.io.ByteArrayInputStream; > > +import java.io.ByteArrayOutputStream; > > +import java.io.IOException; > > + > > +import opennlp.tools.formats.ResourceAsStreamFactory; > > +import opennlp.tools.tokenize.SimpleTokenizer; > > +import opennlp.tools.tokenize.WhitespaceTokenizer; > > +import opennlp.tools.util.InputStreamFactory; > > +import opennlp.tools.util.ObjectStream; > > +import opennlp.tools.util.PlainTextByLineStream; > > +import opennlp.tools.util.TrainingParameters; > > + > > +import org.junit.Test; > > + > > +/** > > + * Tests for the {@link DoccatFactory} 
class. > > + */ > > +public class DoccatFactoryTest { > > + > > + private static ObjectStream<DocumentSample> createSampleStream() > > + throws IOException { > > + > > + InputStreamFactory isf = new ResourceAsStreamFactory( > > + DoccatFactoryTest.class, > > "/opennlp/tools/doccat/DoccatSample.txt"); > > + > > + return new DocumentSampleStream(new PlainTextByLineStream(isf, > > "UTF-8")); > > + } > > + > > + private static DoccatModel train() throws IOException { > > + return DocumentCategorizerME.train("x-unspecified", > > createSampleStream(), > > + TrainingParameters.defaultParams()); > > + } > > + > > + private static DoccatModel train(DoccatFactory factory) throws > > IOException { > > + return DocumentCategorizerME.train("x-unspecified", > > createSampleStream(), > > + TrainingParameters.defaultParams(), factory); > > + } > > + > > + @Test > > + public void testDefault() throws IOException { > > + DoccatModel model = train(); > > + > > + assertNotNull(model); > > + > > + ByteArrayOutputStream out = new ByteArrayOutputStream(); > > + model.serialize(out); > > + ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray()); > > + > > + DoccatModel fromSerialized = new DoccatModel(in); > > + > > + DoccatFactory factory = fromSerialized.getFactory(); > > + > > + assertNotNull(factory); > > + > > + assertEquals(1, factory.getFeatureGenerators().length); > > + assertEquals(BagOfWordsFeatureGenerator.class, > > + factory.getFeatureGenerators()[0].getClass()); > > + > > + assertEquals(WhitespaceTokenizer.INSTANCE, factory.getTokenizer()); > > + > > + } > > + > > + @Test > > + public void testCustom() throws IOException { > > + FeatureGenerator[] featureGenerators = { new > > BagOfWordsFeatureGenerator(), > > + new NGramFeatureGenerator() }; > > + DoccatFactory factory = new DoccatFactory(SimpleTokenizer.INSTANCE, > > + featureGenerators); > > + > > + DoccatModel model = train(factory); > > + > > + assertNotNull(model); > > + > > + ByteArrayOutputStream out = 
new ByteArrayOutputStream(); > > + model.serialize(out); > > + ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray()); > > + > > + DoccatModel fromSerialized = new DoccatModel(in); > > + > > + factory = fromSerialized.getFactory(); > > + > > + assertNotNull(factory); > > + > > + assertEquals(2, factory.getFeatureGenerators().length); > > + assertEquals(BagOfWordsFeatureGenerator.class, > > + factory.getFeatureGenerators()[0].getClass()); > > + assertEquals(NGramFeatureGenerator.class, > > + factory.getFeatureGenerators()[1].getClass()); > > + > > + assertEquals(SimpleTokenizer.INSTANCE.getClass(), > > factory.getTokenizer() > > + .getClass()); > > + > > + } > > + > > +} > > > > Propchange: > > opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/doccat/DoccatFactoryTest.java > > > > ------------------------------------------------------------------------------ > > svn:mime-type = text/plain > > > > > >