NameFinderME.java

[email protected] Mon, 06 Jun 2011 19:24:46 -0700

I fixed it. Thank you for reviewing.


On Mon, Jun 6, 2011 at 5:27 AM, Jörn Kottmann <[email protected]> wrote:

> Hi,
>
> I might be mistaken, but the train method you added also needs
> to place the descriptor in the model, very similar to the train method
> which takes the descriptor, cutoff and iterations.
>
> Jörn
>
>
> On 6/3/11 7:34 AM, [email protected] wrote:
>
>> Author: colen
>> Date: Fri Jun  3 05:34:34 2011
>> New Revision: 1130898
>>
>> URL: http://svn.apache.org/viewvc?rev=1130898&view=rev
>> Log:
>> OPENNLP-195 Added train method that takes params argument and the
>> generatorDescriptor and resourceMap
>>
>> Modified:
>>
>> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
>>
>> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
>>
>> Modified:
>> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
>> URL:
>> http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java?rev=1130898&r1=1130897&r2=1130898&view=diff
>>
>> ==============================================================================
>> ---
>> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
>> (original)
>> +++
>> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
>> Fri Jun  3 05:34:34 2011
>> @@ -22,11 +22,9 @@ import java.io.FileInputStream;
>>  import java.io.IOException;
>>  import java.io.InputStream;
>>  import java.nio.charset.Charset;
>> -import java.util.Collections;
>>  import java.util.HashMap;
>>  import java.util.Map;
>>
>> -import opennlp.model.TrainUtil;
>>  import opennlp.tools.cmdline.CLI;
>>  import opennlp.tools.cmdline.CmdLineTool;
>>  import opennlp.tools.cmdline.CmdLineUtil;
>> @@ -187,8 +185,9 @@ public final class TokenNameFinderTraine
>>             parameters.getCutoff());
>>        }
>>        else {
>> -        model =
>> opennlp.tools.namefind.NameFinderME.train(parameters.getLanguage(),
>> parameters.getType(), sampleStream, mlParams, null,
>> -            Collections.<String, Object>emptyMap());
>> +        model = opennlp.tools.namefind.NameFinderME.train(
>> +            parameters.getLanguage(), parameters.getType(), sampleStream,
>> +            mlParams, featureGeneratorBytes, resources);
>>        }
>>      }
>>      catch (IOException e) {
>>
>> Modified:
>> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
>> URL:
>> http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java?rev=1130898&r1=1130897&r2=1130898&view=diff
>>
>> ==============================================================================
>> ---
>> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
>> (original)
>> +++
>> incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java
>> Fri Jun  3 05:34:34 2011
>> @@ -19,10 +19,7 @@
>>  package opennlp.tools.namefind;
>>
>>  import java.io.ByteArrayInputStream;
>> -import java.io.FileInputStream;
>> -import java.io.FileOutputStream;
>>  import java.io.IOException;
>> -import java.io.InputStreamReader;
>>  import java.io.ObjectStreamException;
>>  import java.util.ArrayList;
>>  import java.util.Collections;
>> @@ -40,11 +37,8 @@ import opennlp.model.EventStream;
>>  import opennlp.model.MaxentModel;
>>  import opennlp.model.TrainUtil;
>>  import opennlp.model.TwoPassDataIndexer;
>> -import opennlp.tools.postag.POSSampleSequenceStream;
>>  import opennlp.tools.util.BeamSearch;
>> -import opennlp.tools.util.HashSumEventStream;
>>  import opennlp.tools.util.ObjectStream;
>> -import opennlp.tools.util.PlainTextByLineStream;
>>  import opennlp.tools.util.Sequence;
>>  import opennlp.tools.util.SequenceValidator;
>>  import opennlp.tools.util.Span;
>> @@ -61,8 +55,6 @@ import opennlp.tools.util.featuregen.Sen
>>  import opennlp.tools.util.featuregen.TokenClassFeatureGenerator;
>>  import opennlp.tools.util.featuregen.TokenFeatureGenerator;
>>  import opennlp.tools.util.featuregen.WindowFeatureGenerator;
>> -import opennlp.tools.util.model.BaseModel;
>> -import opennlp.tools.util.model.ModelUtil;
>>
>>  /**
>>   * Class for creating a maximum-entropy-based name finder.
>> @@ -210,6 +202,26 @@ public class NameFinderME implements Tok
>>             });
>>    }
>>
>> +  private static AdaptiveFeatureGenerator createFeatureGenerator(
>> +      byte[] generatorDescriptor, final Map<String, Object>  resources)
>> +      throws IOException {
>> +    AdaptiveFeatureGenerator featureGenerator;
>> +
>> +    if (generatorDescriptor != null) {
>> +      featureGenerator = GeneratorFactory.create(new
>> ByteArrayInputStream(
>> +          generatorDescriptor), new FeatureGeneratorResourceProvider() {
>> +
>> +        public Object getResource(String key) {
>> +          return resources.get(key);
>> +        }
>> +      });
>> +    } else {
>> +      featureGenerator = null;
>> +    }
>> +
>> +    return featureGenerator;
>> +  }
>> +
>>    public Span[] find(String[] tokens) {
>>      return find(tokens, EMPTY);
>>    }
>> @@ -328,6 +340,26 @@ public class NameFinderME implements Tok
>>       return sprobs;
>>     }
>>
>> +   /**
>> +    * Trains a name finder model.
>> +    *
>> +    * @param languageCode
>> +    *          the language of the training data
>> +    * @param type
>> +    *          null or an override type for all types in the training data
>> +    * @param samples
>> +    *          the training data
>> +    * @param trainParams
>> +    *          machine learning train parameters
>> +    * @param generator
>> +    *          null or the feature generator
>> +    * @param resources
>> +    *          the resources for the name finder or null if none
>> +    *
>> +    * @return the newly trained model
>> +    *
>> +    * @throws IOException
>> +    */
>>     public static TokenNameFinderModel train(String languageCode, String
>> type, ObjectStream<NameSample>  samples,
>>         TrainingParameters trainParams, AdaptiveFeatureGenerator
>> generator, final Map<String, Object>  resources) throws IOException {
>>
>> @@ -358,6 +390,34 @@ public class NameFinderME implements Tok
>>           resources, manifestInfoEntries);
>>     }
>>
>> +  /**
>> +   * Trains a name finder model.
>> +   *
>> +   * @param languageCode
>> +   *          the language of the training data
>> +   * @param type
>> +   *          null or an override type for all types in the training data
>> +   * @param samples
>> +   *          the training data
>> +   * @param trainParams
>> +   *          machine learning train parameters
>> +   * @param featureGeneratorBytes
>> +   *          descriptor to configure the feature generation or null
>> +   * @param resources
>> +   *          the resources for the name finder or null if none
>> +   *
>> +   * @return the newly trained model
>> +   *
>> +   * @throws IOException
>> +   */
>> +  public static TokenNameFinderModel train(String languageCode, String
>> type,
>> +      ObjectStream<NameSample>  samples, TrainingParameters trainParams,
>> +      byte[] featureGeneratorBytes, final Map<String, Object>  resources)
>> +      throws IOException {
>> +    return train(languageCode, type, samples, trainParams,
>> +        createFeatureGenerator(featureGeneratorBytes, resources),
>> resources);
>> +  }
>> +
>>     /**
>>      * Trains a name finder model.
>>      *
>> @@ -403,19 +463,7 @@ public class NameFinderME implements Tok
>>
>>       // TODO: Pass in resource manager ...
>>
>> -     AdaptiveFeatureGenerator featureGenerator;
>> -
>> -     if (generatorDescriptor != null) {
>> -       featureGenerator = GeneratorFactory.create(new
>> ByteArrayInputStream(generatorDescriptor), new
>> FeatureGeneratorResourceProvider() {
>> -
>> -        public Object getResource(String key) {
>> -          return resources.get(key);
>> -        }
>> -      });
>> -     }
>> -     else {
>> -       featureGenerator = null;
>> -     }
>> +     AdaptiveFeatureGenerator featureGenerator =
>> createFeatureGenerator(generatorDescriptor, resources);
>>
>>       TokenNameFinderModel model = train(languageCode, type, samples,
>> featureGenerator,
>>           resources, iterations, cutoff);
>> @@ -427,7 +475,6 @@ public class NameFinderME implements Tok
>>       return model;
>>     }
>>
>> -
>>    @Deprecated
>>    public static GISModel train(EventStream es, int iterations, int cut)
>> throws IOException {
>>      return GIS.trainModel(iterations, new TwoPassDataIndexer(es, cut));
>>
>>
>>
>

Re: svn commit: r1130898 - in /incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools: cmdline/namefind/TokenNameFinderTrainerTool.java namefind/NameFinderME.java

Reply via email to