Please always mention the issue number in the commit message. Thanks, Jörn
On Fri, 2016-03-11 at 17:37 +0000, [email protected] wrote: > Author: beylerian > Date: Fri Mar 11 17:37:07 2016 > New Revision: 1734600 > > URL: http://svn.apache.org/viewvc?rev=1734600&view=rev > Log: > added unit tests, corrected some mistakes, need more unit tests > > Added: > opennlp/sandbox/opennlp- > wsd/src/test/java/opennlp/tools/disambiguator/IMSMETester.java > - copied, changed from r1733577, opennlp/sandbox/opennlp- > wsd/src/test/java/opennlp/tools/disambiguator/IMSTester.java > opennlp/sandbox/opennlp- > wsd/src/test/java/opennlp/tools/disambiguator/OSCCMETester.java > - copied, changed from r1733577, opennlp/sandbox/opennlp- > wsd/src/test/java/opennlp/tools/disambiguator/OSCCTester.java > Removed: > opennlp/sandbox/opennlp- > wsd/src/test/java/opennlp/tools/disambiguator/IMSTester.java > opennlp/sandbox/opennlp- > wsd/src/test/java/opennlp/tools/disambiguator/OSCCTester.java > Modified: > opennlp/sandbox/opennlp- > wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java > opennlp/sandbox/opennlp- > wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java > opennlp/sandbox/opennlp- > wsd/src/main/java/opennlp/tools/disambiguator/ims/DefaultIMSContextGe > nerator.java > opennlp/sandbox/opennlp- > wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSME.java > opennlp/sandbox/opennlp- > wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSParameters.java > opennlp/sandbox/opennlp- > wsd/src/main/java/opennlp/tools/disambiguator/mfs/MFS.java > opennlp/sandbox/opennlp- > wsd/src/main/java/opennlp/tools/disambiguator/oscc/DefaultOSCCContext > Generator.java > opennlp/sandbox/opennlp- > wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCContextGenerat > or.java > opennlp/sandbox/opennlp- > wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCME.java > opennlp/sandbox/opennlp- > wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCModel.java > opennlp/sandbox/opennlp- > wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCParameters.jav > a > opennlp/sandbox/opennlp- > wsd/src/test/java/opennlp/tools/disambiguator/LeskTester.java > opennlp/sandbox/opennlp- > wsd/src/test/java/opennlp/tools/disambiguator/MFSTester.java > opennlp/sandbox/opennlp- > wsd/src/test/java/opennlp/tools/disambiguator/Tester.java > > Modified: opennlp/sandbox/opennlp- > wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java > URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/mai > n/java/opennlp/tools/disambiguator/WSDParameters.java?rev=1734600&r1= > 1734599&r2=1734600&view=diff > ===================================================================== > ========= > --- opennlp/sandbox/opennlp- > wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java > (original) > +++ opennlp/sandbox/opennlp- > wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java Fri > Mar 11 17:37:07 2016 > @@ -27,6 +27,7 @@ public abstract class WSDParameters { > > protected boolean isCoarseSense; > public static boolean isStemCompare; > + protected boolean returnMultiple; > > public static enum SenseSource { > WORDNET, WSDHELPER, OTHER; > @@ -61,8 +62,17 @@ public abstract class WSDParameters { > this.senseSource = senseSource; > } > > + public boolean isReturnMultiple() { > + return returnMultiple; > + } > + > + public void setReturnMultiple(boolean returnMultiple) { > + this.returnMultiple = returnMultiple; > + } > + > public WSDParameters() { > this.isCoarseSense = false; > + this.returnMultiple = false; > } > > /** > > Modified: opennlp/sandbox/opennlp- > 
wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java > URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/mai > n/java/opennlp/tools/disambiguator/WSDisambiguator.java?rev=1734600&r > 1=1734599&r2=1734600&view=diff > ===================================================================== > ========= > --- opennlp/sandbox/opennlp- > wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java > (original) > +++ opennlp/sandbox/opennlp- > wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java > Fri Mar 11 17:37:07 2016 > @@ -150,7 +150,7 @@ public abstract class WSDisambiguator { > > if (WSDHelper.getNonRelevWordsDef(tokenTags[i]) != null) { > String s = IMSParameters.SenseSource.WSDHELPER.name() + " > " > - + tokenTags[i]; > + + WSDHelper.getNonRelevWordsDef(tokenTags[i]); > String[] sense = { s }; > > senses.add(sense); > > Modified: opennlp/sandbox/opennlp- > wsd/src/main/java/opennlp/tools/disambiguator/ims/DefaultIMSContextGe > nerator.java > URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/mai > n/java/opennlp/tools/disambiguator/ims/DefaultIMSContextGenerator.jav > a?rev=1734600&r1=1734599&r2=1734600&view=diff > ===================================================================== > ========= > --- opennlp/sandbox/opennlp- > wsd/src/main/java/opennlp/tools/disambiguator/ims/DefaultIMSContextGe > nerator.java (original) > +++ opennlp/sandbox/opennlp- > wsd/src/main/java/opennlp/tools/disambiguator/ims/DefaultIMSContextGe > nerator.java Fri Mar 11 17:37:07 2016 > @@ -55,8 +55,9 @@ public class DefaultIMSContextGenerator > } > > public String[] extractSurroundingWords(int index, String[] toks, > - String[] lemmas) { > + String[] lemmas, int windowSize) { > > + // TODO consider the windowSize > ArrayList<String> contextWords = new ArrayList<String>(); > > for (int i = 0; i < toks.length; i++) { > @@ -123,7 +124,7 @@ public class DefaultIMSContextGenerator > > HashSet<String> surroundingWords = new HashSet<>(); > surroundingWords.addAll(Arrays.asList(extractSurroundingWords(in > dex, toks, > - lemmas))); > + lemmas, windowSize))); > > String[] localCollocations = extractLocalCollocations(index, > toks, ngram); > > > Modified: opennlp/sandbox/opennlp- > wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSME.java > URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/mai > n/java/opennlp/tools/disambiguator/ims/IMSME.java?rev=1734600&r1=1734 > 599&r2=1734600&view=diff > ===================================================================== > ========= > --- opennlp/sandbox/opennlp- > wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSME.java > (original) > +++ opennlp/sandbox/opennlp- > wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSME.java Fri Mar > 11 17:37:07 2016 > @@ -41,16 +41,17 @@ public class IMSME extends WSDisambiguat > > protected static IMSContextGenerator cg = new > DefaultIMSContextGenerator(); > > - public IMSME(IMSParameters params){ > + public IMSME(IMSParameters params) { > this.params = params; > } > - > + > public IMSME(IMSModel model, IMSParameters params) { > this.imsModel = model; > this.params = params; > - > -// Assert.assertEquals(model.getWindowSize(),params.getWindowSize > ()); > -// Assert.assertEquals(model.getNgram(),params.getNgram()); > + } > + > + public IMSModel getModel() { > + return imsModel; > } > > public void setModel(IMSModel model) { > @@ -65,7 +66,7 @@ public class IMSME extends WSDisambiguat > TrainingParameters mlParams, IMSParameters imsParams, > IMSFactory 
imsfactory) throws IOException { > > - ArrayList<String> surroundingWordModel = > buildSurroundingWords(samples); > + ArrayList<String> surroundingWordModel = > buildSurroundingWords(samples, imsParams.getWindowSize()); > > HashMap<String, String> manifestInfoEntries = new > HashMap<String, String>(); > > @@ -88,13 +89,13 @@ public class IMSME extends WSDisambiguat > > events.add(ev); > > - es = ObjectStreamUtils.createObjectStream(events); > - > } while ((sample = samples.read()) != null); > } > > - EventTrainer trainer = TrainerFactory.getEventTrainer( > - mlParams.getSettings(), manifestInfoEntries); > + es = ObjectStreamUtils.createObjectStream(events); > + > + EventTrainer trainer = TrainerFactory > + .getEventTrainer(mlParams.getSettings(), > manifestInfoEntries); > imsModel = trainer.train(es); > > return new IMSModel(lang, wordTag, imsParams.windowSize, > imsParams.ngram, > @@ -102,13 +103,13 @@ public class IMSME extends WSDisambiguat > } > > public static ArrayList<String> buildSurroundingWords( > - ObjectStream<WSDSample> samples) throws IOException { > + ObjectStream<WSDSample> samples, int windowSize) throws > IOException { > DefaultIMSContextGenerator imsCG = new > DefaultIMSContextGenerator(); > ArrayList<String> surroundingWordsModel = new > ArrayList<String>(); > WSDSample sample; > while ((sample = samples.read()) != null) { > - String[] words = imsCG.extractSurroundingWords( > - sample.getTargetPosition(), sample.getSentence(), > sample.getLemmas()); > + String[] words = > imsCG.extractSurroundingWords(sample.getTargetPosition(), > + sample.getSentence(), sample.getLemmas(), windowSize); > > if (words.length > 0) { > for (String word : words) { > @@ -125,10 +126,11 @@ public class IMSME extends WSDisambiguat > if (WSDHelper.isRelevantPOSTag(sample.getTargetTag())) { > String wordTag = sample.getTargetWordTag(); > > - String trainingFile = ((IMSParameters) this.getParams()) > - .getTrainingDataDirectory() + sample.getTargetWordTag(); > + if (imsModel == null > + || > !imsModel.getWordTag().equals(sample.getTargetWordTag())) { > > - if (imsModel==null || > !imsModel.getWordTag().equals(sample.getTargetWordTag())) { > + String trainingFile = ((IMSParameters) this.getParams()) > + .getTrainingDataDirectory() + sample.getTargetWordTag(); > > File file = new File(trainingFile + ".ims.model"); > if (file.exists() && !file.isDirectory()) { > @@ -167,11 +169,11 @@ public class IMSME extends WSDisambiguat > } > > } else { > - > MFS mfs = new MFS(); > return mfs.disambiguate(wordTag); > } > } else { > + > String outcome = ""; > > String[] context = cg.getContext(sample, > @@ -226,8 +228,8 @@ public class IMSME extends WSDisambiguat > */ > public String[] disambiguate(String[] tokenizedContext, String[] > tokenTags, > String[] lemmas, int index) { > - return disambiguate(new WSDSample(tokenizedContext, tokenTags, > lemmas, > - index)); > + return disambiguate( > + new WSDSample(tokenizedContext, tokenTags, lemmas, index)); > } > > } > > Modified: opennlp/sandbox/opennlp- > wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSParameters.java > URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/mai > n/java/opennlp/tools/disambiguator/ims/IMSParameters.java?rev=1734600 > &r1=1734599&r2=1734600&view=diff > ===================================================================== > ========= > --- opennlp/sandbox/opennlp- > wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSParameters.java > (original) > +++ opennlp/sandbox/opennlp- > 
wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSParameters.java > Fri Mar 11 17:37:07 2016 > @@ -20,6 +20,7 @@ > package opennlp.tools.disambiguator.ims; > > import java.io.File; > +import java.security.InvalidParameterException; > > import opennlp.tools.disambiguator.WSDParameters; > > @@ -34,6 +35,11 @@ public class IMSParameters extends WSDPa > protected int ngram; > > protected String trainingDataDirectory; > + > + protected static final int DFLT_WIN_SIZE = 3; > + protected static final int DFLT_NGRAM = 2; > + protected static final String DFLT_LANG_CODE = "En"; > + protected static final SenseSource DFLT_SOURCE = > SenseSource.WORDNET; > > /** > * This constructor takes only two parameters. The default > language used is > @@ -49,8 +55,9 @@ public class IMSParameters extends WSDPa > * the source of the training data > */ > public IMSParameters(int windowSize, int ngram, SenseSource > senseSource, > - String trainingDataDirectory) { > - this.languageCode = "En"; > + String trainingDataDirectory){ > + > + this.languageCode = DFLT_LANG_CODE; > this.windowSize = windowSize; > this.ngram = ngram; > this.senseSource = senseSource; > @@ -63,19 +70,7 @@ public class IMSParameters extends WSDPa > } > > public IMSParameters(String trainingDataDirectory) { > - this(3, 2, SenseSource.WORDNET, trainingDataDirectory); > - > - File folder = new File(trainingDataDirectory); > - if (!folder.exists()) > - folder.mkdirs(); > - } > - > - public IMSParameters() { > - this(3, 2, SenseSource.WORDNET, null); > - } > - > - public IMSParameters(int windowSize, int ngram) { > - this(windowSize, ngram, SenseSource.WORDNET, null); > + this(DFLT_WIN_SIZE, DFLT_NGRAM, DFLT_SOURCE, > trainingDataDirectory); > } > > public String getLanguageCode() { > @@ -109,7 +104,6 @@ public class IMSParameters extends WSDPa > * Creates the context generator of IMS > */ > public IMSContextGenerator createContextGenerator() { > - > return new DefaultIMSContextGenerator(); > } > > @@ -123,7 +117,7 @@ public class IMSParameters extends WSDPa > > @Override > public boolean isValid() { > - // TODO Auto-generated method stub > + // TODO recheck this pattern switch to maps > return true; > } > > > Modified: opennlp/sandbox/opennlp- > wsd/src/main/java/opennlp/tools/disambiguator/mfs/MFS.java > URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/mai > n/java/opennlp/tools/disambiguator/mfs/MFS.java?rev=1734600&r1=173459 > 9&r2=1734600&view=diff > ===================================================================== > ========= > --- opennlp/sandbox/opennlp- > wsd/src/main/java/opennlp/tools/disambiguator/mfs/MFS.java (original) > +++ opennlp/sandbox/opennlp- > wsd/src/main/java/opennlp/tools/disambiguator/mfs/MFS.java Fri Mar 11 > 17:37:07 2016 > @@ -194,9 +194,12 @@ public class MFS extends WSDisambiguator > WordPOS wordPOS = new WordPOS(word, pos); > > ArrayList<Synset> synsets = wordPOS.getSynsets(); > - > - int size = synsets.size(); > - > + int size; > + if (this.parameters.isReturnMultiple()) { > + size = synsets.size(); > + } else { > + size = 1; > + } > String[] senses = new String[size]; > > for (int i = 0; i < size; i++) { > > Modified: opennlp/sandbox/opennlp- > wsd/src/main/java/opennlp/tools/disambiguator/oscc/DefaultOSCCContext > Generator.java > URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/mai > n/java/opennlp/tools/disambiguator/oscc/DefaultOSCCContextGenerator.j > ava?rev=1734600&r1=1734599&r2=1734600&view=diff > ===================================================================== 
> ========= > --- opennlp/sandbox/opennlp- > wsd/src/main/java/opennlp/tools/disambiguator/oscc/DefaultOSCCContext > Generator.java (original) > +++ opennlp/sandbox/opennlp- > wsd/src/main/java/opennlp/tools/disambiguator/oscc/DefaultOSCCContext > Generator.java Fri Mar 11 17:37:07 2016 > @@ -39,6 +39,7 @@ public class DefaultOSCCContextGenerator > public String[] extractSurroundingContextClusters(int index, > String[] toks, > String[] tags, String[] lemmas, int windowSize) { > > + // TODO consider windowSize > ArrayList<String> contextClusters = new ArrayList<String>(); > > for (int i = 0; i < toks.length; i++) { > @@ -49,19 +50,19 @@ public class DefaultOSCCContextGenerator > > String lemma = lemmas[i].toLowerCase().replaceAll("[^a- > z_]", "") > .trim(); > - > - WordPOS word = new WordPOS(lemma, tags[i]); > > - // TODO check fix for "_" and null pointers > - if (lemma.length() > 1 && !lemma.contains("_")) { > - try{ > - ArrayList<Synset> synsets = word.getSynsets(); > - if (synsets!=null && synsets.size() > 0 ){ > - contextClusters.add(synsets.get(0).getOffset() + ""); > - } > - }catch(NullPointerException ex) > - { > - //TODO tagger mistake add proper exception > + WordPOS word = new WordPOS(lemma, tags[i]); > + > + if (lemma.length() > 1) { > + try { > + ArrayList<Synset> synsets = word.getSynsets(); > + if (synsets != null && synsets.size() > 0) { > + for (Synset syn : synsets){ > + contextClusters.add(syn.getOffset() + ""); > + } > + } > + } catch (NullPointerException ex) { > + // TODO tagger mistake add proper exception > } > } > > @@ -80,30 +81,32 @@ public class DefaultOSCCContextGenerator > */ > @Override > public String[] getContext(int index, String[] toks, String[] > tags, > - String[] lemmas, int windowSize) { > + String[] lemmas, int windowSize, ArrayList<String> model) { > > HashSet<String> surroundingContextClusters = new HashSet<>(); > - surroundingContextClusters.addAll(Arrays > - .asList(extractSurroundingContextClusters(index, toks, tags, > lemmas, > - windowSize))); > + surroundingContextClusters > + .addAll(Arrays.asList(extractSurroundingContextClusters(inde > x, toks, > + tags, lemmas, windowSize))); > > - String[] serializedFeatures = new > String[surroundingContextClusters.size()]; > + String[] serializedFeatures = new String[model.size()]; > > int i = 0; > - > - for (String feature : surroundingContextClusters) { > - serializedFeatures[i] = "F" + i + "=" + feature; > + for (String word : model) { > + if (surroundingContextClusters.contains(word.toString())) { > + serializedFeatures[i] = "F" + i + "=1"; > + } else { > + serializedFeatures[i] = "F" + i + "=0"; > + } > i++; > } > > return serializedFeatures; > - > } > > - public String[] getContext(WSDSample sample, int windowSize) { > + public String[] getContext(WSDSample sample, int windowSize, > ArrayList<String> model) { > > return getContext(sample.getTargetPosition(), > sample.getSentence(), > - sample.getTags(), sample.getLemmas(), windowSize); > + sample.getTags(), sample.getLemmas(), windowSize, model); > } > > } > > Modified: opennlp/sandbox/opennlp- > wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCContextGenerat > or.java > URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/mai > n/java/opennlp/tools/disambiguator/oscc/OSCCContextGenerator.java?rev > =1734600&r1=1734599&r2=1734600&view=diff > ===================================================================== > ========= > --- opennlp/sandbox/opennlp- > wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCContextGenerat > 
or.java (original) > +++ opennlp/sandbox/opennlp- > wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCContextGenerat > or.java Fri Mar 11 17:37:07 2016 > @@ -19,6 +19,8 @@ > > package opennlp.tools.disambiguator.oscc; > > +import java.util.ArrayList; > + > import opennlp.tools.disambiguator.WSDSample; > > /** > @@ -27,7 +29,7 @@ import opennlp.tools.disambiguator.WSDSa > public interface OSCCContextGenerator { > > String[] getContext(int index, String[] toks, String[] tags, > String[] lemmas, > - int windowSize); > + int windowSize, ArrayList<String> model); > > - String[] getContext(WSDSample sample, int windowSize); > + String[] getContext(WSDSample sample, int windowSize, > ArrayList<String> model); > } > > Modified: opennlp/sandbox/opennlp- > wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCME.java > URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/mai > n/java/opennlp/tools/disambiguator/oscc/OSCCME.java?rev=1734600&r1=17 > 34599&r2=1734600&view=diff > ===================================================================== > ========= > --- opennlp/sandbox/opennlp- > wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCME.java > (original) > +++ opennlp/sandbox/opennlp- > wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCME.java Fri > Mar 11 17:37:07 2016 > @@ -22,7 +22,6 @@ import java.io.IOException; > import java.util.ArrayList; > import java.util.HashMap; > > -import junit.framework.Assert; > import opennlp.tools.disambiguator.WSDHelper; > import opennlp.tools.disambiguator.WSDSample; > import opennlp.tools.disambiguator.WSDisambiguator; > @@ -53,11 +52,11 @@ import opennlp.tools.util.TrainingParame > * Please see {@link DefaultOSCCContextGenerator} > * > * The approach finds the context clusters surrounding the target > and uses a > - * classifier to judge on the best case. > + * classifier to judge on the best case. > * > * Here an ME classifier is used. 
> * > -*/ > + */ > public class OSCCME extends WSDisambiguator { > > protected OSCCModel osccModel; > @@ -69,10 +68,12 @@ public class OSCCME extends WSDisambigua > } > > public OSCCME(OSCCModel model, OSCCParameters params) { > - this.osccModel = osccModel; > + this.osccModel = model; > this.params = params; > + } > > - Assert.assertEquals(model.getWindowSize(), > params.getWindowSize()); > + public OSCCModel getModel() { > + return osccModel; > } > > public void setModel(OSCCModel model) { > @@ -85,7 +86,10 @@ public class OSCCME extends WSDisambigua > > public static OSCCModel train(String lang, ObjectStream<WSDSample> > samples, > TrainingParameters mlParams, OSCCParameters osccParams, > - OSCCFactory imsfactory) throws IOException { > + OSCCFactory osccFactory) throws IOException { > + > + ArrayList<String> surroundingClusterModel = > buildSurroundingClusters( > + samples, osccParams.getWindowSize()); > > HashMap<String, String> manifestInfoEntries = new > HashMap<String, String>(); > > @@ -99,39 +103,57 @@ public class OSCCME extends WSDisambigua > if (sample != null) { > wordTag = sample.getTargetWordTag(); > do { > - > String sense = sample.getSenseIDs().get(0); > - > - String[] context = cg.getContext(sample, > osccParams.windowSize); > + String[] context = cg.getContext(sample, > osccParams.windowSize, > + surroundingClusterModel); > Event ev = new Event(sense + "", context); > - > events.add(ev); > - > - es = ObjectStreamUtils.createObjectStream(events); > - > } while ((sample = samples.read()) != null); > } > > - EventTrainer trainer = TrainerFactory.getEventTrainer( > - mlParams.getSettings(), manifestInfoEntries); > + es = ObjectStreamUtils.createObjectStream(events); > + EventTrainer trainer = TrainerFactory > + .getEventTrainer(mlParams.getSettings(), > manifestInfoEntries); > + > osccModel = trainer.train(es); > > - return new OSCCModel(lang, wordTag, osccParams.windowSize, > osccModel, manifestInfoEntries, imsfactory); > + return new OSCCModel(lang, wordTag, osccParams.windowSize, > osccModel, > + surroundingClusterModel, manifestInfoEntries, osccFactory); > } > > + public static ArrayList<String> buildSurroundingClusters( > + ObjectStream<WSDSample> samples, int windowSize) throws > IOException { > + // TODO modify to clusters > + DefaultOSCCContextGenerator osccCG = new > DefaultOSCCContextGenerator(); > + ArrayList<String> surroundingWordsModel = new > ArrayList<String>(); > + WSDSample sample; > + while ((sample = samples.read()) != null) { > + String[] words = osccCG.extractSurroundingContextClusters( > + sample.getTargetPosition(), sample.getSentence(), > sample.getTags(), > + sample.getLemmas(), windowSize); > + > + if (words.length > 0) { > + for (String word : words) { > + surroundingWordsModel.add(word); > + } > + } > + } > + samples.reset(); > + return surroundingWordsModel; > + } > > @Override > public String[] disambiguate(WSDSample sample) { > if (WSDHelper.isRelevantPOSTag(sample.getTargetTag())) { > String wordTag = sample.getTargetWordTag(); > > - String trainingFile = ((OSCCParameters) this.getParams()) > - .getTrainingDataDirectory() + sample.getTargetWordTag(); > - > if (osccModel == null > || > !osccModel.getWordTag().equals(sample.getTargetWordTag())) { > > - File file = new File(trainingFile + ".ims.model"); > + String trainingFile = ((OSCCParameters) this.getParams()) > + .getTrainingDataDirectory() + sample.getTargetWordTag(); > + > + File file = new File(trainingFile + ".oscc.model"); > if (file.exists() && !file.isDirectory()) { > try { > 
setModel(new OSCCModel(file)); > @@ -147,7 +169,8 @@ public class OSCCME extends WSDisambigua > String outcome = ""; > > String[] context = cg.getContext(sample, > - ((OSCCParameters) this.params).windowSize); > + ((OSCCParameters) this.params).windowSize, > + osccModel.getContextClusters()); > > double[] outcomeProbs = > osccModel.getOSCCMaxentModel().eval(context); > outcome = > osccModel.getOSCCMaxentModel().getBestOutcome(outcomeProbs); > @@ -174,7 +197,8 @@ public class OSCCME extends WSDisambigua > String outcome = ""; > > String[] context = cg.getContext(sample, > - ((OSCCParameters) this.params).windowSize); > + ((OSCCParameters) this.params).windowSize, > + osccModel.getContextClusters()); > > double[] outcomeProbs = > osccModel.getOSCCMaxentModel().eval(context); > outcome = > osccModel.getOSCCMaxentModel().getBestOutcome(outcomeProbs); > @@ -223,8 +247,8 @@ public class OSCCME extends WSDisambigua > */ > public String[] disambiguate(String[] tokenizedContext, String[] > tokenTags, > String[] lemmas, int index) { > - return disambiguate(new WSDSample(tokenizedContext, tokenTags, > lemmas, > - index)); > + return disambiguate( > + new WSDSample(tokenizedContext, tokenTags, lemmas, index)); > } > > } > > Modified: opennlp/sandbox/opennlp- > wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCModel.java > URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/mai > n/java/opennlp/tools/disambiguator/oscc/OSCCModel.java?rev=1734600&r1 > =1734599&r2=1734600&view=diff > ===================================================================== > ========= > --- opennlp/sandbox/opennlp- > wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCModel.java > (original) > +++ opennlp/sandbox/opennlp- > wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCModel.java Fri > Mar 11 17:37:07 2016 > @@ -21,6 +21,7 @@ import java.io.File; > import java.io.IOException; > import java.io.InputStream; > import java.util.ArrayList; > +import java.util.Arrays; > import java.util.Map; > import java.util.Properties; > import java.net.URL; > @@ -43,13 +44,13 @@ public class OSCCModel extends BaseModel > private static final String WINSIZE = "winsize"; > private static final String CONTEXTCLUSTERS = "contextclusters"; > > - //private ArrayList<String> contextClusters = new > ArrayList<String>(); > + private ArrayList<String> contextClusters = new > ArrayList<String>(); > private String wordTag; > private int windowSize; > > - /*public ArrayList<String> getContextClusters() { > + public ArrayList<String> getContextClusters() { > return contextClusters; > - }*/ > + } > > public int getWindowSize() { > return windowSize; > @@ -59,9 +60,9 @@ public class OSCCModel extends BaseModel > this.windowSize = windowSize; > } > > - /* public void setContextClusters(ArrayList<String> > contextClusters) { > + public void setContextClusters(ArrayList<String> contextClusters) > { > this.contextClusters = contextClusters; > - }*/ > + } > > public String getWordTag() { > return wordTag; > @@ -72,7 +73,7 @@ public class OSCCModel extends BaseModel > } > > public OSCCModel(String languageCode, String wordTag, int > windowSize, > - MaxentModel osccModel, > + MaxentModel osccModel, ArrayList<String> contextClusters, > Map<String, String> manifestInfoEntries, OSCCFactory factory) > { > super(COMPONENT_NAME, languageCode, manifestInfoEntries, > factory); > > @@ -80,17 +81,17 @@ public class OSCCModel extends BaseModel > this.setManifestProperty(WORDTAG, wordTag); > this.setManifestProperty(WINSIZE, windowSize + ""); > > 
-// this.setManifestProperty(CONTEXTCLUSTERS, > -// StringUtils.join(contextClusters, ",")); > + this.setManifestProperty(CONTEXTCLUSTERS, > + StringUtils.join(contextClusters, ",")); > > - //this.contextClusters = contextClusters; > + this.contextClusters = contextClusters; > checkArtifactMap(); > } > > public OSCCModel(String languageCode, String wordTag, int > windowSize, > - int ngram, MaxentModel osccModel, > + int ngram, MaxentModel osccModel, ArrayList<String> > contextClusters, > OSCCFactory factory) { > - this(languageCode, wordTag, windowSize, osccModel, > + this(languageCode, wordTag, windowSize, osccModel, > contextClusters, > null, factory); > } > > @@ -135,10 +136,10 @@ public class OSCCModel extends BaseModel > > public void updateAttributes() { > Properties manifest = (Properties) > artifactMap.get(MANIFEST_ENTRY); > - //String contextClusters = (String) > manifest.get(CONTEXTCLUSTERS); > + String contextClusters = (String) manifest.get(CONTEXTCLUSTERS); > > - /* this.contextClusters = new ArrayList( > - Arrays.asList(contextClusters.split(",")));*/ > + this.contextClusters = new ArrayList( > + Arrays.asList(contextClusters.split(","))); > this.wordTag = (String) manifest.get(WORDTAG); > this.windowSize = Integer.parseInt((String) > manifest.get(WINSIZE)); > } > > Modified: opennlp/sandbox/opennlp- > wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCParameters.jav > a > URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/mai > n/java/opennlp/tools/disambiguator/oscc/OSCCParameters.java?rev=17346 > 00&r1=1734599&r2=1734600&view=diff > ===================================================================== > ========= > --- opennlp/sandbox/opennlp- > wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCParameters.jav > a (original) > +++ opennlp/sandbox/opennlp- > wsd/src/main/java/opennlp/tools/disambiguator/oscc/OSCCParameters.jav > a Fri Mar 11 17:37:07 2016 > @@ -70,13 +70,11 @@ public class OSCCParameters extends WSDP > } > > public OSCCParameters() { > - // TODO change the "" into null ?? > - this(DFLT_WIN_SIZE, DFLT_SOURCE, ""); > + this(DFLT_WIN_SIZE, DFLT_SOURCE, null); > } > > public OSCCParameters(int windowSize) { > - // TODO change the "" into null ?? 
> - this(windowSize, DFLT_SOURCE, ""); > + this(windowSize, DFLT_SOURCE, null); > } > > public String getLanguageCode() { > > Copied: opennlp/sandbox/opennlp- > wsd/src/test/java/opennlp/tools/disambiguator/IMSMETester.java (from > r1733577, opennlp/sandbox/opennlp- > wsd/src/test/java/opennlp/tools/disambiguator/IMSTester.java) > URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/tes > t/java/opennlp/tools/disambiguator/IMSMETester.java?p2=opennlp/sandbo > x/opennlp- > wsd/src/test/java/opennlp/tools/disambiguator/IMSMETester.java&p1=ope > nnlp/sandbox/opennlp- > wsd/src/test/java/opennlp/tools/disambiguator/IMSTester.java&r1=17335 > 77&r2=1734600&rev=1734600&view=diff > ===================================================================== > ========= > --- opennlp/sandbox/opennlp- > wsd/src/test/java/opennlp/tools/disambiguator/IMSTester.java > (original) > +++ opennlp/sandbox/opennlp- > wsd/src/test/java/opennlp/tools/disambiguator/IMSMETester.java Fri > Mar 11 17:37:07 2016 > @@ -19,107 +19,173 @@ > > package opennlp.tools.disambiguator; > > +import static org.junit.Assert.*; > + > +import java.io.File; > +import java.io.IOException; > import java.util.ArrayList; > import java.util.List; > > +import org.junit.BeforeClass; > +import org.junit.Test; > + > +import opennlp.tools.disambiguator.datareader.SemcorReaderExtended; > +import opennlp.tools.disambiguator.ims.IMSFactory; > import opennlp.tools.disambiguator.ims.IMSME; > +import opennlp.tools.disambiguator.ims.IMSModel; > import opennlp.tools.disambiguator.ims.IMSParameters; > +import opennlp.tools.util.ObjectStream; > import opennlp.tools.util.Span; > +import opennlp.tools.util.TrainingParameters; > > /** > - * This is a typical example of how to call the disambiguation > function in the > - * IMS class. > - * <ul> > - * <li>In the 2 first examples, the training data exist, therefore > the IMS > - * approach is used.</li> > - * <li>In the 3rd example, the training data for the word to > disambiguate are > - * absent, therefore the Most Frequent Sents (MFS) is returend</li> > - * </ul> > + * This is the test class for {@link IMSME}. > + * > + * The scope of this test is to make sure that the IMS disambiguator > code can be > + * executed. This test can not detect mistakes which lead to > incorrect feature > + * generation or other mistakes which decrease the disambiguation > performance of the > + * disambiguator. > + * > + * In this test the {@link IMSME} is trained with Semcor and then > the computed > + * model is used to predict sentences from the training sentences. 
> */ > -public class IMSTester { > - > - public static void main(String[] args) { > - > - // TODO write unit test > - > - String modelsDir = "src\\test\\resources\\models\\"; > +public class IMSMETester { > + // TODO write more tests > + // TODO modify when we fix the parameter model > + > + static String modelsDir = "src\\test\\resources\\models\\"; > + static String trainingDataDirectory = > "src\\test\\resources\\supervised\\models\\"; > + > + static IMSParameters IMSParams; > + static IMSME ims; > + static IMSFactory IMSFactory; > + static IMSModel model; > + > + static String test = "please.v"; > + static File outFile; > + > + static String test1 = "We need to discuss an important topic, > please write to me soon."; > + static String test2 = "The component was highly radioactive to the > point that" > + + " it has been activated the second it touched water"; > + static String test3 = "The summer is almost over and I did not go > to the beach even once"; > + > + static String[] sentence1; > + static String[] sentence2; > + static String[] sentence3; > + > + static String[] tags1; > + static String[] tags2; > + static String[] tags3; > + > + static String[] lemmas1; > + static String[] lemmas2; > + static String[] lemmas3; > + > + /* > + * Setup the testing variables > + */ > + @BeforeClass > + public static void setUpAndTraining() { > WSDHelper.loadTokenizer(modelsDir + "en-token.bin"); > WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict"); > WSDHelper.loadTagger(modelsDir + "en-pos-maxent.bin"); > > - IMSParameters params = new IMSParameters(""); > - > - WSDHelper.print(params.getTrainingDataDirectory()); > + sentence1 = WSDHelper.getTokenizer().tokenize(test1); > + sentence2 = WSDHelper.getTokenizer().tokenize(test2); > + sentence3 = WSDHelper.getTokenizer().tokenize(test3); > + > + tags1 = WSDHelper.getTagger().tag(sentence1); > + tags2 = WSDHelper.getTagger().tag(sentence2); > + tags3 = WSDHelper.getTagger().tag(sentence3); > > - IMSME ims = new IMSME(params); > - > - > - // This is how to make the context for one-word-disambiguation > using IMS > - > - String test1 = "We need to discuss important topic, please write > to me soon."; > - String[] sentence1 = WSDHelper.getTokenizer().tokenize(test1); > - String[] tags1 = WSDHelper.getTagger().tag(sentence1); > List<String> tempLemmas1 = new ArrayList<String>(); > for (int i = 0; i < sentence1.length; i++) { > - String lemma = WSDHelper.getLemmatizer() > - .lemmatize(sentence1[i], tags1[i]); > - tempLemmas1.add(lemma); > + tempLemmas1 > + .add(WSDHelper.getLemmatizer().lemmatize(sentence1[i], > tags1[i])); > } > - String[] lemmas1 = tempLemmas1.toArray(new > String[tempLemmas1.size()]); > + lemmas1 = tempLemmas1.toArray(new String[tempLemmas1.size()]); > > - // output > - String[] senses1 = ims.disambiguate(sentence1, tags1, lemmas1, > 8); > - System.out.print(lemmas1[8] + " :\t"); > - WSDHelper.print(senses1); > - WSDHelper.print("*****************************"); > - > - // This is how to make the context for disambiguation of span of > words > - > - String test2 = "The component was highly radioactive to the > point that" > - + " it has been activated the second it touched water"; > - String[] sentence2 = WSDHelper.getTokenizer().tokenize(test2); > - String[] tags2 = WSDHelper.getTagger().tag(sentence2); > List<String> tempLemmas2 = new ArrayList<String>(); > for (int i = 0; i < sentence2.length; i++) { > - String lemma = WSDHelper.getLemmatizer() > - .lemmatize(sentence2[i], tags2[i]); > - tempLemmas2.add(lemma); > + 
tempLemmas2 > + .add(WSDHelper.getLemmatizer().lemmatize(sentence2[i], > tags2[i])); > } > - String[] lemmas2 = tempLemmas2.toArray(new > String[tempLemmas2.size()]); > - Span span = new Span(3, 7); > - > - // output > - List<String[]> senses2 = ims.disambiguate(sentence2, tags2, > lemmas2, span); > - for (int i = span.getStart(); i < span.getEnd() + 1; i++) { > - String[] senses = senses2.get(i - span.getStart()); > - System.out.print(lemmas2[i] + " :\t"); > - WSDHelper.print(senses); > - WSDHelper.print("----------"); > - } > - > - WSDHelper.print("*****************************"); > + lemmas2 = tempLemmas2.toArray(new String[tempLemmas2.size()]); > > - // This is how to make the context for all-words-disambiguation > - > - String test3 = "The summer almost over and I not to the beach > even once"; > - String[] sentence3 = WSDHelper.getTokenizer().tokenize(test3); > - String[] tags3 = WSDHelper.getTagger().tag(sentence3); > List<String> tempLemmas3 = new ArrayList<String>(); > for (int i = 0; i < sentence3.length; i++) { > - String lemma = WSDHelper.getLemmatizer() > - .lemmatize(sentence3[i], tags3[i]); > - tempLemmas3.add(lemma); > + tempLemmas3 > + .add(WSDHelper.getLemmatizer().lemmatize(sentence3[i], > tags3[i])); > } > - String[] lemmas3 = tempLemmas3.toArray(new > String[tempLemmas3.size()]); > + lemmas3 = tempLemmas3.toArray(new String[tempLemmas3.size()]); > > - // output > - List<String[]> senses3 = ims.disambiguate(sentence3, tags3, > lemmas3); > - for (int i = 0; i < sentence3.length; i++) { > - String[] senses = senses3.get(i); > - System.out.print(lemmas3[i] + " :\t"); > - WSDHelper.print(senses); > - WSDHelper.print("----------"); > + IMSParams = new IMSParameters(""); > + IMSParams.setTrainingDataDirectory(trainingDataDirectory); > + IMSFactory = new IMSFactory(); > + TrainingParameters trainingParams = new TrainingParameters(); > + SemcorReaderExtended sr = new SemcorReaderExtended(); > + ObjectStream<WSDSample> sampleStream = > sr.getSemcorDataStream(test); > + > + IMSModel writeModel = null; > + /* > + * Tests training the disambiguator We test both writing and > reading a model > + * file trained by semcor > + */ > + > + try { > + writeModel = IMSME.train("en", sampleStream, trainingParams, > IMSParams, > + IMSFactory); > + assertNotNull("Checking the model to be written", writeModel); > + writeModel.writeModel(IMSParams.getTrainingDataDirectory() + > test); > + outFile = new File( > + IMSParams.getTrainingDataDirectory() + test + > ".ims.model"); > + model = new IMSModel(outFile); > + assertNotNull("Checking the read model", model); > + ims = new IMSME(model, IMSParams); > + assertNotNull("Checking the disambiguator", ims); > + } catch (IOException e1) { > + e1.printStackTrace(); > + fail("Exception in training"); > } > } > > + /* > + * Tests disambiguating only one word : The ambiguous word > "please" > + */ > + @Test > + public void testOneWordDisambiguation() { > + String[] senses = ims.disambiguate(sentence1, tags1, lemmas1, > 8); > + > + assertEquals("Check number of senses", 1, senses.length); > + } > + > + /* > + * Tests disambiguating a word Span In this case we test a mix of > monosemous > + * and polysemous words as well as words that do not need > disambiguation such > + * as determiners > + */ > + @Test > + public void testWordSpanDisambiguation() { > + Span span = new Span(3, 7); > + List<String[]> senses = ims.disambiguate(sentence2, tags2, > lemmas2, span); > + > + assertEquals("Check number of returned words", 5, > senses.size()); > + 
assertEquals("Check number of senses", 1, senses.get(0).length); > + assertEquals("Check monosemous word", 1, senses.get(1).length); > + assertEquals("Check preposition", "WSDHELPER to", > senses.get(2)[0]); > + assertEquals("Check determiner", "WSDHELPER determiner", > senses.get(3)[0]); > + } > + > + /* > + * Tests disambiguating all the words > + */ > + @Test > + public void testAllWordsDisambiguation() { > + List<String[]> senses = ims.disambiguate(sentence3, tags3, > lemmas3); > + > + assertEquals("Check number of returned words", 15, > senses.size()); > + assertEquals("Check preposition", "WSDHELPER personal pronoun", > + senses.get(6)[0]); > + } > + > } > > Modified: opennlp/sandbox/opennlp- > wsd/src/test/java/opennlp/tools/disambiguator/LeskTester.java > URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/tes > t/java/opennlp/tools/disambiguator/LeskTester.java?rev=1734600&r1=173 > 4599&r2=1734600&view=diff > ===================================================================== > ========= > --- opennlp/sandbox/opennlp- > wsd/src/test/java/opennlp/tools/disambiguator/LeskTester.java > (original) > +++ opennlp/sandbox/opennlp- > wsd/src/test/java/opennlp/tools/disambiguator/LeskTester.java Fri Mar > 11 17:37:07 2016 > @@ -19,81 +19,137 @@ > > package opennlp.tools.disambiguator; > > +import static org.junit.Assert.assertEquals; > + > import java.util.ArrayList; > import java.util.List; > > import opennlp.tools.disambiguator.lesk.Lesk; > import opennlp.tools.disambiguator.lesk.LeskParameters; > import opennlp.tools.disambiguator.lesk.LeskParameters.LESK_TYPE; > +import opennlp.tools.util.Span; > > +import org.junit.BeforeClass; > import org.junit.Test; > > +/** > + * This is the test class for {@link Lesk}. > + * > + * The scope of this test is to make sure that the Lesk > disambiguator code can be > + * executed. This test can not detect mistakes which lead to > incorrect feature > + * generation or other mistakes which decrease the disambiguation > performance of the > + * disambiguator. 
> + */ > public class LeskTester { > - @Test > - public static void main(String[] args) { > + // TODO write more tests > + > + static String modelsDir = "src\\test\\resources\\models\\"; > + > + static Lesk lesk; > + > + static String test1 = "We need to discuss an important topic, > please write to me soon."; > + static String test2 = "The component was highly radioactive to the > point that" > + + " it has been activated the second it touched water"; > + static String test3 = "The summer is almost over and I did not go > to the beach even once"; > + > + static String[] sentence1; > + static String[] sentence2; > + static String[] sentence3; > + > + static String[] tags1; > + static String[] tags2; > + static String[] tags3; > + > + static String[] lemmas1; > + static String[] lemmas2; > + static String[] lemmas3; > + > + /* > + * Setup the testing variables > + */ > + @BeforeClass > + public static void setUp() { > > - Lesk lesk = new Lesk(); > - LeskParameters params = new LeskParameters(); > - params.setLeskType(LESK_TYPE.LESK_EXT); > - boolean a[] = { true, true, true, true, true, true, true, true, > true, true }; > - params.setFeatures(a); > - lesk.setParams(params); > - String modelsDir = "src\\test\\resources\\models\\"; > WSDHelper.loadTokenizer(modelsDir + "en-token.bin"); > WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict"); > WSDHelper.loadTagger(modelsDir + "en-pos-maxent.bin"); > > - String test1 = "I went to the bank to deposit money."; > - String[] sentence1 = WSDHelper.getTokenizer().tokenize(test1); > - int targetWordIndex1 = 5; > - String[] tags1 = WSDHelper.getTagger().tag(sentence1); > + sentence1 = WSDHelper.getTokenizer().tokenize(test1); > + sentence2 = WSDHelper.getTokenizer().tokenize(test2); > + sentence3 = WSDHelper.getTokenizer().tokenize(test3); > + > + tags1 = WSDHelper.getTagger().tag(sentence1); > + tags2 = WSDHelper.getTagger().tag(sentence2); > + tags3 = WSDHelper.getTagger().tag(sentence3); > + > List<String> tempLemmas1 = new ArrayList<String>(); > for (int i = 0; i < sentence1.length; i++) { > - String lemma = WSDHelper.getLemmatizer() > - .lemmatize(sentence1[i], tags1[i]); > - tempLemmas1.add(lemma); > + tempLemmas1 > + .add(WSDHelper.getLemmatizer().lemmatize(sentence1[i], > tags1[i])); > } > - String[] lemmas1 = tempLemmas1.toArray(new > String[tempLemmas1.size()]); > - String[] results1 = lesk.disambiguate(sentence1, tags1, lemmas1, > - targetWordIndex1); > - WSDHelper.print(results1); > - WSDHelper.printResults(lesk, results1); > - > - WSDHelper.print("----------------------------------------"); > - > - String test2 = "it was a strong argument that his hypothesis was > true"; > - String[] sentence2 = WSDHelper.getTokenizer().tokenize(test2); > - int targetWordIndex2 = 4; > - String[] tags2 = WSDHelper.getTagger().tag(sentence2); > + lemmas1 = tempLemmas1.toArray(new String[tempLemmas1.size()]); > + > List<String> tempLemmas2 = new ArrayList<String>(); > - for (int i = 0; i < sentence1.length; i++) { > - String lemma = WSDHelper.getLemmatizer() > - .lemmatize(sentence2[i], tags2[i]); > - tempLemmas2.add(lemma); > + for (int i = 0; i < sentence2.length; i++) { > + tempLemmas2 > + .add(WSDHelper.getLemmatizer().lemmatize(sentence2[i], > tags2[i])); > } > - String[] lemmas2 = tempLemmas2.toArray(new > String[tempLemmas2.size()]); > - String[] results2 = lesk.disambiguate(sentence2, tags2, lemmas2, > - targetWordIndex2); > - WSDHelper.print(results2); > - WSDHelper.printResults(lesk, results2); > - 
WSDHelper.print("----------------------------------------"); > - > - String test3 = "the component was highly radioactive to the > point that it has been activated the second it touched water"; > - String[] sentence3 = WSDHelper.getTokenizer().tokenize(test3); > - int targetWordIndex3 = 3; > - String[] tags3 = WSDHelper.getTagger().tag(sentence3); > + lemmas2 = tempLemmas2.toArray(new String[tempLemmas2.size()]); > + > List<String> tempLemmas3 = new ArrayList<String>(); > for (int i = 0; i < sentence3.length; i++) { > - String lemma = WSDHelper.getLemmatizer() > - .lemmatize(sentence3[i], tags3[i]); > - tempLemmas3.add(lemma); > + tempLemmas3 > + .add(WSDHelper.getLemmatizer().lemmatize(sentence3[i], > tags3[i])); > } > - String[] lemmas3 = tempLemmas3.toArray(new > String[tempLemmas3.size()]); > - String[] results3 = lesk.disambiguate(sentence3, tags3, lemmas3, > - targetWordIndex3); > - WSDHelper.print(results3); > - WSDHelper.printResults(lesk, results3); > - WSDHelper.print("----------------------------------------"); > + lemmas3 = tempLemmas3.toArray(new String[tempLemmas3.size()]); > + > + lesk = new Lesk(); > + > + LeskParameters params = new LeskParameters(); > + params.setLeskType(LESK_TYPE.LESK_EXT); > + boolean a[] = { true, true, true, true, true, true, true, true, > true, > + true }; > + params.setFeatures(a); > + lesk.setParams(params); > + } > + > + /* > + * Tests disambiguating only one word : The ambiguous word > "please" > + */ > + @Test > + public void testOneWordDisambiguation() { > + String[] senses = lesk.disambiguate(sentence1, tags1, lemmas1, > 8); > + > + assertEquals("Check number of senses", 1, senses.length); > + } > + > + /* > + * Tests disambiguating a word Span In this case we test a mix of > monosemous > + * and polysemous words as well as words that do not need > disambiguation such > + * as determiners > + */ > + @Test > + public void testWordSpanDisambiguation() { > + Span span = new Span(3, 7); > + List<String[]> senses = lesk.disambiguate(sentence2, tags2, > lemmas2, span); > + > + assertEquals("Check number of returned words", 5, > senses.size()); > + assertEquals("Check number of senses", 3, senses.get(0).length); > + assertEquals("Check monosemous word", 1, senses.get(1).length); > + assertEquals("Check preposition", "WSDHELPER to", > senses.get(2)[0]); > + assertEquals("Check determiner", "WSDHELPER determiner", > senses.get(3)[0]); > + } > + > + /* > + * Tests disambiguating all the words > + */ > + @Test > + public void testAllWordsDisambiguation() { > + List<String[]> senses = lesk.disambiguate(sentence3, tags3, > lemmas3); > + > + assertEquals("Check number of returned words", 15, > senses.size()); > + assertEquals("Check preposition", "WSDHELPER personal pronoun", > + senses.get(6)[0]); > } > > } > \ No newline at end of file > > Modified: opennlp/sandbox/opennlp- > wsd/src/test/java/opennlp/tools/disambiguator/MFSTester.java > URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/tes > t/java/opennlp/tools/disambiguator/MFSTester.java?rev=1734600&r1=1734 > 599&r2=1734600&view=diff > ===================================================================== > ========= > --- opennlp/sandbox/opennlp- > wsd/src/test/java/opennlp/tools/disambiguator/MFSTester.java > (original) > +++ opennlp/sandbox/opennlp- > wsd/src/test/java/opennlp/tools/disambiguator/MFSTester.java Fri Mar > 11 17:37:07 2016 > @@ -19,96 +19,128 @@ > > package opennlp.tools.disambiguator; > > +import static org.junit.Assert.assertEquals; > + > import java.util.ArrayList; 
> import java.util.List; > +import org.junit.BeforeClass; > +import org.junit.Test; > > import opennlp.tools.disambiguator.mfs.MFS; > import opennlp.tools.util.Span; > > /** > - * This is a typical example of how to call the disambiguation > function in the > - * MFS class. > + * This is the test class for {@link MFS}. > + * > + * The scope of this test is to make sure that the MFS disambiguator > code can be > + * executed. This test can not detect mistakes which lead to > incorrect feature > + * generation or other mistakes which decrease the disambiguation > performance of the > + * disambiguator. > */ > public class MFSTester { > + // TODO write more tests > + // TODO modify when we fix the parameter model > + > + static String modelsDir = "src\\test\\resources\\models\\"; > + > + static MFS mfs; > + > + static String test1 = "We need to discuss an important topic, > please write to me soon."; > + static String test2 = "The component was highly radioactive to the > point that" > + + " it has been activated the second it touched water"; > + static String test3 = "The summer is almost over and I did not go > to the beach even once"; > + > + static String[] sentence1; > + static String[] sentence2; > + static String[] sentence3; > + > + static String[] tags1; > + static String[] tags2; > + static String[] tags3; > + > + static String[] lemmas1; > + static String[] lemmas2; > + static String[] lemmas3; > + > + /* > + * Setup the testing variables and the training files > + */ > + @BeforeClass > + public static void setUpAndTraining() { > > - public static void main(String[] args) { > - String modelsDir = "src\\test\\resources\\models\\"; > WSDHelper.loadTokenizer(modelsDir + "en-token.bin"); > WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict"); > WSDHelper.loadTagger(modelsDir + "en-pos-maxent.bin"); > > - MFS mfs = new MFS(); > + sentence1 = WSDHelper.getTokenizer().tokenize(test1); > + sentence2 = WSDHelper.getTokenizer().tokenize(test2); > + sentence3 = WSDHelper.getTokenizer().tokenize(test3); > + > + tags1 = WSDHelper.getTagger().tag(sentence1); > + tags2 = WSDHelper.getTagger().tag(sentence2); > + tags3 = WSDHelper.getTagger().tag(sentence3); > > - /** > - * This is how to make the context for one-word-disambiguation > using IMS > - */ > - String test1 = "We need to discuss important topic, please write > to me soon."; > - String[] sentence1 = WSDHelper.getTokenizer().tokenize(test1); > - String[] tags1 = WSDHelper.getTagger().tag(sentence1); > List<String> tempLemmas1 = new ArrayList<String>(); > for (int i = 0; i < sentence1.length; i++) { > - String lemma = WSDHelper.getLemmatizer() > - .lemmatize(sentence1[i], tags1[i]); > - tempLemmas1.add(lemma); > + tempLemmas1 > + .add(WSDHelper.getLemmatizer().lemmatize(sentence1[i], > tags1[i])); > } > - String[] lemmas1 = tempLemmas1.toArray(new > String[tempLemmas1.size()]); > + lemmas1 = tempLemmas1.toArray(new String[tempLemmas1.size()]); > > - // output > - String[] senses1 = mfs.disambiguate(sentence1, tags1, lemmas1, > 8); > - System.out.print(lemmas1[8] + " :\t"); > - WSDHelper.print(senses1); > - WSDHelper.print("*****************************"); > - > - /** > - * This is how to make the context for disambiguation of span of > words > - */ > - String test2 = "The component was highly radioactive to the > point that" > - + " it has been activated the second it touched water"; > - String[] sentence2 = WSDHelper.getTokenizer().tokenize(test2); > - String[] tags2 = WSDHelper.getTagger().tag(sentence2); > List<String> tempLemmas2 
= new ArrayList<String>(); > for (int i = 0; i < sentence2.length; i++) { > - String lemma = WSDHelper.getLemmatizer() > - .lemmatize(sentence2[i], tags2[i]); > - tempLemmas2.add(lemma); > + tempLemmas2 > + .add(WSDHelper.getLemmatizer().lemmatize(sentence2[i], > tags2[i])); > } > - String[] lemmas2 = tempLemmas2.toArray(new > String[tempLemmas2.size()]); > - Span span = new Span(3, 7); > + lemmas2 = tempLemmas2.toArray(new String[tempLemmas2.size()]); > > - // output > - List<String[]> senses2 = mfs.disambiguate(sentence2, tags2, > lemmas2, span); > - for (int i = span.getStart(); i < span.getEnd() + 1; i++) { > - String[] senses = senses2.get(i - span.getStart()); > - System.out.print(lemmas2[i] + " :\t"); > - WSDHelper.print(senses); > - WSDHelper.print("----------"); > - } > - > - WSDHelper.print("*****************************"); > - > - /** > - * This is how to make the context for all-words-disambiguation > - */ > - String test3 = "The summer is almost over and I have not been to > the beach even once"; > - String[] sentence3 = WSDHelper.getTokenizer().tokenize(test3); > - String[] tags3 = WSDHelper.getTagger().tag(sentence3); > List<String> tempLemmas3 = new ArrayList<String>(); > for (int i = 0; i < sentence3.length; i++) { > - String lemma = WSDHelper.getLemmatizer() > - .lemmatize(sentence3[i], tags3[i]); > - tempLemmas3.add(lemma); > + tempLemmas3 > + .add(WSDHelper.getLemmatizer().lemmatize(sentence3[i], > tags3[i])); > } > - String[] lemmas3 = tempLemmas3.toArray(new > String[tempLemmas3.size()]); > + lemmas3 = tempLemmas3.toArray(new String[tempLemmas3.size()]); > > - // output > - List<String[]> senses3 = mfs.disambiguate(sentence3, tags3, > lemmas3); > - for (int i = 0; i < sentence3.length; i++) { > - String[] senses = senses3.get(i); > - System.out.print(lemmas3[i] + " :\t"); > - WSDHelper.print(senses); > - WSDHelper.print("----------"); > - } > + mfs = new MFS(); > > } > > + /* > + * Tests disambiguating only one word : The ambiguous word > "please" > + */ > + @Test > + public void testOneWordDisambiguation() { > + String[] senses = mfs.disambiguate(sentence1, tags1, lemmas1, > 8); > + > + assertEquals("Check number of senses", 1, senses.length); > + } > + > + /* > + * Tests disambiguating a word Span In this case we test a mix of > monosemous > + * and polysemous words as well as words that do not need > disambiguation such > + * as determiners > + */ > + @Test > + public void testWordSpanDisambiguation() { > + Span span = new Span(3, 7); > + List<String[]> senses = mfs.disambiguate(sentence2, tags2, > lemmas2, span); > + > + assertEquals("Check number of returned words", 5, > senses.size()); > + assertEquals("Check number of senses", 1, senses.get(0).length); > + assertEquals("Check monosemous word", 1, senses.get(1).length); > + assertEquals("Check preposition", "WSDHELPER to", > senses.get(2)[0]); > + assertEquals("Check determiner", "WSDHELPER determiner", > senses.get(3)[0]); > + } > + > + /* > + * Tests disambiguating all the words > + */ > + @Test > + public void testAllWordsDisambiguation() { > + List<String[]> senses = mfs.disambiguate(sentence3, tags3, > lemmas3); > + > + assertEquals("Check number of returned words", 15, > senses.size()); > + assertEquals("Check preposition", "WSDHELPER personal pronoun", > + senses.get(6)[0]); > + } > } > \ No newline at end of file > > Copied: opennlp/sandbox/opennlp- > wsd/src/test/java/opennlp/tools/disambiguator/OSCCMETester.java (from > r1733577, opennlp/sandbox/opennlp- > 
wsd/src/test/java/opennlp/tools/disambiguator/OSCCTester.java) > URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/tes > t/java/opennlp/tools/disambiguator/OSCCMETester.java?p2=opennlp/sandb > ox/opennlp- > wsd/src/test/java/opennlp/tools/disambiguator/OSCCMETester.java&p1=op > ennlp/sandbox/opennlp- > wsd/src/test/java/opennlp/tools/disambiguator/OSCCTester.java&r1=1733 > 577&r2=1734600&rev=1734600&view=diff > ===================================================================== > ========= > --- opennlp/sandbox/opennlp- > wsd/src/test/java/opennlp/tools/disambiguator/OSCCTester.java > (original) > +++ opennlp/sandbox/opennlp- > wsd/src/test/java/opennlp/tools/disambiguator/OSCCMETester.java Fri > Mar 11 17:37:07 2016 > @@ -19,11 +19,18 @@ > > package opennlp.tools.disambiguator; > > +import static org.junit.Assert.assertEquals; > +import static org.junit.Assert.assertNotNull; > +import static org.junit.Assert.fail; > + > import java.io.File; > import java.io.IOException; > import java.util.ArrayList; > import java.util.List; > > +import org.junit.BeforeClass; > +import org.junit.Test; > + > import opennlp.tools.disambiguator.datareader.SemcorReaderExtended; > import opennlp.tools.disambiguator.oscc.OSCCFactory; > import opennlp.tools.disambiguator.oscc.OSCCME; > @@ -33,84 +40,154 @@ import opennlp.tools.util.ObjectStream; > import opennlp.tools.util.Span; > import opennlp.tools.util.TrainingParameters; > > -public class OSCCTester { > - > - public static void main(String[] args) { > - > - SemcorReaderExtended sr = new SemcorReaderExtended(); > - > - String modelsDir = "src\\test\\resources\\models\\"; > +/** > + * This is the test class for {@link OSCCME}. > + * > + * The scope of this test is to make sure that the OSCC > disambiguator code can > + * be executed. This test can not detect mistakes which lead to > incorrect > + * feature generation or other mistakes which decrease the > disambiguation > + * performance of the disambiguator. > + * > + * In this test the {@link OSCCME} is trained with Semcor and then > the computed > + * model is used to predict sentences from the training sentences. 
> + */ > +public class OSCCMETester { > + // TODO write more tests > + // TODO modify when we fix the parameter model > + > + static String modelsDir = "src\\test\\resources\\models\\"; > + static String trainingDataDirectory = > "src\\test\\resources\\supervised\\models\\"; > + > + static OSCCParameters OSCCParams; > + static OSCCME oscc; > + static OSCCFactory osccFactory; > + static OSCCModel model; > + > + static String test = "please.v"; > + static File outFile; > + > + static String test1 = "We need to discuss an important topic, > please write to me soon."; > + static String test2 = "The component was highly radioactive to the > point that" > + + " it has been activated the second it touched water"; > + static String test3 = "The summer is almost over and I did not go > to the beach even once"; > + > + static String[] sentence1; > + static String[] sentence2; > + static String[] sentence3; > + > + static String[] tags1; > + static String[] tags2; > + static String[] tags3; > + > + static String[] lemmas1; > + static String[] lemmas2; > + static String[] lemmas3; > + > + /* > + * Setup the testing variables > + */ > + @BeforeClass > + public static void setUpAndTraining() { > WSDHelper.loadTokenizer(modelsDir + "en-token.bin"); > WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict"); > WSDHelper.loadTagger(modelsDir + "en-pos-maxent.bin"); > > - String test = "write.v"; > - TrainingParameters trainingParams = new TrainingParameters(); > - OSCCParameters OSCCParams = new OSCCParameters(""); > - OSCCFactory OSCCFactory = new OSCCFactory(); > + sentence1 = WSDHelper.getTokenizer().tokenize(test1); > + sentence2 = WSDHelper.getTokenizer().tokenize(test2); > + sentence3 = WSDHelper.getTokenizer().tokenize(test3); > + > + tags1 = WSDHelper.getTagger().tag(sentence1); > + tags2 = WSDHelper.getTagger().tag(sentence2); > + tags3 = WSDHelper.getTagger().tag(sentence3); > + > + List<String> tempLemmas1 = new ArrayList<String>(); > + for (int i = 0; i < sentence1.length; i++) { > + tempLemmas1 > + .add(WSDHelper.getLemmatizer().lemmatize(sentence1[i], > tags1[i])); > + } > + lemmas1 = tempLemmas1.toArray(new String[tempLemmas1.size()]); > > + List<String> tempLemmas2 = new ArrayList<String>(); > + for (int i = 0; i < sentence2.length; i++) { > + tempLemmas2 > + .add(WSDHelper.getLemmatizer().lemmatize(sentence2[i], > tags2[i])); > + } > + lemmas2 = tempLemmas2.toArray(new String[tempLemmas2.size()]); > + > + List<String> tempLemmas3 = new ArrayList<String>(); > + for (int i = 0; i < sentence3.length; i++) { > + tempLemmas3 > + .add(WSDHelper.getLemmatizer().lemmatize(sentence3[i], > tags3[i])); > + } > + lemmas3 = tempLemmas3.toArray(new String[tempLemmas3.size()]); > + > + OSCCParams = new OSCCParameters(""); > + OSCCParams.setTrainingDataDirectory(trainingDataDirectory); > + osccFactory = new OSCCFactory(); > + TrainingParameters trainingParams = new TrainingParameters(); > + SemcorReaderExtended sr = new SemcorReaderExtended(); > ObjectStream<WSDSample> sampleStream = > sr.getSemcorDataStream(test); > > - OSCCModel model = null; > - OSCCModel readModel = null; > - try { > - model = OSCCME.train("en", sampleStream, trainingParams, > OSCCParams, > - OSCCFactory); > - model.writeModel(test); > - File outFile = new File(test + ".OSCC.model"); > - readModel = new OSCCModel(outFile); > + OSCCModel writeModel = null; > + /* > + * Tests training the disambiguator We test both writing and > reading a model > + * file trained by semcor > + */ > > + try { > + writeModel = OSCCME.train("en", 
sampleStream, trainingParams, > OSCCParams, > + osccFactory); > + assertNotNull("Checking the model to be written", writeModel); > + writeModel.writeModel(OSCCParams.getTrainingDataDirectory() + > test); > + outFile = new File( > + OSCCParams.getTrainingDataDirectory() + test + > ".oscc.model"); > + model = new OSCCModel(outFile); > + assertNotNull("Checking the read model", model); > + oscc = new OSCCME(model, OSCCParams); > + assertNotNull("Checking the disambiguator", oscc); > } catch (IOException e1) { > - // TODO Auto-generated catch block > e1.printStackTrace(); > + fail("Exception in training"); > } > - OSCCME OSCC = new OSCCME(readModel, OSCCParams); > + } > > - /** > - * This is how to make the context for one-word-disambiguation > using OSCC > - */ > - String test1 = "We need to discuss important topic, please write > to me soon."; > - String[] sentence1 = WSDHelper.getTokenizer().tokenize(test1); > - String[] tags1 = WSDHelper.getTagger().tag(sentence1); > - List<String> tempLemmas1 = new ArrayList<String>(); > - for (int i = 0; i < sentence1.length; i++) { > - String lemma = WSDHelper.getLemmatizer() > - .lemmatize(sentence1[i], tags1[i]); > - tempLemmas1.add(lemma); > - } > - String[] lemmas1 = tempLemmas1.toArray(new > String[tempLemmas1.size()]); > + /* > + * Tests disambiguating only one word : The ambiguous word > "please" > + */ > + @Test > + public void testOneWordDisambiguation() { > + String[] senses = oscc.disambiguate(sentence1, tags1, lemmas1, > 8); > > - // output > - String[] senses1 = OSCC.disambiguate(sentence1, tags1, lemmas1, > 8); > - System.out.print(lemmas1[8] + " :\t"); > - WSDHelper.print(senses1); > - WSDHelper.print("*****************************"); > + assertEquals("Check number of senses", 1, senses.length); > + } > > - /** > - * This is how to make the context for disambiguation of span of > words > - */ > - String test2 = "The component was highly radioactive to the > point that" > - + " it has been activated the second it touched water"; > - String[] sentence2 = WSDHelper.getTokenizer().tokenize(test2); > - String[] tags2 = WSDHelper.getTagger().tag(sentence2); > - List<String> tempLemmas2 = new ArrayList<String>(); > - for (int i = 0; i < sentence2.length; i++) { > - String lemma = WSDHelper.getLemmatizer() > - .lemmatize(sentence2[i], tags2[i]); > - tempLemmas2.add(lemma); > - } > - String[] lemmas2 = tempLemmas2.toArray(new > String[tempLemmas2.size()]); > + /* > + * Tests disambiguating a word Span In this case we test a mix of > monosemous > + * and polysemous words as well as words that do not need > disambiguation such > + * as determiners > + */ > + @Test > + public void testWordSpanDisambiguation() { > Span span = new Span(3, 7); > + List<String[]> senses = oscc.disambiguate(sentence2, tags2, > lemmas2, span); > > - // output > - List<String[]> senses2 = OSCC.disambiguate(sentence2, tags2, > lemmas2, span); > - for (int i = span.getStart(); i < span.getEnd() + 1; i++) { > - String[] senses = senses2.get(i - span.getStart()); > - System.out.print(lemmas2[i] + " :\t"); > - WSDHelper.print(senses); > - WSDHelper.print("----------"); > - } > + assertEquals("Check number of returned words", 5, > senses.size()); > + assertEquals("Check number of senses", 1, senses.get(0).length); > + assertEquals("Check monosemous word", 1, senses.get(1).length); > + assertEquals("Check preposition", "WSDHELPER to", > senses.get(2)[0]); > + assertEquals("Check determiner", "WSDHELPER determiner", > senses.get(3)[0]); > + } > > - 
WSDHelper.print("*****************************"); > + /* > + * Tests disambiguating all the words > + */ > + @Test > + public void testAllWordsDisambiguation() { > + List<String[]> senses = oscc.disambiguate(sentence3, tags3, > lemmas3); > + > + assertEquals("Check number of returned words", 15, > senses.size()); > + assertEquals("Check preposition", "WSDHELPER personal pronoun", > + senses.get(6)[0]); > } > + > } > \ No newline at end of file > > Modified: opennlp/sandbox/opennlp- > wsd/src/test/java/opennlp/tools/disambiguator/Tester.java > URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/tes > t/java/opennlp/tools/disambiguator/Tester.java?rev=1734600&r1=1734599 > &r2=1734600&view=diff > ===================================================================== > ========= > --- opennlp/sandbox/opennlp- > wsd/src/test/java/opennlp/tools/disambiguator/Tester.java (original) > +++ opennlp/sandbox/opennlp- > wsd/src/test/java/opennlp/tools/disambiguator/Tester.java Fri Mar 11 > 17:37:07 2016 > @@ -1,36 +1,40 @@ > package opennlp.tools.disambiguator; > > +import java.util.ArrayList; > +import java.util.List; > > +import opennlp.tools.disambiguator.ims.IMSME; > +import opennlp.tools.disambiguator.ims.IMSParameters; > > public class Tester { > > public static void main(String[] args) { > -// > -// String modelsDir = "src\\test\\resources\\models\\"; > -// WSDHelper.loadTokenizer(modelsDir + "en-token.bin"); > -// WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict"); > -// WSDHelper.loadTagger(modelsDir + "en-pos-maxent.bin"); > -// > -// IMSME ims = new IMSME(); > -// > -// String test3 = "The summer is almost over and I haven't been > to the beach even once"; > -// String[] sentence3 = WSDHelper.getTokenizer().tokenize(test3); > -// String[] tags3 = WSDHelper.getTagger().tag(sentence3); > -// List<String> tempLemmas3 = new ArrayList<String>(); > -// for (int i = 0; i < sentence3.length; i++) { > -// String lemma = WSDHelper.getLemmatizer() > -// .lemmatize(sentence3[i], tags3[i]); > -// tempLemmas3.add(lemma); > -// } > -// String[] lemmas3 = tempLemmas3.toArray(new > String[tempLemmas3.size()]); > -// > -// // output > -// List<String[]> senses3 = ims.disambiguate(sentence3, tags3, > lemmas3); > -// for (int i = 0; i < sentence3.length; i++) { > -// System.out.print(sentence3[i] + " : "); > -// WSDHelper.printResults(ims, senses3.get(i)); > -// WSDHelper.print("----------"); > -// } > + > + String modelsDir = "src\\test\\resources\\models\\"; > + WSDHelper.loadTokenizer(modelsDir + "en-token.bin"); > + WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict"); > + WSDHelper.loadTagger(modelsDir + "en-pos-maxent.bin"); > + > + IMSME ims = new IMSME(new IMSParameters("\\")); > + > + String test3 = "The summer is almost over and I haven't been to > the beach even once"; > + String[] sentence3 = WSDHelper.getTokenizer().tokenize(test3); > + String[] tags3 = WSDHelper.getTagger().tag(sentence3); > + List<String> tempLemmas3 = new ArrayList<String>(); > + for (int i = 0; i < sentence3.length; i++) { > + String lemma = > WSDHelper.getLemmatizer().lemmatize(sentence3[i], > + tags3[i]); > + tempLemmas3.add(lemma); > + } > + String[] lemmas3 = tempLemmas3.toArray(new > String[tempLemmas3.size()]); > + > + // output > + List<String[]> senses3 = ims.disambiguate(sentence3, tags3, > lemmas3); > + for (int i = 0; i < sentence3.length; i++) { > + System.out.print(sentence3[i] + " : "); > + WSDHelper.printResults(ims, senses3.get(i)); > + WSDHelper.print("----------"); > + } > > } > } > 
\ No newline at end of file > >
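A few comments on the new testers below the quote.

The MFS, OSCCME and IMS testers (and the revived Tester main class) all repeat the same tokenize / tag / lemmatize block before calling disambiguate(). A small shared helper would cut that duplication; this is only a sketch built on the WSDHelper accessors that appear in the diff, the class name, the preprocess() method and the Context holder are made up and not existing API:

package opennlp.tools.disambiguator;

import java.util.ArrayList;
import java.util.List;

// Sketch of a shared pre-processing step for the testers.
// Relies only on the WSDHelper accessors used in the diff; the
// tokenizer, tagger and lemmatizer must already have been loaded
// via WSDHelper.loadTokenizer(...) etc.
public class TestPreprocessor {

  // Hypothetical holder for the three parallel arrays the
  // disambiguators expect.
  public static class Context {
    public final String[] tokens;
    public final String[] tags;
    public final String[] lemmas;

    Context(String[] tokens, String[] tags, String[] lemmas) {
      this.tokens = tokens;
      this.tags = tags;
      this.lemmas = lemmas;
    }
  }

  public static Context preprocess(String text) {
    String[] tokens = WSDHelper.getTokenizer().tokenize(text);
    String[] tags = WSDHelper.getTagger().tag(tokens);
    List<String> lemmas = new ArrayList<String>(tokens.length);
    for (int i = 0; i < tokens.length; i++) {
      lemmas.add(WSDHelper.getLemmatizer().lemmatize(tokens[i], tags[i]));
    }
    return new Context(tokens, tags,
        lemmas.toArray(new String[lemmas.size()]));
  }
}

With something like this, each @BeforeClass becomes three calls to preprocess(testN) instead of three copies of the same loop.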
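The train / write / re-read cycle done in OSCCMETester#setUpAndTraining can also be shown outside of JUnit. The snippet below is only a sketch that mirrors the tester: the class and variable names are made up, the calls (OSCCME.train, writeModel, the OSCCModel file constructor, the parameter setters) and the paths are the ones visible in the diff, and whether writeModel() appends the ".oscc.model" suffix itself is an assumption based on how the tester re-reads the file:

package opennlp.tools.disambiguator;

import java.io.File;
import java.io.IOException;

import opennlp.tools.disambiguator.datareader.SemcorReaderExtended;
import opennlp.tools.disambiguator.oscc.OSCCFactory;
import opennlp.tools.disambiguator.oscc.OSCCME;
import opennlp.tools.disambiguator.oscc.OSCCModel;
import opennlp.tools.disambiguator.oscc.OSCCParameters;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.TrainingParameters;

// Sketch: train an OSCC model for one target word, persist it and
// load it back, mirroring setUpAndTraining(). Placed in the same
// package as the testers so WSDHelper and WSDSample resolve without
// further imports.
public class OSCCTrainingSketch {

  public static void main(String[] args) throws IOException {
    String modelsDir = "src/test/resources/models/";
    WSDHelper.loadTokenizer(modelsDir + "en-token.bin");
    WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict");
    WSDHelper.loadTagger(modelsDir + "en-pos-maxent.bin");

    String target = "please.v";

    OSCCParameters params = new OSCCParameters("");
    params.setTrainingDataDirectory("src/test/resources/supervised/models/");

    ObjectStream<WSDSample> samples =
        new SemcorReaderExtended().getSemcorDataStream(target);

    OSCCModel trained = OSCCME.train("en", samples,
        new TrainingParameters(), params, new OSCCFactory());

    // The tester re-reads the file under "<target>.oscc.model", so
    // writeModel() apparently appends that suffix itself.
    trained.writeModel(params.getTrainingDataDirectory() + target);

    OSCCModel reloaded = new OSCCModel(new File(
        params.getTrainingDataDirectory() + target + ".oscc.model"));

    // Ready for disambiguate() calls.
    OSCCME oscc = new OSCCME(reloaded, params);
  }
}

Doing the training plus the assertNotNull checks inside @BeforeClass means a training problem surfaces as a setup error rather than a test failure, which seems acceptable here but is worth keeping in mind when more tests are added.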
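The new test methods exercise the three disambiguate() variants: a single token index, a Span, and the whole sentence. For readers of this thread, here is a compact usage sketch using only calls that appear in the diff; it uses the model-free MFS baseline so no trained model is needed. The sentence, the index 8 ("please") and the Span(3, 7) are taken from the tests; everything else (class name, variables, printing) is illustrative:

package opennlp.tools.disambiguator;

import java.util.List;

// MFS is assumed to live in the mfs sub-package, analogous to ims
// and oscc; the import is not shown in the quoted diff.
import opennlp.tools.disambiguator.mfs.MFS;
import opennlp.tools.util.Span;

// Sketch: the three disambiguate() variants the new tests cover.
public class DisambiguateUsageSketch {

  public static void main(String[] args) {
    String modelsDir = "src/test/resources/models/";
    WSDHelper.loadTokenizer(modelsDir + "en-token.bin");
    WSDHelper.loadLemmatizer(modelsDir + "en-lemmatizer.dict");
    WSDHelper.loadTagger(modelsDir + "en-pos-maxent.bin");

    String text = "We need to discuss an important topic, please write to me soon.";
    String[] tokens = WSDHelper.getTokenizer().tokenize(text);
    String[] tags = WSDHelper.getTagger().tag(tokens);
    String[] lemmas = new String[tokens.length];
    for (int i = 0; i < tokens.length; i++) {
      lemmas[i] = WSDHelper.getLemmatizer().lemmatize(tokens[i], tags[i]);
    }

    MFS mfs = new MFS();

    // 1) a single target word addressed by its token index
    String[] senses = mfs.disambiguate(tokens, tags, lemmas, 8);
    WSDHelper.print(senses);

    // 2) a span of words
    List<String[]> spanSenses =
        mfs.disambiguate(tokens, tags, lemmas, new Span(3, 7));
    WSDHelper.print("span results: " + spanSenses.size());

    // 3) all-words disambiguation of the whole sentence
    List<String[]> allSenses = mfs.disambiguate(tokens, tags, lemmas);
    for (int i = 0; i < allSenses.size(); i++) {
      // words that are not looked up in WordNet (prepositions,
      // determiners, pronouns, ...) come back with a single
      // "WSDHELPER ..." marker instead of a sense key
      System.out.print(lemmas[i] + " :\t");
      WSDHelper.print(allSenses.get(i));
      WSDHelper.print("----------");
    }
  }
}

That "WSDHELPER ..." marker behaviour is exactly what the new assertions check; one small nit there: the assertion on the personal pronoun in testAllWordsDisambiguation still carries the message "Check preposition".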
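One more nit: the testers and the revived Tester main class hard-code Windows separators ("src\\test\\resources\\models\\", and Tester passes "\\" to IMSParameters), which will not resolve on a Linux build machine. A portable alternative could look like the sketch below; the class and method names are just an illustration:

package opennlp.tools.disambiguator;

import java.io.File;

// Sketch: portable alternatives to the hard-coded
// "src\\test\\resources\\models\\" path used in the testers.
public class TestPaths {

  // java.io.File accepts forward slashes on Windows as well, so a
  // plain relative path is already portable:
  public static final String MODELS_DIR = "src/test/resources/models/";

  // or, built explicitly from the platform separator:
  public static String modelsDir() {
    return "src" + File.separator + "test" + File.separator
        + "resources" + File.separator + "models" + File.separator;
  }
}

The "\\" argument to IMSParameters in Tester looks like the same kind of hard-coded separator, assuming that constructor argument is meant to be a directory.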
