Repository: opennlp
Updated Branches:
  refs/heads/master 6d2fa0481 -> 5f96aa323


OPENNLP-1001: Changes signature of Chunker Sequence Validator

Chunker SequenceValidator should have access to both token and POS tag

Closes #137


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/5f96aa32
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/5f96aa32
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/5f96aa32

Branch: refs/heads/master
Commit: 5f96aa323dce95467365b41077b609852ddb08aa
Parents: 6d2fa04
Author: William D C M SILVA <[email protected]>
Authored: Thu Feb 23 15:04:37 2017 -0300
Committer: Jörn Kottmann <[email protected]>
Committed: Sun May 7 21:05:48 2017 +0200

----------------------------------------------------------------------
 .../tools/chunker/ChunkerContextGenerator.java  |   3 +-
 .../opennlp/tools/chunker/ChunkerFactory.java   |   3 +-
 .../java/opennlp/tools/chunker/ChunkerME.java   |  19 ++--
 .../opennlp/tools/chunker/ChunkerModel.java     |  14 ++-
 .../chunker/DefaultChunkerContextGenerator.java |  14 ++-
 .../DefaultChunkerSequenceValidator.java        |   5 +-
 .../tools/parser/ChunkContextGenerator.java     |  12 +++
 .../tools/parser/ParserChunkerFactory.java      |   3 +-
 .../parser/ParserChunkerSequenceValidator.java  |   9 +-
 .../main/java/opennlp/tools/util/TokenTag.java  |  99 +++++++++++++++++++
 .../opennlp/tools/chunker/ChunkerMETest.java    |   2 +-
 .../opennlp/tools/chunker/ChunkerModelTest.java |  58 +++++++++++
 .../tools/chunker/DummyChunkerFactory.java      |   5 +-
 .../opennlp/tools/chunker/chunker170custom.bin  | Bin 0 -> 21675 bytes
 .../opennlp/tools/chunker/chunker170default.bin | Bin 0 -> 21671 bytes
 .../opennlp/tools/chunker/chunker180custom.bin  | Bin 0 -> 21675 bytes
 16 files changed, 227 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/5f96aa32/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerContextGenerator.java
----------------------------------------------------------------------
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerContextGenerator.java
 
b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerContextGenerator.java
index 590bc85..b666ad3 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerContextGenerator.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerContextGenerator.java
@@ -18,11 +18,12 @@
 package opennlp.tools.chunker;
 
 import opennlp.tools.util.BeamSearchContextGenerator;
+import opennlp.tools.util.TokenTag;
 
 /**
  * Interface for the context generator used in syntactic chunking.
  */
-public interface ChunkerContextGenerator extends 
BeamSearchContextGenerator<String> {
+public interface ChunkerContextGenerator extends 
BeamSearchContextGenerator<TokenTag> {
 
   /**
    * Returns the contexts for chunking of the specified index.

http://git-wip-us.apache.org/repos/asf/opennlp/blob/5f96aa32/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerFactory.java
----------------------------------------------------------------------
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerFactory.java 
b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerFactory.java
index 1cb772f..961a738 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerFactory.java
@@ -20,6 +20,7 @@ package opennlp.tools.chunker;
 import opennlp.tools.util.BaseToolFactory;
 import opennlp.tools.util.InvalidFormatException;
 import opennlp.tools.util.SequenceValidator;
+import opennlp.tools.util.TokenTag;
 import opennlp.tools.util.ext.ExtensionLoader;
 
 public class ChunkerFactory extends BaseToolFactory {
@@ -53,7 +54,7 @@ public class ChunkerFactory extends BaseToolFactory {
     // no additional artifacts
   }
 
-  public SequenceValidator<String> getSequenceValidator() {
+  public SequenceValidator<TokenTag> getSequenceValidator() {
     return new DefaultChunkerSequenceValidator();
   }
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/5f96aa32/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java 
b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java
index 8be0cca..4346df3 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java
@@ -34,6 +34,7 @@ import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.Sequence;
 import opennlp.tools.util.SequenceValidator;
 import opennlp.tools.util.Span;
+import opennlp.tools.util.TokenTag;
 import opennlp.tools.util.TrainingParameters;
 
 /**
@@ -49,10 +50,10 @@ public class ChunkerME implements Chunker {
   /**
    * The model used to assign chunk tags to a sequence of tokens.
    */
-  protected SequenceClassificationModel<String> model;
+  protected SequenceClassificationModel<TokenTag> model;
 
   private ChunkerContextGenerator contextGenerator;
-  private SequenceValidator<String> sequenceValidator;
+  private SequenceValidator<TokenTag> sequenceValidator;
 
   /**
    * Initializes the current instance with the specified model and
@@ -67,7 +68,7 @@ public class ChunkerME implements Chunker {
    *     to configure the {@link SequenceValidator} and {@link 
ChunkerContextGenerator}.
    */
   @Deprecated
-  private ChunkerME(ChunkerModel model, int beamSize, 
SequenceValidator<String> sequenceValidator,
+  private ChunkerME(ChunkerModel model, int beamSize, 
SequenceValidator<TokenTag> sequenceValidator,
       ChunkerContextGenerator contextGenerator) {
 
     this.sequenceValidator = sequenceValidator;
@@ -117,7 +118,8 @@ public class ChunkerME implements Chunker {
   }
 
   public String[] chunk(String[] toks, String[] tags) {
-    bestSequence = model.bestSequence(toks, new Object[] {tags}, 
contextGenerator, sequenceValidator);
+    TokenTag[] tuples = TokenTag.create(toks, tags);
+    bestSequence = model.bestSequence(tuples, new Object[] {}, 
contextGenerator, sequenceValidator);
     List<String> c = bestSequence.getOutcomes();
     return c.toArray(new String[c.size()]);
   }
@@ -128,12 +130,15 @@ public class ChunkerME implements Chunker {
   }
 
   public Sequence[] topKSequences(String[] sentence, String[] tags) {
-    return model.bestSequences(DEFAULT_BEAM_SIZE, sentence,
-        new Object[] { tags }, contextGenerator, sequenceValidator);
+    TokenTag[] tuples = TokenTag.create(sentence, tags);
+
+    return model.bestSequences(DEFAULT_BEAM_SIZE, tuples,
+        new Object[] { }, contextGenerator, sequenceValidator);
   }
 
   public Sequence[] topKSequences(String[] sentence, String[] tags, double 
minSequenceScore) {
-    return model.bestSequences(DEFAULT_BEAM_SIZE, sentence, new Object[] { 
tags }, minSequenceScore,
+    TokenTag[] tuples = TokenTag.create(sentence, tags);
+    return model.bestSequences(DEFAULT_BEAM_SIZE, tuples, new Object[] { }, 
minSequenceScore,
         contextGenerator, sequenceValidator);
   }
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/5f96aa32/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java
----------------------------------------------------------------------
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java 
b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java
index ed13f65..12c8bbe 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java
@@ -31,6 +31,7 @@ import opennlp.tools.ml.model.MaxentModel;
 import opennlp.tools.ml.model.SequenceClassificationModel;
 import opennlp.tools.util.BaseToolFactory;
 import opennlp.tools.util.InvalidFormatException;
+import opennlp.tools.util.TokenTag;
 import opennlp.tools.util.model.BaseModel;
 
 /**
@@ -90,6 +91,17 @@ public class ChunkerModel extends BaseModel {
     if (!(artifactMap.get(CHUNKER_MODEL_ENTRY_NAME) instanceof AbstractModel)) 
{
       throw new InvalidFormatException("Chunker model is incomplete!");
     }
+
+    // Since 1.8.0 the ChunkerFactory signature has changed. Check whether the model
+    // declares a non-default factory and, if so, whether it was created before 1.8.
+    if ( (getManifestProperty(FACTORY_NAME) != null
+            && 
!getManifestProperty(FACTORY_NAME).equals("opennlp.tools.chunker.ChunkerFactory")
 )
+        && this.getVersion().getMajor() <= 1
+        && this.getVersion().getMinor() < 8) {
+      throw new InvalidFormatException("The Chunker factory '" + 
getManifestProperty(FACTORY_NAME) +
+      "' is no longer compatible. Please update it to match the latest 
ChunkerFactory.");
+    }
+
   }
 
   /**
@@ -105,7 +117,7 @@ public class ChunkerModel extends BaseModel {
     }
   }
 
-  public SequenceClassificationModel<String> getChunkerSequenceModel() {
+  public SequenceClassificationModel<TokenTag> getChunkerSequenceModel() {
 
     Properties manifest = (Properties) artifactMap.get(MANIFEST_ENTRY);
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/5f96aa32/opennlp-tools/src/main/java/opennlp/tools/chunker/DefaultChunkerContextGenerator.java
----------------------------------------------------------------------
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/chunker/DefaultChunkerContextGenerator.java
 
b/opennlp-tools/src/main/java/opennlp/tools/chunker/DefaultChunkerContextGenerator.java
index 76616d4..b140c3f 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/chunker/DefaultChunkerContextGenerator.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/chunker/DefaultChunkerContextGenerator.java
@@ -18,6 +18,8 @@
 
 package opennlp.tools.chunker;
 
+import opennlp.tools.util.TokenTag;
+
 /** Features based on chunking model described in Fei Sha and Fernando 
Pereira. Shallow
  *  parsing with conditional random fields. In Proceedings of HLT-NAACL 2003. 
Association
  *  for Computational Linguistics, 2003.
@@ -30,9 +32,9 @@ public class DefaultChunkerContextGenerator implements 
ChunkerContextGenerator {
   public DefaultChunkerContextGenerator() {
   }
 
-  public String[] getContext(int index, String[] sequence,
+  public String[] getContext(int index, String[] tokens, String[] postags,
       String[] priorDecisions, Object[] additionalContext) {
-    return getContext(index,sequence,(String[]) 
additionalContext[0],priorDecisions);
+    return getContext(index, tokens, postags, priorDecisions);
   }
 
   public String[] getContext(int i, String[] toks, String[] tags, String[] 
preds) {
@@ -143,4 +145,12 @@ public class DefaultChunkerContextGenerator implements 
ChunkerContextGenerator {
 
     return features;
   }
+
+  /**
+   * Returns the context for position {@code index} of a token/tag sequence.
+   * The tuples are split into parallel token and tag arrays, which are handed
+   * to the array-based {@code getContext} overload together with the prior
+   * decisions and the additional context.
+   */
+  @Override
+  public String[] getContext(int index, TokenTag[] sequence, String[] priorDecisions,
+                             Object[] additionalContext) {
+    return getContext(index, TokenTag.extractTokens(sequence), TokenTag.extractTags(sequence),
+        priorDecisions, additionalContext);
+  }
 }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/5f96aa32/opennlp-tools/src/main/java/opennlp/tools/chunker/DefaultChunkerSequenceValidator.java
----------------------------------------------------------------------
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/chunker/DefaultChunkerSequenceValidator.java
 
b/opennlp-tools/src/main/java/opennlp/tools/chunker/DefaultChunkerSequenceValidator.java
index ce395eb..e1e09fa 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/chunker/DefaultChunkerSequenceValidator.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/chunker/DefaultChunkerSequenceValidator.java
@@ -18,8 +18,9 @@
 package opennlp.tools.chunker;
 
 import opennlp.tools.util.SequenceValidator;
+import opennlp.tools.util.TokenTag;
 
-public class DefaultChunkerSequenceValidator implements 
SequenceValidator<String> {
+public class DefaultChunkerSequenceValidator implements 
SequenceValidator<TokenTag> {
 
   private boolean validOutcome(String outcome, String prevOutcome) {
     if (outcome.startsWith("I-")) {
@@ -46,7 +47,7 @@ public class DefaultChunkerSequenceValidator implements 
SequenceValidator<String
     return validOutcome(outcome,prevOutcome);
   }
 
-  public boolean validSequence(int i, String[] sequence, String[] s, String 
outcome) {
+  public boolean validSequence(int i, TokenTag[] sequence, String[] s, String 
outcome) {
     return validOutcome(outcome, s);
   }
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/5f96aa32/opennlp-tools/src/main/java/opennlp/tools/parser/ChunkContextGenerator.java
----------------------------------------------------------------------
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/parser/ChunkContextGenerator.java 
b/opennlp-tools/src/main/java/opennlp/tools/parser/ChunkContextGenerator.java
index 7d37fcb..e32cead 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/parser/ChunkContextGenerator.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/parser/ChunkContextGenerator.java
@@ -22,6 +22,7 @@ import java.util.List;
 
 import opennlp.tools.chunker.ChunkerContextGenerator;
 import opennlp.tools.util.Cache;
+import opennlp.tools.util.TokenTag;
 
 /**
  * Creates predivtive context for the pre-chunking phases of parsing.
@@ -44,11 +45,13 @@ public class ChunkContextGenerator implements 
ChunkerContextGenerator {
     }
   }
 
+  @Deprecated
   public String[] getContext(Object o) {
     Object[] data = (Object[]) o;
     return getContext((Integer) data[0], (String[]) data[1], (String[]) 
data[2], (String[]) data[3]);
   }
 
+  @Deprecated
   public String[] getContext(int i, String[] words, String[] prevDecisions, 
Object[] ac) {
     return getContext(i,words,(String[]) ac[0],prevDecisions);
   }
@@ -184,4 +187,13 @@ public class ChunkContextGenerator implements 
ChunkerContextGenerator {
     }
     return feat.toString();
   }
+
+
+  /**
+   * Returns the context for position {@code index} of a token/tag sequence.
+   * The tuples are split into parallel token and tag arrays and passed to the
+   * array-based {@code getContext} overload; {@code additionalContext} is not used.
+   */
+  @Override
+  public String[] getContext(int index, TokenTag[] sequence, String[] priorDecisions,
+                             Object[] additionalContext) {
+    return getContext(index, TokenTag.extractTokens(sequence), TokenTag.extractTags(sequence),
+        priorDecisions);
+  }
 }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/5f96aa32/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerFactory.java
----------------------------------------------------------------------
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerFactory.java 
b/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerFactory.java
index 7d3c8f7..ca792c9 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerFactory.java
@@ -22,6 +22,7 @@ import opennlp.tools.chunker.ChunkerFactory;
 import opennlp.tools.chunker.ChunkerME;
 import opennlp.tools.ml.model.MaxentModel;
 import opennlp.tools.util.SequenceValidator;
+import opennlp.tools.util.TokenTag;
 
 public class ParserChunkerFactory extends ChunkerFactory {
 
@@ -31,7 +32,7 @@ public class ParserChunkerFactory extends ChunkerFactory {
   }
 
   @Override
-  public SequenceValidator<String> getSequenceValidator() {
+  public SequenceValidator<TokenTag> getSequenceValidator() {
 
     MaxentModel model = artifactProvider.getArtifact("chunker.model");
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/5f96aa32/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerSequenceValidator.java
----------------------------------------------------------------------
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerSequenceValidator.java
 
b/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerSequenceValidator.java
index ef15bf5..3536841 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerSequenceValidator.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/parser/ParserChunkerSequenceValidator.java
@@ -22,8 +22,9 @@ import java.util.Map;
 
 import opennlp.tools.parser.chunking.Parser;
 import opennlp.tools.util.SequenceValidator;
+import opennlp.tools.util.TokenTag;
 
-public class ParserChunkerSequenceValidator implements 
SequenceValidator<String> {
+public class ParserChunkerSequenceValidator implements 
SequenceValidator<TokenTag> {
 
   private Map<String, String> continueStartMap;
 
@@ -66,4 +67,10 @@ public class ParserChunkerSequenceValidator implements 
SequenceValidator<String>
     }
     return true;
   }
+
+
+  /**
+   * Validates an outcome for a token/tag sequence by delegating to the
+   * token-based {@code validSequence} overload; only the tokens of
+   * {@code inputTuples} are passed on.
+   */
+  @Override
+  public boolean validSequence(int i, TokenTag[] inputTuples, String[] outcomesSequence,
+                               String outcome) {
+    return validSequence(i, TokenTag.extractTokens(inputTuples), outcomesSequence, outcome);
+  }
 }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/5f96aa32/opennlp-tools/src/main/java/opennlp/tools/util/TokenTag.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/TokenTag.java 
b/opennlp-tools/src/main/java/opennlp/tools/util/TokenTag.java
new file mode 100644
index 0000000..2a4377b
--- /dev/null
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/TokenTag.java
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package opennlp.tools.util;
+
+import java.util.Arrays;
+import java.util.Objects;
+
+/**
+ * Pairs a token with its tag and, optionally, additional string data.
+ *
+ * <p>Equality and hash code are value-based: two instances are equal when their
+ * tokens, their tags and the contents of their additional data arrays are equal.
+ */
+public class TokenTag {
+
+  private final String token;
+  private final String tag;
+  private final String[] addtionalData;
+
+  /**
+   * Creates a new token/tag pair.
+   *
+   * @param token the token text
+   * @param tag the tag assigned to the token
+   * @param addtionalData optional extra data, may be {@code null}; a non-null
+   *     array is copied so later changes by the caller do not affect this instance
+   */
+  public TokenTag(String token, String tag, String[] addtionalData) {
+    this.token = token;
+    this.tag = tag;
+    if (addtionalData != null) {
+      this.addtionalData = Arrays.copyOf(addtionalData, addtionalData.length);
+    } else {
+      this.addtionalData = null;
+    }
+  }
+
+  /**
+   * @return the token text
+   */
+  public String getToken() {
+    return token;
+  }
+
+  /**
+   * @return the tag assigned to the token
+   */
+  public String getTag() {
+    return tag;
+  }
+
+  /**
+   * @return the additional data array held by this instance (not a copy),
+   *     or {@code null} if none was supplied
+   */
+  public String[] getAddtionalData() {
+    return addtionalData;
+  }
+
+  /**
+   * Collects the tokens of the given tuples, preserving their order.
+   *
+   * @param tuples the token/tag pairs to read
+   * @return a new array holding the token of each tuple
+   */
+  public static String[] extractTokens(TokenTag[] tuples) {
+    String[] tokens = new String[tuples.length];
+    for (int i = 0; i < tuples.length; i++) {
+      tokens[i] = tuples[i].getToken();
+    }
+
+    return tokens;
+  }
+
+  /**
+   * Collects the tags of the given tuples, preserving their order.
+   *
+   * @param tuples the token/tag pairs to read
+   * @return a new array holding the tag of each tuple
+   */
+  public static String[] extractTags(TokenTag[] tuples) {
+    String[] tags = new String[tuples.length];
+    for (int i = 0; i < tuples.length; i++) {
+      tags[i] = tuples[i].getTag();
+    }
+
+    return tags;
+  }
+
+  /**
+   * Pairs each token with the tag at the same index; the additional data of
+   * every created pair is {@code null}.
+   *
+   * @param toks the tokens; determines the length of the result
+   * @param tags the tags, read at indices {@code 0 .. toks.length - 1}
+   * @return a new array with one {@link TokenTag} per token
+   */
+  public static TokenTag[] create(String[] toks, String[] tags) {
+    TokenTag[] tuples = new TokenTag[toks.length];
+    for (int i = 0; i < toks.length; i++) {
+      tuples[i] = new TokenTag(toks[i], tags[i], null);
+    }
+    return tuples;
+  }
+
+  @Override
+  public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    } else if (o instanceof TokenTag) {
+      TokenTag other = (TokenTag) o;
+      // Arrays.equals compares the array contents. Objects.equals would only
+      // compare the array references, and since the constructor copies the
+      // array, two instances built from the same data would never be equal.
+      return Objects.equals(this.token, other.token)
+          && Objects.equals(this.tag, other.tag)
+          && Arrays.equals(this.addtionalData, other.addtionalData);
+    }
+    return false;
+  }
+
+  @Override
+  public int hashCode() {
+    // Hash the array by content so that equal instances share a hash code.
+    return Objects.hash(token, tag, Arrays.hashCode(addtionalData));
+  }
+
+  /**
+   * @return the token and the tag joined by an underscore
+   */
+  @Override
+  public String toString() {
+    return token + "_" + tag;
+  }
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/5f96aa32/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerMETest.java
----------------------------------------------------------------------
diff --git 
a/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerMETest.java 
b/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerMETest.java
index 4922ce9..3c04894 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerMETest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerMETest.java
@@ -124,7 +124,7 @@ public class ChunkerMETest {
   public void testTokenProbMinScore() throws Exception {
     Sequence[] preds = chunker.topKSequences(toks1, tags1, -5.55);
 
-    Assert.assertTrue(preds.length == 4);
+    Assert.assertEquals(4, preds.length);
     Assert.assertEquals(expect1.length, preds[0].getProbs().length);
     Assert.assertEquals(Arrays.asList(expect1), preds[0].getOutcomes());
     Assert.assertNotSame(Arrays.asList(expect1), preds[1].getOutcomes());

http://git-wip-us.apache.org/repos/asf/opennlp/blob/5f96aa32/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerModelTest.java
----------------------------------------------------------------------
diff --git 
a/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerModelTest.java 
b/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerModelTest.java
new file mode 100644
index 0000000..85afc53
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerModelTest.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.chunker;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Tests loading of serialized {@link ChunkerModel} resources.
+ */
+public class ChunkerModelTest {
+
+  /**
+   * Loading {@code chunker170custom.bin} must not yield a model. If an
+   * {@link IllegalArgumentException} is raised, its message has to name both
+   * the ChunkerFactory and the DummyChunkerFactory class.
+   */
+  @Test
+  public void testInvalidFactorySignature() throws Exception {
+
+    ChunkerModel loaded = null;
+    try {
+      loaded = new ChunkerModel(this.getClass().getResourceAsStream("chunker170custom.bin"));
+    } catch (IllegalArgumentException expected) {
+      Assert.assertTrue("Exception must state ChunkerFactory",
+          expected.getMessage().contains("ChunkerFactory"));
+      Assert.assertTrue("Exception must mention DummyChunkerFactory",
+          expected.getMessage().contains("opennlp.tools.chunker.DummyChunkerFactory"));
+    }
+    Assert.assertNull(loaded);
+  }
+
+  /**
+   * Loading {@code chunker170default.bin} must succeed.
+   */
+  @Test
+  public void test170DefaultFactory() throws Exception {
+
+    ChunkerModel loaded =
+        new ChunkerModel(this.getClass().getResourceAsStream("chunker170default.bin"));
+    Assert.assertNotNull(loaded);
+
+  }
+
+  /**
+   * Loading {@code chunker180custom.bin} must succeed.
+   */
+  @Test
+  public void test180CustomFactory() throws Exception {
+
+    ChunkerModel loaded =
+        new ChunkerModel(this.getClass().getResourceAsStream("chunker180custom.bin"));
+    Assert.assertNotNull(loaded);
+
+  }
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/5f96aa32/opennlp-tools/src/test/java/opennlp/tools/chunker/DummyChunkerFactory.java
----------------------------------------------------------------------
diff --git 
a/opennlp-tools/src/test/java/opennlp/tools/chunker/DummyChunkerFactory.java 
b/opennlp-tools/src/test/java/opennlp/tools/chunker/DummyChunkerFactory.java
index 0ae8b6d..9b4eae7 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/chunker/DummyChunkerFactory.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/chunker/DummyChunkerFactory.java
@@ -18,6 +18,7 @@
 package opennlp.tools.chunker;
 
 import opennlp.tools.util.SequenceValidator;
+import opennlp.tools.util.TokenTag;
 
 public class DummyChunkerFactory extends ChunkerFactory {
 
@@ -30,7 +31,7 @@ public class DummyChunkerFactory extends ChunkerFactory {
   }
 
   @Override
-  public SequenceValidator<String> getSequenceValidator() {
+  public SequenceValidator<TokenTag> getSequenceValidator() {
     return new DummySequenceValidator();
   }
 
@@ -46,7 +47,7 @@ public class DummyChunkerFactory extends ChunkerFactory {
   static class DummySequenceValidator extends DefaultChunkerSequenceValidator {
 
     @Override
-    public boolean validSequence(int i, String[] sequence, String[] s,
+    public boolean validSequence(int i, TokenTag[] sequence, String[] s,
         String outcome) {
       return super.validSequence(i, sequence, s, outcome);
     }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/5f96aa32/opennlp-tools/src/test/resources/opennlp/tools/chunker/chunker170custom.bin
----------------------------------------------------------------------
diff --git 
a/opennlp-tools/src/test/resources/opennlp/tools/chunker/chunker170custom.bin 
b/opennlp-tools/src/test/resources/opennlp/tools/chunker/chunker170custom.bin
new file mode 100644
index 0000000..f971261
Binary files /dev/null and 
b/opennlp-tools/src/test/resources/opennlp/tools/chunker/chunker170custom.bin 
differ

http://git-wip-us.apache.org/repos/asf/opennlp/blob/5f96aa32/opennlp-tools/src/test/resources/opennlp/tools/chunker/chunker170default.bin
----------------------------------------------------------------------
diff --git 
a/opennlp-tools/src/test/resources/opennlp/tools/chunker/chunker170default.bin 
b/opennlp-tools/src/test/resources/opennlp/tools/chunker/chunker170default.bin
new file mode 100644
index 0000000..47b1daf
Binary files /dev/null and 
b/opennlp-tools/src/test/resources/opennlp/tools/chunker/chunker170default.bin 
differ

http://git-wip-us.apache.org/repos/asf/opennlp/blob/5f96aa32/opennlp-tools/src/test/resources/opennlp/tools/chunker/chunker180custom.bin
----------------------------------------------------------------------
diff --git 
a/opennlp-tools/src/test/resources/opennlp/tools/chunker/chunker180custom.bin 
b/opennlp-tools/src/test/resources/opennlp/tools/chunker/chunker180custom.bin
new file mode 100644
index 0000000..9e96e95
Binary files /dev/null and 
b/opennlp-tools/src/test/resources/opennlp/tools/chunker/chunker180custom.bin 
differ

Reply via email to