OPENNLP-1003: Write a test case for the BioCodec class

This closes #141


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/9956ff5a
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/9956ff5a
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/9956ff5a

Branch: refs/heads/parser_regression
Commit: 9956ff5a590400871dbdfddcf6973a691b96abb1
Parents: 91c4baa
Author: Peter Thygesen <[email protected]>
Authored: Tue Mar 14 23:22:24 2017 +0100
Committer: Jörn Kottmann <[email protected]>
Committed: Thu Apr 20 12:40:23 2017 +0200

----------------------------------------------------------------------
 .../opennlp/tools/namefind/BioCodecTest.java    | 263 +++++++++++++++++++
 1 file changed, 263 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/9956ff5a/opennlp-tools/src/test/java/opennlp/tools/namefind/BioCodecTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/BioCodecTest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/BioCodecTest.java
new file mode 100644
index 0000000..c894742
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/BioCodecTest.java
@@ -0,0 +1,263 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.namefind;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import opennlp.tools.util.Span;
+
+/**
+ * Tests the encoding, decoding and outcome compatibility checks of {@link BioCodec}.
+ */
+public class BioCodecTest {
+
+  private static final BioCodec codec = new BioCodec();
+
+  private static final String A_TYPE = "atype";
+  private static final String A_START = A_TYPE + "-" + BioCodec.START;
+  private static final String A_CONTINUE = A_TYPE + "-" + BioCodec.CONTINUE;
+
+  private static final String B_TYPE = "btype";
+  private static final String B_START = B_TYPE + "-" + BioCodec.START;
+  private static final String B_CONTINUE = B_TYPE + "-" + BioCodec.CONTINUE;
+
+  private static final String C_TYPE = "ctype";
+  private static final String C_START = C_TYPE + "-" + BioCodec.START;
+
+  private static final String OTHER = BioCodec.OTHER;
+
+  @Test
+  public void testEncodeNoNames() {
+    NameSample nameSample = new NameSample("Once upon a time.".split(" "), new Span[] {}, true);
+    String[] expected = new String[] {OTHER, OTHER, OTHER, OTHER};
+    String[] actual = codec.encode(nameSample.getNames(), nameSample.getSentence().length);
+    Assert.assertArrayEquals("Only 'Other' is expected.", expected, actual);
+  }
+
+  @Test
+  public void testEncodeSingleTokenSpan() {
+    String[] sentence = "I called Julie again.".split(" ");
+    Span[] spans = new Span[] {new Span(2, 3, A_TYPE)};
+    NameSample nameSample = new NameSample(sentence, spans, true);
+    String[] expected = new String[] {OTHER, OTHER, A_START, OTHER};
+    String[] actual = codec.encode(nameSample.getNames(), nameSample.getSentence().length);
+    Assert.assertArrayEquals("'Julie' should be 'start' only, the rest should be 'other'.", expected, actual);
+  }
+
+  @Test
+  public void testEncodeDoubleTokenSpan() {
+    String[] sentence = "I saw Stefanie Schmidt today.".split(" ");
+    Span[] spans = new Span[] {new Span(2, 4, A_TYPE)};
+    NameSample nameSample = new NameSample(sentence, spans, true);
+    String[] expected = new String[] {OTHER, OTHER, A_START, A_CONTINUE, OTHER};
+    String[] actual = codec.encode(nameSample.getNames(), nameSample.getSentence().length);
+    Assert.assertArrayEquals("'Stefanie' should be 'start' only, 'Schmidt' is " +
+        "'continue' and the rest should be 'other'.", expected, actual);
+  }
+
+  @Test
+  public void testEncodeDoubleTokenSpanNoType() {
+    // A span created without a type is expected to be encoded with the "default" type prefix.
+    final String DEFAULT_START = "default" + "-" + BioCodec.START;
+    final String DEFAULT_CONTINUE = "default" + "-" + BioCodec.CONTINUE;
+    String[] sentence = "I saw Stefanie Schmidt today.".split(" ");
+    Span[] spans = new Span[] {new Span(2, 4, null)};
+    NameSample nameSample = new NameSample(sentence, spans, true);
+    String[] expected = new String[] {OTHER, OTHER, DEFAULT_START, DEFAULT_CONTINUE, OTHER};
+    String[] actual = codec.encode(nameSample.getNames(), nameSample.getSentence().length);
+    Assert.assertArrayEquals("'Stefanie' should be 'start' only, 'Schmidt' is " +
+        "'continue' and the rest should be 'other'.", expected, actual);
+  }
+
+  @Test
+  public void testEncodeAdjacentSingleSpans() {
+    String[] sentence = "something PersonA PersonB Something".split(" ");
+    Span[] spans = new Span[] {new Span(1, 2, A_TYPE), new Span(2, 3, A_TYPE)};
+    NameSample nameSample = new NameSample(sentence, spans, true);
+    String[] expected = new String[] {OTHER, A_START, A_START, OTHER};
+    String[] actual = codec.encode(nameSample.getNames(), nameSample.getSentence().length);
+    Assert.assertArrayEquals(expected, actual);
+  }
+
+  @Test
+  public void testEncodeAdjacentSpans() {
+    String[] sentence = "something PersonA PersonA PersonB Something".split(" ");
+    Span[] spans = new Span[] {new Span(1, 3, A_TYPE), new Span(3, 4, A_TYPE)};
+    NameSample nameSample = new NameSample(sentence, spans, true);
+    String[] expected = new String[] {OTHER, A_START, A_CONTINUE, A_START, OTHER};
+    String[] actual = codec.encode(nameSample.getNames(), nameSample.getSentence().length);
+    Assert.assertArrayEquals(expected, actual);
+  }
+
+  @Test
+  public void testCreateSequenceValidator() {
+    Assert.assertTrue(codec.createSequenceValidator() instanceof NameFinderSequenceValidator);
+  }
+
+  @Test
+  public void testDecodeEmpty() {
+    Span[] expected = new Span[] {};
+    Span[] actual = codec.decode(new ArrayList<String>());
+    Assert.assertArrayEquals(expected, actual);
+  }
+
+  /**
+   * Start, Other
+   */
+  @Test
+  public void testDecodeSingletonFirst() {
+    List<String> encoded = Arrays.asList(B_START, OTHER);
+    Span[] expected = new Span[] {new Span(0, 1, B_TYPE)};
+    Span[] actual = codec.decode(encoded);
+    Assert.assertArrayEquals(expected, actual);
+  }
+
+  /**
+   * Start Start Other
+   */
+  @Test
+  public void testDecodeAdjacentSingletonFirst() {
+    List<String> encoded = Arrays.asList(B_START, B_START, OTHER);
+    Span[] expected = new Span[] {new Span(0, 1, B_TYPE), new Span(1, 2, B_TYPE)};
+    Span[] actual = codec.decode(encoded);
+    Assert.assertArrayEquals(expected, actual);
+  }
+
+  /**
+   * Start Continue Other
+   */
+  @Test
+  public void testDecodePairFirst() {
+    List<String> encoded = Arrays.asList(B_START, B_CONTINUE, OTHER);
+    Span[] expected = new Span[] {new Span(0, 2, B_TYPE)};
+    Span[] actual = codec.decode(encoded);
+    Assert.assertArrayEquals(expected, actual);
+  }
+
+  /**
+   * Start Continue Continue Other
+   */
+  @Test
+  public void testDecodeTripletFirst() {
+    List<String> encoded = Arrays.asList(B_START, B_CONTINUE, B_CONTINUE, OTHER);
+    Span[] expected = new Span[] {new Span(0, 3, B_TYPE)};
+    Span[] actual = codec.decode(encoded);
+    Assert.assertArrayEquals(expected, actual);
+  }
+
+  /**
+   * Start Continue Start Other
+   */
+  @Test
+  public void testDecodeAdjacentPairSingleton() {
+    List<String> encoded = Arrays.asList(B_START, B_CONTINUE, B_START, OTHER);
+    Span[] expected = new Span[] {new Span(0, 2, B_TYPE), new Span(2, 3, B_TYPE)};
+    Span[] actual = codec.decode(encoded);
+    Assert.assertArrayEquals(expected, actual);
+  }
+
+  /**
+   * Other Start Other
+   */
+  @Test
+  public void testDecodeOtherFirst() {
+    List<String> encoded = Arrays.asList(OTHER, B_START, OTHER);
+    Span[] expected = new Span[] {new Span(1, 2, B_TYPE)};
+    Span[] actual = codec.decode(encoded);
+    Assert.assertArrayEquals(expected, actual);
+  }
+
+  /**
+   * Other, A-Start, A-Continue, A-Continue, Other, B-Start, B-Continue, Other, C-Start, Other
+   */
+  @Test
+  public void testDecodeMultiClass() {
+    List<String> encoded = Arrays.asList(OTHER, A_START, A_CONTINUE, A_CONTINUE,
+        OTHER, B_START, B_CONTINUE, OTHER, C_START, OTHER);
+    Span[] expected = new Span[] {new Span(1, 4, A_TYPE),
+        new Span(5, 7, B_TYPE), new Span(8, 9, C_TYPE)};
+    Span[] actual = codec.decode(encoded);
+    Assert.assertArrayEquals(expected, actual);
+  }
+
+  @Test
+  public void testCompatibilityEmpty() {
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {}));
+  }
+
+  @Test
+  public void testCompatibilitySingleStart() {
+    Assert.assertTrue(codec.areOutcomesCompatible(new String[] {A_START}));
+  }
+
+  @Test
+  public void testCompatibilitySingleContinue() {
+    // A continue outcome without a start outcome of the same type is expected to be incompatible.
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_CONTINUE}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {B_START, A_CONTINUE}));
+  }
+
+  @Test
+  public void testCompatibilitySingleOther() {
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {OTHER}));
+  }
+
+  @Test
+  public void testCompatibilityStartContinue() {
+    Assert.assertTrue(codec.areOutcomesCompatible(new String[] {A_START, A_CONTINUE}));
+  }
+
+  @Test
+  public void testCompatibilityStartOther() {
+    Assert.assertTrue(codec.areOutcomesCompatible(new String[] {A_START, OTHER}));
+  }
+
+  @Test
+  public void testCompatibilityContinueOther() {
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_CONTINUE, OTHER}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {B_START, A_CONTINUE, OTHER}));
+  }
+
+  @Test
+  public void testCompatibilityStartContinueOther() {
+    Assert.assertTrue(codec.areOutcomesCompatible(new String[] {A_START, A_CONTINUE, OTHER}));
+  }
+
+  @Test
+  public void testCompatibilityMultiClass() {
+    Assert.assertTrue(codec.areOutcomesCompatible(
+        new String[] {A_START, A_CONTINUE, B_START, OTHER}));
+  }
+
+  @Test
+  public void testCompatibilityBadTag() {
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_CONTINUE, "BAD"}));
+  }
+
+  @Test
+  public void testCompatibilityRepeated() {
+    Assert.assertTrue(codec.areOutcomesCompatible(
+        new String[] {A_START, A_START, A_CONTINUE, A_CONTINUE, B_START, B_START, OTHER, OTHER}));
+  }
+
+}

Reply via email to