OPENNLP-1003: Write a test case for the BioCodec class This closes #141
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/9956ff5a
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/9956ff5a
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/9956ff5a

Branch: refs/heads/parser_regression
Commit: 9956ff5a590400871dbdfddcf6973a691b96abb1
Parents: 91c4baa
Author: Peter Thygesen <[email protected]>
Authored: Tue Mar 14 23:22:24 2017 +0100
Committer: Jörn Kottmann <[email protected]>
Committed: Thu Apr 20 12:40:23 2017 +0200

----------------------------------------------------------------------
 .../opennlp/tools/namefind/BioCodecTest.java    | 263 +++++++++++++++++++
 1 file changed, 263 insertions(+)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/opennlp/blob/9956ff5a/opennlp-tools/src/test/java/opennlp/tools/namefind/BioCodecTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/BioCodecTest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/BioCodecTest.java
new file mode 100644
index 0000000..c894742
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/BioCodecTest.java
@@ -0,0 +1,263 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.namefind;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import opennlp.tools.util.Span;
+
+/**
+ * This is the test class for {@link BioCodec}.
+ * <p>
+ * The tests are grouped by the codec method they exercise: {@code encode},
+ * {@code createSequenceValidator}, {@code decode} and {@code areOutcomesCompatible}.
+ */
+public class BioCodecTest {
+
+  private static final BioCodec codec = new BioCodec();
+
+  // Outcome labels are built as "<type>-<suffix>" from the codec's own suffix constants.
+  private static final String A_TYPE = "atype";
+  private static final String A_START = A_TYPE + "-" + BioCodec.START;
+  private static final String A_CONTINUE = A_TYPE + "-" + BioCodec.CONTINUE;
+
+  private static final String B_TYPE = "btype";
+  private static final String B_START = B_TYPE + "-" + BioCodec.START;
+  private static final String B_CONTINUE = B_TYPE + "-" + BioCodec.CONTINUE;
+
+  private static final String C_TYPE = "ctype";
+  private static final String C_START = C_TYPE + "-" + BioCodec.START;
+
+  private static final String OTHER = BioCodec.OTHER;
+
+  /**
+   * A sentence without any name span is expected to be encoded as 'other' only.
+   */
+  @Test
+  public void testEncodeNoNames() {
+    NameSample nameSample = new NameSample("Once upon a time.".split(" "), new Span[] {}, true);
+    String[] expected = new String[] {OTHER, OTHER, OTHER, OTHER};
+    String[] actual = codec.encode(nameSample.getNames(), nameSample.getSentence().length);
+    Assert.assertArrayEquals("Only 'Other' is expected.", expected, actual);
+  }
+
+  /**
+   * A span covering one token is expected to be encoded as a single 'start' outcome.
+   */
+  @Test
+  public void testEncodeSingleTokenSpan() {
+    String[] sentence = "I called Julie again.".split(" ");
+    Span[] spans = new Span[] {new Span(2, 3, A_TYPE)};
+    NameSample nameSample = new NameSample(sentence, spans, true);
+    String[] expected = new String[] {OTHER, OTHER, A_START, OTHER};
+    String[] actual = codec.encode(nameSample.getNames(), nameSample.getSentence().length);
+    Assert.assertArrayEquals("'Julie' should be 'start' only, the rest should be 'other'.",
+        expected, actual);
+  }
+
+  /**
+   * A span covering two tokens is expected to be encoded as 'start' followed by 'continue'.
+   */
+  @Test
+  public void testEncodeDoubleTokenSpan() {
+    String[] sentence = "I saw Stefanie Schmidt today.".split(" ");
+    Span[] spans = new Span[] {new Span(2, 4, A_TYPE)};
+    NameSample nameSample = new NameSample(sentence, spans, true);
+    String[] expected = new String[] {OTHER, OTHER, A_START, A_CONTINUE, OTHER};
+    String[] actual = codec.encode(nameSample.getNames(), nameSample.getSentence().length);
+    Assert.assertArrayEquals("'Stefanie' should be 'start' only, 'Schmidt' is "
+        + "'continue' and the rest should be 'other'.", expected, actual);
+  }
+
+  /**
+   * A span whose type is {@code null} is expected to be encoded with the type name "default".
+   */
+  @Test
+  public void testEncodeDoubleTokenSpanNoType() {
+    final String defaultStart = "default" + "-" + BioCodec.START;
+    final String defaultContinue = "default" + "-" + BioCodec.CONTINUE;
+    String[] sentence = "I saw Stefanie Schmidt today.".split(" ");
+    Span[] spans = new Span[] {new Span(2, 4, null)};
+    NameSample nameSample = new NameSample(sentence, spans, true);
+    String[] expected = new String[] {OTHER, OTHER, defaultStart, defaultContinue, OTHER};
+    String[] actual = codec.encode(nameSample.getNames(), nameSample.getSentence().length);
+    Assert.assertArrayEquals("'Stefanie' should be 'start' only, 'Schmidt' is "
+        + "'continue' and the rest should be 'other'.", expected, actual);
+  }
+
+  /**
+   * Two adjacent single-token spans of the same type are expected to yield two 'start' outcomes.
+   */
+  @Test
+  public void testEncodeAdjacentSingleSpans() {
+    String[] sentence = "something PersonA PersonB Something".split(" ");
+    Span[] spans = new Span[] {new Span(1, 2, A_TYPE), new Span(2, 3, A_TYPE)};
+    NameSample nameSample = new NameSample(sentence, spans, true);
+    String[] expected = new String[] {OTHER, A_START, A_START, OTHER};
+    String[] actual = codec.encode(nameSample.getNames(), nameSample.getSentence().length);
+    Assert.assertArrayEquals(expected, actual);
+  }
+
+  /**
+   * A two-token span directly followed by a single-token span of the same type is expected
+   * to be encoded as 'start', 'continue', 'start'.
+   */
+  @Test
+  public void testEncodeAdjacentSpans() {
+    String[] sentence = "something PersonA PersonA PersonB Something".split(" ");
+    Span[] spans = new Span[] {new Span(1, 3, A_TYPE), new Span(3, 4, A_TYPE)};
+    NameSample nameSample = new NameSample(sentence, spans, true);
+    String[] expected = new String[] {OTHER, A_START, A_CONTINUE, A_START, OTHER};
+    String[] actual = codec.encode(nameSample.getNames(), nameSample.getSentence().length);
+    Assert.assertArrayEquals(expected, actual);
+  }
+
+  /**
+   * The codec is expected to hand out a {@link NameFinderSequenceValidator}.
+   */
+  @Test
+  public void testCreateSequenceValidator() {
+    Assert.assertTrue(codec.createSequenceValidator() instanceof NameFinderSequenceValidator);
+  }
+
+  /**
+   * Decoding an empty outcome list is expected to produce no spans.
+   */
+  @Test
+  public void testDecodeEmpty() {
+    Span[] expected = new Span[] {};
+    Span[] actual = codec.decode(new ArrayList<String>());
+    Assert.assertArrayEquals(expected, actual);
+  }
+
+  /**
+   * Start, Other
+   */
+  @Test
+  public void testDecodeSingletonFirst() {
+    List<String> encoded = Arrays.asList(B_START, OTHER);
+    Span[] expected = new Span[] {new Span(0, 1, B_TYPE)};
+    Span[] actual = codec.decode(encoded);
+    Assert.assertArrayEquals(expected, actual);
+  }
+
+  /**
+   * Start, Start, Other
+   */
+  @Test
+  public void testDecodeAdjacentSingletonFirst() {
+    List<String> encoded = Arrays.asList(B_START, B_START, OTHER);
+    Span[] expected = new Span[] {new Span(0, 1, B_TYPE), new Span(1, 2, B_TYPE)};
+    Span[] actual = codec.decode(encoded);
+    Assert.assertArrayEquals(expected, actual);
+  }
+
+  /**
+   * Start, Continue, Other
+   */
+  @Test
+  public void testDecodePairFirst() {
+    List<String> encoded = Arrays.asList(B_START, B_CONTINUE, OTHER);
+    Span[] expected = new Span[] {new Span(0, 2, B_TYPE)};
+    Span[] actual = codec.decode(encoded);
+    Assert.assertArrayEquals(expected, actual);
+  }
+
+  /**
+   * Start, Continue, Continue, Other
+   */
+  @Test
+  public void testDecodeTripletFirst() {
+    List<String> encoded = Arrays.asList(B_START, B_CONTINUE, B_CONTINUE, OTHER);
+    Span[] expected = new Span[] {new Span(0, 3, B_TYPE)};
+    Span[] actual = codec.decode(encoded);
+    Assert.assertArrayEquals(expected, actual);
+  }
+
+  /**
+   * Start, Continue, Start, Other
+   */
+  @Test
+  public void testDecodeAdjacentPairSingleton() {
+    List<String> encoded = Arrays.asList(B_START, B_CONTINUE, B_START, OTHER);
+    Span[] expected = new Span[] {new Span(0, 2, B_TYPE), new Span(2, 3, B_TYPE)};
+    Span[] actual = codec.decode(encoded);
+    Assert.assertArrayEquals(expected, actual);
+  }
+
+  /**
+   * Other, Start, Other
+   */
+  @Test
+  public void testDecodeOtherFirst() {
+    List<String> encoded = Arrays.asList(OTHER, B_START, OTHER);
+    Span[] expected = new Span[] {new Span(1, 2, B_TYPE)};
+    Span[] actual = codec.decode(encoded);
+    Assert.assertArrayEquals(expected, actual);
+  }
+
+  /**
+   * Other, A-Start, A-Continue, A-Continue, Other, B-Start, B-Continue, Other, C-Start, Other
+   */
+  @Test
+  public void testDecodeMultiClass() {
+    List<String> encoded = Arrays.asList(OTHER, A_START, A_CONTINUE, A_CONTINUE,
+        OTHER, B_START, B_CONTINUE, OTHER, C_START, OTHER);
+    Span[] expected = new Span[] {new Span(1, 4, A_TYPE),
+        new Span(5, 7, B_TYPE), new Span(8, 9, C_TYPE)};
+    Span[] actual = codec.decode(encoded);
+    Assert.assertArrayEquals(expected, actual);
+  }
+
+  /**
+   * An empty outcome set is expected to be rejected.
+   */
+  @Test
+  public void testCompatibilityEmpty() {
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {}));
+  }
+
+  /**
+   * A lone 'start' outcome is expected to be accepted.
+   */
+  @Test
+  public void testCompatibilitySingleStart() {
+    Assert.assertTrue(codec.areOutcomesCompatible(new String[] {A_START}));
+  }
+
+  /**
+   * A 'continue' outcome without a 'start' of the same type is expected to be rejected,
+   * even when a 'start' of another type is present.
+   */
+  @Test
+  public void testCompatibilitySingleContinue() {
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_CONTINUE}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {B_START, A_CONTINUE}));
+  }
+
+  /**
+   * A lone 'other' outcome is expected to be rejected.
+   */
+  @Test
+  public void testCompatibilitySingleOther() {
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {OTHER}));
+  }
+
+  /**
+   * 'start' plus 'continue' of the same type is expected to be accepted.
+   */
+  @Test
+  public void testCompatibilityStartContinue() {
+    Assert.assertTrue(codec.areOutcomesCompatible(new String[] {A_START, A_CONTINUE}));
+  }
+
+  /**
+   * 'start' plus 'other' is expected to be accepted.
+   */
+  @Test
+  public void testCompatibilityStartOther() {
+    Assert.assertTrue(codec.areOutcomesCompatible(new String[] {A_START, OTHER}));
+  }
+
+  /**
+   * 'continue' plus 'other' without a matching 'start' is expected to be rejected,
+   * even when a 'start' of another type is present.
+   */
+  @Test
+  public void testCompatibilityContinueOther() {
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_CONTINUE, OTHER}));
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {B_START, A_CONTINUE, OTHER}));
+  }
+
+  /**
+   * 'start', 'continue' and 'other' together are expected to be accepted.
+   */
+  @Test
+  public void testCompatibilityStartContinueOther() {
+    Assert.assertTrue(codec.areOutcomesCompatible(new String[] {A_START, A_CONTINUE, OTHER}));
+  }
+
+  /**
+   * Outcomes of several types are expected to be accepted when every 'continue'
+   * has a 'start' of its own type.
+   */
+  @Test
+  public void testCompatibilityMultiClass() {
+    Assert.assertTrue(codec.areOutcomesCompatible(
+        new String[] {A_START, A_CONTINUE, B_START, OTHER}));
+  }
+
+  /**
+   * An outcome that is neither 'start', 'continue' nor 'other' is expected to be rejected.
+   */
+  @Test
+  public void testCompatibilityBadTag() {
+    Assert.assertFalse(codec.areOutcomesCompatible(new String[] {A_START, A_CONTINUE, "BAD"}));
+  }
+
+  /**
+   * Repeated outcomes are expected to be accepted.
+   */
+  @Test
+  public void testCompatibilityRepeated() {
+    Assert.assertTrue(codec.areOutcomesCompatible(
+        new String[] {A_START, A_START, A_CONTINUE, A_CONTINUE, B_START, B_START, OTHER, OTHER}));
+  }
+
+}
