Author: pkluegl Date: Wed Nov 6 10:03:05 2019 New Revision: 1869448 URL: http://svn.apache.org/viewvc?rev=1869448&view=rev Log: UIMA-6092: fixed bug in lookup, dictRemoveWS default set to true
Added: uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/resource/TreeWordListTest.java Modified: uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaEngine.java uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/TreeWordList.java uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/BasicEngine.xml uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/action/MarkFastTest.java uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/ResourcesFromDataPathTest.java Modified: uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaEngine.java URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaEngine.java?rev=1869448&r1=1869447&r2=1869448&view=diff ============================================================================== --- uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaEngine.java (original) +++ uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaEngine.java Wed Nov 6 10:03:05 2019 @@ -390,8 +390,8 @@ public class RutaEngine extends JCasAnno */ public static final String PARAM_DICT_REMOVE_WS = "dictRemoveWS"; - @ConfigurationParameter(name = PARAM_DICT_REMOVE_WS, mandatory = false, defaultValue = "false") - private Boolean dictRemoveWS = false; + @ConfigurationParameter(name = PARAM_DICT_REMOVE_WS, mandatory = false, defaultValue = "true") + private Boolean dictRemoveWS = true; /** * If this parameter is set to any String value then this String/token is used to split columns in Modified: uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/TreeWordList.java URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/TreeWordList.java?rev=1869448&r1=1869447&r2=1869448&view=diff ============================================================================== --- uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/TreeWordList.java (original) +++ uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/TreeWordList.java Wed Nov 6 10:03:05 2019 @@ -268,7 +268,7 @@ public class TreeWordList implements Rut TextNode wsNode = pointer.getChildNode(' '); if (ignoreWS && wsNode != null) { - result |= recursiveContains(wsNode, text, --next, ignoreCase, fragment, ignoreChars, + result |= recursiveContains(wsNode, text, next - 1, ignoreCase, fragment, ignoreChars, maxIgnoreChars, ignoreWS); } @@ -292,7 +292,7 @@ public class TreeWordList implements Rut } else { TextNode wsNode = pointer.getChildNode(' '); if (ignoreWS && wsNode != null) { - result |= recursiveContains(wsNode, text, --next, ignoreCase, fragment, ignoreChars, + result |= recursiveContains(wsNode, text, next - 1, ignoreCase, fragment, ignoreChars, maxIgnoreChars, ignoreWS); } Modified: uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/BasicEngine.xml URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/BasicEngine.xml?rev=1869448&r1=1869447&r2=1869448&view=diff ============================================================================== --- uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/BasicEngine.xml (original) +++ uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/BasicEngine.xml Wed Nov 6 10:03:05 2019 @@ -336,6 +336,12 @@ <boolean>true</boolean> </value> </nameValuePair> + <nameValuePair> + <name>dictRemoveWS</name> + <value> + <boolean>true</boolean> + </value> + </nameValuePair> <nameValuePair> <name>inferenceVisitors</name> <value> Modified: uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/action/MarkFastTest.java URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/action/MarkFastTest.java?rev=1869448&r1=1869447&r2=1869448&view=diff ============================================================================== --- uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/action/MarkFastTest.java (original) +++ uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/action/MarkFastTest.java Wed Nov 6 10:03:05 2019 @@ -43,7 +43,7 @@ public class MarkFastTest { RutaTestUtils.assertAnnotationsEquals(cas, 1, 3, "1 0 0", "100", "2 0 0"); RutaTestUtils.assertAnnotationsEquals(cas, 2, 0); - RutaTestUtils.assertAnnotationsEquals(cas, 3, 1, "100"); + RutaTestUtils.assertAnnotationsEquals(cas, 3, 3, "1 0 0", "100", "2 0 0"); RutaTestUtils.assertAnnotationsEquals(cas, 4, 2, "1 0 0", "2 0 0"); } Modified: uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/ResourcesFromDataPathTest.java URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/ResourcesFromDataPathTest.java?rev=1869448&r1=1869447&r2=1869448&view=diff ============================================================================== --- uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/ResourcesFromDataPathTest.java (original) +++ uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/ResourcesFromDataPathTest.java Wed Nov 6 10:03:05 2019 @@ -66,8 +66,8 @@ public class ResourcesFromDataPathTest { ae.process(cas); RutaTestUtils.assertAnnotationsEquals(cas, 1, 3, "1 0 0", "100", "2 0 0"); - RutaTestUtils.assertAnnotationsEquals(cas, 2, 0); + RutaTestUtils.assertAnnotationsEquals(cas, 2, 3, "1 0 0", "100", "2 0 0"); RutaTestUtils.assertAnnotationsEquals(cas, 3, 1, "100"); - RutaTestUtils.assertAnnotationsEquals(cas, 4, 2, "1 0 0", "2 0 0"); + RutaTestUtils.assertAnnotationsEquals(cas, 4, 1, "100"); } } Added: uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/resource/TreeWordListTest.java URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/resource/TreeWordListTest.java?rev=1869448&view=auto ============================================================================== --- uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/resource/TreeWordListTest.java (added) +++ uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/resource/TreeWordListTest.java Wed Nov 6 10:03:05 2019 @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.uima.ruta.resource; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; + +import org.apache.uima.cas.CAS; +import org.apache.uima.cas.Type; +import org.apache.uima.cas.TypeSystem; +import org.apache.uima.cas.text.AnnotationFS; +import org.apache.uima.fit.factory.JCasFactory; +import org.apache.uima.jcas.JCas; +import org.apache.uima.ruta.FilterManager; +import org.apache.uima.ruta.RutaStream; +import org.apache.uima.ruta.engine.Ruta; +import org.apache.uima.ruta.engine.RutaTestUtils; +import org.apache.uima.ruta.seed.TextSeeder; +import org.apache.uima.ruta.type.RutaBasic; +import org.apache.uima.ruta.visitor.InferenceCrowd; +import org.junit.Assert; +import org.junit.Test; + +public class TreeWordListTest { + + @Test + public void testWithAction() throws Exception { + + String text = "ab"; + String script = "STRINGLIST list = {\"ab\", \"a c\", \"a d\"};"; + script += "MARKFAST(T1, list);"; + + CAS cas = RutaTestUtils.getCAS(text); + Ruta.apply(cas, script); + + RutaTestUtils.assertAnnotationsEquals(cas, 1, 1, text); + } + + @Test + public void testFind() throws Exception { + + String text = "ab"; + List<String> data = Arrays.asList(text, "a c", "a d"); + TreeWordList twl = new TreeWordList(data, false); + + JCas jcas = JCasFactory.createJCas(); + jcas.setDocumentText(text); + CAS cas = jcas.getCas(); + RutaStream stream = createStream(text, cas); + + List<AnnotationFS> result1 = twl.find(stream, false, 0, null, 0, false); + Assert.assertEquals(1, result1.size()); + Assert.assertEquals(text, result1.get(0).getCoveredText()); + + List<AnnotationFS> result2 = twl.find(stream, false, 0, null, 0, true); + Assert.assertEquals(1, result2.size()); + Assert.assertEquals(text, result2.get(0).getCoveredText()); + + List<AnnotationFS> result3 = twl.find(stream, true, 0, null, 0, false); + Assert.assertEquals(1, result3.size()); + Assert.assertEquals(text, result3.get(0).getCoveredText()); + } + + private RutaStream createStream(String text, CAS cas) { + Type basicType = cas.getTypeSystem().getType(RutaBasic.class.getName()); + + Collection<Type> filterTypes = getDefaultFilterTypes(cas); + + FilterManager filter = new FilterManager(filterTypes, true, cas); + TextSeeder seeder = new TextSeeder(); + seeder.seed(text, cas); + InferenceCrowd crowd = new InferenceCrowd(new ArrayList<>()); + RutaStream stream = new RutaStream(cas, basicType, filter, false, false, true, null, crowd); + stream.initalizeBasics(new String[] { CAS.TYPE_NAME_ANNOTATION }, false); + return stream; + } + + private Collection<Type> getDefaultFilterTypes(CAS cas) { + Collection<Type> filterTypes = new ArrayList<Type>(); + TypeSystem typeSystem = cas.getTypeSystem(); + String[] defaultFilteredTypes = new String[] { "org.apache.uima.ruta.type.SPACE", + "org.apache.uima.ruta.type.BREAK", "org.apache.uima.ruta.type.MARKUP" }; + for (String each : defaultFilteredTypes) { + Type type = typeSystem.getType(each); + if (type != null) { + filterTypes.add(type); + } + } + return filterTypes; + } + +} \ No newline at end of file