Author: pkluegl
Date: Wed Nov  6 10:03:05 2019
New Revision: 1869448

URL: http://svn.apache.org/viewvc?rev=1869448&view=rev
Log:
UIMA-6092: fixed bug in lookup, dictRemoveWS default set to true

Added:
    uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/resource/TreeWordListTest.java
Modified:
    uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaEngine.java
    uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/TreeWordList.java
    uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/BasicEngine.xml
    uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/action/MarkFastTest.java
    uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/ResourcesFromDataPathTest.java

Modified: uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaEngine.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaEngine.java?rev=1869448&r1=1869447&r2=1869448&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaEngine.java (original)
+++ uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaEngine.java Wed Nov  6 10:03:05 2019
@@ -390,8 +390,8 @@ public class RutaEngine extends JCasAnno
    */
   public static final String PARAM_DICT_REMOVE_WS = "dictRemoveWS";
 
-  @ConfigurationParameter(name = PARAM_DICT_REMOVE_WS, mandatory = false, defaultValue = "false")
-  private Boolean dictRemoveWS = false;
+  @ConfigurationParameter(name = PARAM_DICT_REMOVE_WS, mandatory = false, defaultValue = "true")
+  private Boolean dictRemoveWS = true;
 
   /**
    * If this parameter is set to any String value then this String/token is used to split columns in

Modified: uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/TreeWordList.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/TreeWordList.java?rev=1869448&r1=1869447&r2=1869448&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/TreeWordList.java (original)
+++ uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/TreeWordList.java Wed Nov  6 10:03:05 2019
@@ -268,7 +268,7 @@ public class TreeWordList implements Rut
 
       TextNode wsNode = pointer.getChildNode(' ');
       if (ignoreWS && wsNode != null) {
-        result |= recursiveContains(wsNode, text, --next, ignoreCase, fragment, ignoreChars,
+        result |= recursiveContains(wsNode, text, next - 1, ignoreCase, fragment, ignoreChars,
                 maxIgnoreChars, ignoreWS);
       }
 
@@ -292,7 +292,7 @@ public class TreeWordList implements Rut
     } else {
       TextNode wsNode = pointer.getChildNode(' ');
       if (ignoreWS && wsNode != null) {
-        result |= recursiveContains(wsNode, text, --next, ignoreCase, fragment, ignoreChars,
+        result |= recursiveContains(wsNode, text, next - 1, ignoreCase, fragment, ignoreChars,
                 maxIgnoreChars, ignoreWS);
       }
 

Modified: uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/BasicEngine.xml
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/BasicEngine.xml?rev=1869448&r1=1869447&r2=1869448&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/BasicEngine.xml (original)
+++ uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/BasicEngine.xml Wed Nov  6 10:03:05 2019
@@ -336,6 +336,12 @@
           <boolean>true</boolean>
         </value>
       </nameValuePair>
+       <nameValuePair>
+        <name>dictRemoveWS</name>
+        <value>
+          <boolean>true</boolean>
+        </value>
+      </nameValuePair>
       <nameValuePair>
         <name>inferenceVisitors</name>
         <value>

Modified: uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/action/MarkFastTest.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/action/MarkFastTest.java?rev=1869448&r1=1869447&r2=1869448&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/action/MarkFastTest.java (original)
+++ uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/action/MarkFastTest.java Wed Nov  6 10:03:05 2019
@@ -43,7 +43,7 @@ public class MarkFastTest {
 
     RutaTestUtils.assertAnnotationsEquals(cas, 1, 3, "1 0 0", "100", "2 0 0");
     RutaTestUtils.assertAnnotationsEquals(cas, 2, 0);
-    RutaTestUtils.assertAnnotationsEquals(cas, 3, 1, "100");
+    RutaTestUtils.assertAnnotationsEquals(cas, 3, 3, "1 0 0", "100", "2 0 0");
     RutaTestUtils.assertAnnotationsEquals(cas, 4, 2, "1 0 0", "2 0 0");
 
   }

Modified: uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/ResourcesFromDataPathTest.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/ResourcesFromDataPathTest.java?rev=1869448&r1=1869447&r2=1869448&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/ResourcesFromDataPathTest.java (original)
+++ uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/ResourcesFromDataPathTest.java Wed Nov  6 10:03:05 2019
@@ -66,8 +66,8 @@ public class ResourcesFromDataPathTest {
     ae.process(cas);
 
     RutaTestUtils.assertAnnotationsEquals(cas, 1, 3, "1 0 0", "100", "2 0 0");
-    RutaTestUtils.assertAnnotationsEquals(cas, 2, 0);
+    RutaTestUtils.assertAnnotationsEquals(cas, 2, 3, "1 0 0", "100", "2 0 0");
     RutaTestUtils.assertAnnotationsEquals(cas, 3, 1, "100");
-    RutaTestUtils.assertAnnotationsEquals(cas, 4, 2, "1 0 0", "2 0 0");
+    RutaTestUtils.assertAnnotationsEquals(cas, 4, 1, "100");
   }
 }

Added: uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/resource/TreeWordListTest.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/resource/TreeWordListTest.java?rev=1869448&view=auto
==============================================================================
--- uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/resource/TreeWordListTest.java (added)
+++ uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/resource/TreeWordListTest.java Wed Nov  6 10:03:05 2019
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.uima.ruta.resource;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.Type;
+import org.apache.uima.cas.TypeSystem;
+import org.apache.uima.cas.text.AnnotationFS;
+import org.apache.uima.fit.factory.JCasFactory;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.ruta.FilterManager;
+import org.apache.uima.ruta.RutaStream;
+import org.apache.uima.ruta.engine.Ruta;
+import org.apache.uima.ruta.engine.RutaTestUtils;
+import org.apache.uima.ruta.seed.TextSeeder;
+import org.apache.uima.ruta.type.RutaBasic;
+import org.apache.uima.ruta.visitor.InferenceCrowd;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TreeWordListTest {
+
+  @Test
+  public void testWithAction() throws Exception {
+
+    String text = "ab";
+    String script = "STRINGLIST list = {\"ab\", \"a c\", \"a d\"};";
+    script += "MARKFAST(T1, list);";
+
+    CAS cas = RutaTestUtils.getCAS(text);
+    Ruta.apply(cas, script);
+
+    RutaTestUtils.assertAnnotationsEquals(cas, 1, 1, text);
+  }
+
+  @Test
+  public void testFind() throws Exception {
+
+    String text = "ab";
+    List<String> data = Arrays.asList(text, "a c", "a d");
+    TreeWordList twl = new TreeWordList(data, false);
+
+    JCas jcas = JCasFactory.createJCas();
+    jcas.setDocumentText(text);
+    CAS cas = jcas.getCas();
+    RutaStream stream = createStream(text, cas);
+
+    List<AnnotationFS> result1 = twl.find(stream, false, 0, null, 0, false);
+    Assert.assertEquals(1, result1.size());
+    Assert.assertEquals(text, result1.get(0).getCoveredText());
+
+    List<AnnotationFS> result2 = twl.find(stream, false, 0, null, 0, true);
+    Assert.assertEquals(1, result2.size());
+    Assert.assertEquals(text, result2.get(0).getCoveredText());
+
+    List<AnnotationFS> result3 = twl.find(stream, true, 0, null, 0, false);
+    Assert.assertEquals(1, result3.size());
+    Assert.assertEquals(text, result3.get(0).getCoveredText());
+  }
+
+  private RutaStream createStream(String text, CAS cas) {
+    Type basicType = cas.getTypeSystem().getType(RutaBasic.class.getName());
+
+    Collection<Type> filterTypes = getDefaultFilterTypes(cas);
+
+    FilterManager filter = new FilterManager(filterTypes, true, cas);
+    TextSeeder seeder = new TextSeeder();
+    seeder.seed(text, cas);
+    InferenceCrowd crowd = new InferenceCrowd(new ArrayList<>());
+    RutaStream stream = new RutaStream(cas, basicType, filter, false, false, true, null, crowd);
+    stream.initalizeBasics(new String[] { CAS.TYPE_NAME_ANNOTATION }, false);
+    return stream;
+  }
+
+  private Collection<Type> getDefaultFilterTypes(CAS cas) {
+    Collection<Type> filterTypes = new ArrayList<Type>();
+    TypeSystem typeSystem = cas.getTypeSystem();
+    String[] defaultFilteredTypes = new String[] { "org.apache.uima.ruta.type.SPACE",
+        "org.apache.uima.ruta.type.BREAK", "org.apache.uima.ruta.type.MARKUP" };
+    for (String each : defaultFilteredTypes) {
+      Type type = typeSystem.getType(each);
+      if (type != null) {
+        filterTypes.add(type);
+      }
+    }
+    return filterTypes;
+  }
+
+}
\ No newline at end of file


Reply via email to