[uima-uimaj] 01/01: [UIMA-6152] "trim" method for AnnotationFS

2020-01-06 Thread rec
This is an automated email from the ASF dual-hosted git repository.

rec pushed a commit to branch feature/UIMA-6152-trim-method-for-AnnotationFS
in repository https://gitbox.apache.org/repos/asf/uima-uimaj.git

commit 827f4f695e64b04ffb9621e2047190977f869739
Author: Richard Eckart de Castilho 
AuthorDate: Mon Jan 6 23:19:05 2020 +0100

[UIMA-6152] "trim" method for AnnotationFS

- Introduce a Unicode-aware trim() method on AnnotationFS
- Added unit tests
- Added CasCreationUtils.createCas() no-args convenience method
---
 uimaj-core/pom.xml |   9 +-
 .../org/apache/uima/cas/text/AnnotationFS.java |  22 ++-
 .../java/org/apache/uima/jcas/tcas/Annotation.java |  36 
 .../org/apache/uima/util/CasCreationUtils.java |  12 ++
 .../org/apache/uima/jcas/tcas/AnnotationTest.java  | 205 +
 uimaj-parent/pom.xml   |   6 +
 6 files changed, 285 insertions(+), 5 deletions(-)

diff --git a/uimaj-core/pom.xml b/uimaj-core/pom.xml
index 958c911..eb12807 100644
--- a/uimaj-core/pom.xml
+++ b/uimaj-core/pom.xml
@@ -204,8 +204,13 @@
   asm-tree
   5.0.4
 
- -->   
-
+ -->
+ 
+<dependency>
+  <groupId>org.assertj</groupId>
+  <artifactId>assertj-core</artifactId>
+  <scope>test</scope>
+</dependency>



diff --git a/uimaj-core/src/main/java/org/apache/uima/cas/text/AnnotationFS.java b/uimaj-core/src/main/java/org/apache/uima/cas/text/AnnotationFS.java
index d281c34..2f48c01 100644
--- a/uimaj-core/src/main/java/org/apache/uima/cas/text/AnnotationFS.java
+++ b/uimaj-core/src/main/java/org/apache/uima/cas/text/AnnotationFS.java
@@ -19,9 +19,9 @@
 
 package org.apache.uima.cas.text;
 
+import java.util.function.IntPredicate;
+
 import org.apache.uima.cas.AnnotationBaseFS;
-import org.apache.uima.cas.CASRuntimeException;
-import org.apache.uima.cas.impl.FeatureStructureImplC;
 
 /**
  * Interface for Annotation Feature Structures.
@@ -74,4 +74,20 @@ public interface AnnotationFS extends AnnotationBaseFS {
*/
   String getCoveredText();
 
- }
+  /**
+   * Strips leading and trailing whitespace by increasing/decreasing the begin/end offsets. This 
+   * method is aware of Unicode codepoints. It expects that the begin/end offsets point to valid
+   * codepoints.
+   */
+  default void trim() {
+  trim(Character::isWhitespace);
+  }
+  
+  /**
+   * Strips leading and trailing characters matching the given predicate by increasing/decreasing 
+   * the begin/end offsets.
+   * 
+   * @see #trim()
+   */
+  void trim(IntPredicate aPredicate);
+}
diff --git a/uimaj-core/src/main/java/org/apache/uima/jcas/tcas/Annotation.java b/uimaj-core/src/main/java/org/apache/uima/jcas/tcas/Annotation.java
index d9cf124..b424321 100644
--- a/uimaj-core/src/main/java/org/apache/uima/jcas/tcas/Annotation.java
+++ b/uimaj-core/src/main/java/org/apache/uima/jcas/tcas/Annotation.java
@@ -21,6 +21,7 @@ package org.apache.uima.jcas.tcas;
 
 import java.lang.invoke.CallSite;
 import java.lang.invoke.MethodHandle;
+import java.util.function.IntPredicate;
 
 import org.apache.uima.cas.CAS;
 import org.apache.uima.cas.admin.LinearTypeOrder;
@@ -244,4 +245,39 @@ public class Annotation extends AnnotationBase implements AnnotationImpl {
 return Integer.compare(_id,  other._id);
   }
 
+  @Override
+  public void trim(IntPredicate aIsTrimChar) {
+int begin = getBegin();
+int end = getEnd();
+String text = _casView.getDocumentText();
+  
+// If the span is empty, there is nothing to trim
+if (begin == end) {
+  return;
+}
+  
+// First we trim at the end. If a trimmed span is empty, we want to return the original 
+// begin as the begin/end of the trimmed span
+int backwardsSeekingCodepoint;
+while (
+  (end > 0)
+  && end > begin
+  && aIsTrimChar.test(backwardsSeekingCodepoint = text.codePointBefore(end))
+) {
+  end -= Character.charCount(backwardsSeekingCodepoint);
+}
+
+// Then, trim at the start
+int forwardSeekingCodepoint;
+while (
+  (begin < (text.length() - 1))
+  && begin < end
+  && aIsTrimChar.test(forwardSeekingCodepoint = text.codePointAt(begin))
+) {
+  begin += Character.charCount(forwardSeekingCodepoint);
+}
+  
+setBegin(begin);
+setEnd(end);
+  }
 }
diff --git a/uimaj-core/src/main/java/org/apache/uima/util/CasCreationUtils.java b/uimaj-core/src/main/java/org/apache/uima/util/CasCreationUtils.java
index 30dda98..e4246a7 100644
--- a/uimaj-core/src/main/java/org/apache/uima/util/CasCreationUtils.java
+++ b/uimaj-core/src/main/java/org/apache/uima/util/CasCreationUtils.java
@@ -92,6 +92,18 @@ public class CasCreationUtils {
   private final static FeatureDescription[] EMPTY_FEAT_DESC_ARRAY = new FeatureDescription[0];
   
   /**
+   * Creates a new CAS instance.
+   * 
+   * @return a new CAS instance
+   * 
+   * @throws ResourceInitializationException
+   *

[uima-uimaj] branch feature/UIMA-6152-trim-method-for-AnnotationFS created (now 827f4f6)

2020-01-06 Thread rec
This is an automated email from the ASF dual-hosted git repository.

rec pushed a change to branch feature/UIMA-6152-trim-method-for-AnnotationFS
in repository https://gitbox.apache.org/repos/asf/uima-uimaj.git.


  at 827f4f6  [UIMA-6152] "trim" method for AnnotationFS

This branch includes the following new commits:

 new 827f4f6  [UIMA-6152] "trim" method for AnnotationFS

The 1 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.




[uima-uimaj] branch robin-hood-hash updated: turn off slow perf test of hashset

2020-01-06 Thread schor
This is an automated email from the ASF dual-hosted git repository.

schor pushed a commit to branch robin-hood-hash
in repository https://gitbox.apache.org/repos/asf/uima-uimaj.git


The following commit(s) were added to refs/heads/robin-hood-hash by this push:
 new 851fee2  turn off slow perf test of hashset
851fee2 is described below

commit 851fee28430677829defba8f71f549b5b7819de6
Author: Marshall Schor 
AuthorDate: Mon Jan 6 15:44:48 2020 -0500

turn off slow perf test of hashset
---
 .../test/java/org/apache/uima/internal/util/IntHashSetPerfTestRh.java   | 2 +-
 .../src/test/java/org/apache/uima/internal/util/ObjHashSetPerfTest.java | 2 +-
 .../test/java/org/apache/uima/internal/util/ObjHashSetPerfTestRh.java   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/uimaj-core/src/test/java/org/apache/uima/internal/util/IntHashSetPerfTestRh.java b/uimaj-core/src/test/java/org/apache/uima/internal/util/IntHashSetPerfTestRh.java
index 5bb5286..008da8f 100644
--- a/uimaj-core/src/test/java/org/apache/uima/internal/util/IntHashSetPerfTestRh.java
+++ b/uimaj-core/src/test/java/org/apache/uima/internal/util/IntHashSetPerfTestRh.java
@@ -31,7 +31,7 @@ public class IntHashSetPerfTestRh extends TestCase {
* 
* Tests both IntHashSet and IntBitSet
*/
-  final boolean SKIP = false;
+  final boolean SKIP = true;
   
   static int cacheLoadSize;
   
diff --git a/uimaj-core/src/test/java/org/apache/uima/internal/util/ObjHashSetPerfTest.java b/uimaj-core/src/test/java/org/apache/uima/internal/util/ObjHashSetPerfTest.java
index aab13fb..3e5e934 100644
--- a/uimaj-core/src/test/java/org/apache/uima/internal/util/ObjHashSetPerfTest.java
+++ b/uimaj-core/src/test/java/org/apache/uima/internal/util/ObjHashSetPerfTest.java
@@ -29,7 +29,7 @@ public class ObjHashSetPerfTest extends TestCase {
* 
* Tests both IntHashSet and IntBitSet
*/
-  final boolean SKIP = false;
+  final boolean SKIP = true;
   
   static int cacheLoadSize;
   
diff --git a/uimaj-core/src/test/java/org/apache/uima/internal/util/ObjHashSetPerfTestRh.java b/uimaj-core/src/test/java/org/apache/uima/internal/util/ObjHashSetPerfTestRh.java
index 24ec769..22ef907 100644
--- a/uimaj-core/src/test/java/org/apache/uima/internal/util/ObjHashSetPerfTestRh.java
+++ b/uimaj-core/src/test/java/org/apache/uima/internal/util/ObjHashSetPerfTestRh.java
@@ -31,7 +31,7 @@ public class ObjHashSetPerfTestRh extends TestCase {
* 
* Tests both IntHashSet and IntBitSet
*/
-  final boolean SKIP = false;
+  final boolean SKIP = true;
   
   static int cacheLoadSize;