This is an automated email from the ASF dual-hosted git repository.
rec pushed a commit to branch feature/UIMA-6152-trim-method-for-AnnotationFS
in repository https://gitbox.apache.org/repos/asf/uima-uimaj.git
commit 827f4f695e64b04ffb9621e2047190977f869739
Author: Richard Eckart de Castilho
AuthorDate: Mon Jan 6 23:19:05 2020 +0100
[UIMA-6152] "trim" method for AnnotationFS
- Introduce a Unicode-aware trim() method on AnnotationFS
- Added unit tests
- Added CasCreationUtils.createCas() no-args convenience method
---
uimaj-core/pom.xml | 9 +-
.../org/apache/uima/cas/text/AnnotationFS.java | 22 ++-
.../java/org/apache/uima/jcas/tcas/Annotation.java | 36
.../org/apache/uima/util/CasCreationUtils.java | 12 ++
.../org/apache/uima/jcas/tcas/AnnotationTest.java | 205 +
uimaj-parent/pom.xml | 6 +
6 files changed, 285 insertions(+), 5 deletions(-)
diff --git a/uimaj-core/pom.xml b/uimaj-core/pom.xml
index 958c911..eb12807 100644
--- a/uimaj-core/pom.xml
+++ b/uimaj-core/pom.xml
@@ -204,8 +204,13 @@
asm-tree
5.0.4
- -->
-
+ -->
+
+
+ org.assertj
+ assertj-core
+ test
+
diff --git a/uimaj-core/src/main/java/org/apache/uima/cas/text/AnnotationFS.java b/uimaj-core/src/main/java/org/apache/uima/cas/text/AnnotationFS.java
index d281c34..2f48c01 100644
--- a/uimaj-core/src/main/java/org/apache/uima/cas/text/AnnotationFS.java
+++ b/uimaj-core/src/main/java/org/apache/uima/cas/text/AnnotationFS.java
@@ -19,9 +19,9 @@
package org.apache.uima.cas.text;
+import java.util.function.IntPredicate;
+
import org.apache.uima.cas.AnnotationBaseFS;
-import org.apache.uima.cas.CASRuntimeException;
-import org.apache.uima.cas.impl.FeatureStructureImplC;
/**
* Interface for Annotation Feature Structures.
@@ -74,4 +74,20 @@ public interface AnnotationFS extends AnnotationBaseFS {
*/
String getCoveredText();
- }
+ /**
+ * Removes whitespace at both edges of this annotation by moving the begin offset forward and the
+ * end offset backward. Whitespace is decided per Unicode code point via
+ * {@link Character#isWhitespace(int)}; the current begin/end offsets are expected to point to
+ * valid code points.
+ *
+ * @see #trim(IntPredicate)
+ */
+ default void trim() {
+   trim(codePoint -> Character.isWhitespace(codePoint));
+ }
+
+ /**
+ * Strips leading and trailing characters matching the given predicate by increasing/decreasing
+ * the begin/end offsets.
+ *
+ * @param aPredicate
+ *          predicate that receives a code point as an {@code int} and returns {@code true} if
+ *          that code point should be stripped
+ * @see #trim()
+ */
+ void trim(IntPredicate aPredicate);
+}
diff --git a/uimaj-core/src/main/java/org/apache/uima/jcas/tcas/Annotation.java b/uimaj-core/src/main/java/org/apache/uima/jcas/tcas/Annotation.java
index d9cf124..b424321 100644
--- a/uimaj-core/src/main/java/org/apache/uima/jcas/tcas/Annotation.java
+++ b/uimaj-core/src/main/java/org/apache/uima/jcas/tcas/Annotation.java
@@ -21,6 +21,7 @@ package org.apache.uima.jcas.tcas;
import java.lang.invoke.CallSite;
import java.lang.invoke.MethodHandle;
+import java.util.function.IntPredicate;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.admin.LinearTypeOrder;
@@ -244,4 +245,39 @@ public class Annotation extends AnnotationBase implements AnnotationImpl {
return Integer.compare(_id, other._id);
}
+ /**
+  * Trims leading and trailing code points accepted by {@code aIsTrimChar} from this annotation
+  * by moving its begin/end offsets inward over the document text of its CAS view.
+  * <p>
+  * Trailing code points are trimmed first, so a span consisting only of trim characters
+  * collapses to an empty span located at the original begin offset. Nothing is changed if the
+  * span is already empty or if the view has no document text.
+  *
+  * @param aIsTrimChar
+  *          predicate applied to each candidate code point; {@code true} means the code point is
+  *          trimmed
+  */
+ @Override
+ public void trim(IntPredicate aIsTrimChar) {
+   int begin = getBegin();
+   int end = getEnd();
+   String text = _casView.getDocumentText();
+
+   // If the span is empty, or the view has no document text (getDocumentText() returned null),
+   // there is nothing to trim. Without the null check the loops below would dereference a null
+   // text and fail with a NullPointerException.
+   if (begin == end || text == null) {
+     return;
+   }
+
+   // First we trim at the end. If a trimmed span is empty, we want to return the original
+   // begin as the begin/end of the trimmed span
+   int backwardsSeekingCodepoint;
+   while (
+       (end > 0)
+       && end > begin
+       && aIsTrimChar.test(backwardsSeekingCodepoint = text.codePointBefore(end))
+   ) {
+     // Step back by one whole code point (two chars for a surrogate pair)
+     end -= Character.charCount(backwardsSeekingCodepoint);
+   }
+
+   // Then, trim at the start
+   int forwardSeekingCodepoint;
+   while (
+       (begin < (text.length() - 1))
+       && begin < end
+       && aIsTrimChar.test(forwardSeekingCodepoint = text.codePointAt(begin))
+   ) {
+     // Step forward by one whole code point (two chars for a surrogate pair)
+     begin += Character.charCount(forwardSeekingCodepoint);
+   }
+
+   setBegin(begin);
+   setEnd(end);
+ }
}
diff --git a/uimaj-core/src/main/java/org/apache/uima/util/CasCreationUtils.java b/uimaj-core/src/main/java/org/apache/uima/util/CasCreationUtils.java
index 30dda98..e4246a7 100644
--- a/uimaj-core/src/main/java/org/apache/uima/util/CasCreationUtils.java
+++ b/uimaj-core/src/main/java/org/apache/uima/util/CasCreationUtils.java
@@ -92,6 +92,18 @@ public class CasCreationUtils {
private final static FeatureDescription[] EMPTY_FEAT_DESC_ARRAY = new
FeatureDescription[0];
/**
+ * Creates a new CAS instance.
+ *
+ * @return a new CAS instance
+ *
+ * @throws ResourceInitializationException
+ *