Author: tommaso
Date: Tue Nov  8 11:04:23 2011
New Revision: 1199190

URL: http://svn.apache.org/viewvc?rev=1199190&view=rev
Log:
moved RegExAnnotatorAOService into services package, fixed the AE aggregate 
descriptor, added a local copy of the concepts file, added a simple service 
test, fixed import-package directive

Added:
    
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/java/org/apache/clerezza/uima/samples/services/
    
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/java/org/apache/clerezza/uima/samples/services/RegExAnnotatorAOService.java
    
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/concepts.xml
    incubator/clerezza/trunk/parent/uima/uima.samples/src/test/java/org/
    incubator/clerezza/trunk/parent/uima/uima.samples/src/test/java/org/apache/
    
incubator/clerezza/trunk/parent/uima/uima.samples/src/test/java/org/apache/clerezza/
    
incubator/clerezza/trunk/parent/uima/uima.samples/src/test/java/org/apache/clerezza/uima/
    
incubator/clerezza/trunk/parent/uima/uima.samples/src/test/java/org/apache/clerezza/uima/samples/
    
incubator/clerezza/trunk/parent/uima/uima.samples/src/test/java/org/apache/clerezza/uima/samples/services/
    
incubator/clerezza/trunk/parent/uima/uima.samples/src/test/java/org/apache/clerezza/uima/samples/services/RegExAnnotatorAOServiceTest.java
Removed:
    
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/java/org/apache/clerezza/uima/samples/RegExAnnotatorAOService.java
Modified:
    incubator/clerezza/trunk/parent/uima/uima.samples/pom.xml
    
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/AggregateRegExAOAE.xml

Modified: incubator/clerezza/trunk/parent/uima/uima.samples/pom.xml
URL: 
http://svn.apache.org/viewvc/incubator/clerezza/trunk/parent/uima/uima.samples/pom.xml?rev=1199190&r1=1199189&r2=1199190&view=diff
==============================================================================
--- incubator/clerezza/trunk/parent/uima/uima.samples/pom.xml (original)
+++ incubator/clerezza/trunk/parent/uima/uima.samples/pom.xml Tue Nov  8 
11:04:23 2011
@@ -58,6 +58,12 @@
       <artifactId>RegularExpressionAnnotator</artifactId>
       <version>2.3.1</version>
     </dependency>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <version>4.8.2</version>
+      <scope>test</scope>
+    </dependency>
   </dependencies>
   <build>
     <plugins>
@@ -68,7 +74,7 @@
         <configuration>
           <instructions>
             <Import-Package>
-              
javax.ws.rs.*;javax.xml.stream.*;org.osgi.service.component.*;org.apache.uima.*;org.apache.clerezza.*;org.apache.commons.io.*
+              
javax.ws.rs.*;org.apache.uima.*;org.apache.clerezza.*;org.apache.commons.io.*
             </Import-Package>
             <Export-Package>org.apache.clerezza.uima.samples.*</Export-Package>
             
<Bundle-Activator>org.apache.clerezza.uima.samples.UIMASamplesBundleActivator</Bundle-Activator>

Added: 
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/java/org/apache/clerezza/uima/samples/services/RegExAnnotatorAOService.java
URL: 
http://svn.apache.org/viewvc/incubator/clerezza/trunk/parent/uima/uima.samples/src/main/java/org/apache/clerezza/uima/samples/services/RegExAnnotatorAOService.java?rev=1199190&view=auto
==============================================================================
--- 
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/java/org/apache/clerezza/uima/samples/services/RegExAnnotatorAOService.java
 (added)
+++ 
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/java/org/apache/clerezza/uima/samples/services/RegExAnnotatorAOService.java
 Tue Nov  8 11:04:23 2011
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.clerezza.uima.samples.services;
+
+import org.apache.clerezza.rdf.core.Graph;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.access.TcManager;
+import org.apache.clerezza.uima.utils.UIMAExecutor;
+import org.apache.clerezza.uima.utils.UIMAExecutorFactory;
+import org.apache.commons.io.IOUtils;
+import org.apache.felix.scr.annotations.Component;
+import org.apache.felix.scr.annotations.Property;
+import org.apache.felix.scr.annotations.Service;
+import org.apache.uima.util.XMLInputSource;
+
+import javax.ws.rs.*;
+import javax.ws.rs.core.Response;
+import java.net.URI;
+import java.net.URL;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Sample REST service which uses {@link org.apache.uima.annotator.regex.impl.RegExAnnotator} to extract named entities
+ * from the text of a given URI
+ */
+@Component
+@Service(Object.class)
+@Property(name = "javax.ws.rs", boolValue = true)
+@Path("/uima")
+public class RegExAnnotatorAOService {
+
+  private static final String PATH = "/META-INF/AggregateRegExAOAE.xml";
+  private static final String OUTPUTGRAPH = "outputgraph";
+
+  @POST
+  @Path("regex")
+  @Produces("application/rdf+xml")
+  public Graph enrichUri(@QueryParam("uri") String uriString) {
+    if (uriString == null || uriString.length() == 0)
+      throw new WebApplicationException(Response.status(
+              Response.Status.BAD_REQUEST).entity(new StringBuilder("No URI specified").toString()).build());
+
+    UIMAExecutor executor = 
UIMAExecutorFactory.getInstance().createUIMAExecutor();
+    Map<String, Object> parameters = new HashMap<String, Object>();
+    parameters.put(OUTPUTGRAPH, uriString);
+    try {
+      URL url = URI.create(uriString).toURL();
+      String text = IOUtils.toString(url.openStream());
+      executor.analyzeDocument(text, new 
XMLInputSource(getClass().getResource(PATH)), parameters);
+    } catch (Exception e) {
+      e.printStackTrace();
+      throw new WebApplicationException(Response.status(
+              Response.Status.INTERNAL_SERVER_ERROR).entity(new StringBuilder("Failed UIMA execution on URI ").
+              append(uriString).append(" due to \n").append(e.getLocalizedMessage()).toString()).build());
+    }
+    return TcManager.getInstance().getMGraph(new UriRef(uriString)).getGraph();
+  }
+
+
+}
+

Modified: 
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/AggregateRegExAOAE.xml
URL: 
http://svn.apache.org/viewvc/incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/AggregateRegExAOAE.xml?rev=1199190&r1=1199189&r2=1199190&view=diff
==============================================================================
--- 
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/AggregateRegExAOAE.xml
 (original)
+++ 
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/AggregateRegExAOAE.xml
 Tue Nov  8 11:04:23 2011
@@ -56,12 +56,12 @@
         </overrides>
       </configurationParameter>
       <configurationParameter>
-        <name>alchemykey</name>
+        <name>concepts</name>
         <type>String</type>
-        <multiValued>false</multiValued>
+        <multiValued>true</multiValued>
         <mandatory>true</mandatory>
         <overrides>
-          <parameter>UrlConceptTaggingAEDescriptor/apikey</parameter>
+          <parameter>RegExAnnotator/ConceptFiles</parameter>
         </overrides>
       </configurationParameter>
     </configurationParameters>
@@ -78,11 +78,18 @@
           <string>ao</string>
         </value>
       </nameValuePair>
+      <nameValuePair>
+        <name>concepts</name>
+        <value>
+          <array>
+            <string>META-INF/concepts.xml</string>
+          </array>
+        </value>
+      </nameValuePair>
     </configurationParameterSettings>
-    >
     <flowConstraints>
       <fixedFlow>
-        <node>UrlConceptTaggingAEDescriptor</node>
+        <node>RegExAnnotator</node>
         <node>ClerezzaCASConsumerDescriptor</node>
       </fixedFlow>
     </flowConstraints>

Added: 
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/concepts.xml
URL: 
http://svn.apache.org/viewvc/incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/concepts.xml?rev=1199190&view=auto
==============================================================================
--- 
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/concepts.xml
 (added)
+++ 
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/concepts.xml
 Tue Nov  8 11:04:23 2011
@@ -0,0 +1,118 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+       * Licensed to the Apache Software Foundation (ASF) under one
+       * or more contributor license agreements.  See the NOTICE file
+       * distributed with this work for additional information
+       * regarding copyright ownership.  The ASF licenses this file
+       * to you under the Apache License, Version 2.0 (the
+       * "License"); you may not use this file except in compliance
+       * with the License.  You may obtain a copy of the License at
+       * 
+       *   http://www.apache.org/licenses/LICENSE-2.0
+       * 
+       * Unless required by applicable law or agreed to in writing,
+       * software distributed under the License is distributed on an
+       * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+       * KIND, either express or implied.  See the License for the
+       * specific language governing permissions and limitations
+       * under the License.
+-->
+<conceptSet xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+            xmlns="http://incubator.apache.org/uima/regex"
+            xsi:schemaLocation="concept.xsd">
+
+  <concept name="emailAddressDetection">
+    <rules>
+      <rule
+              
regEx="([a-zA-Z0-9!#$%*+'/=?^_\x2D`{|}~.\x26]+)@([a-zA-Z0-9._-]+[a-zA-Z]{2,4})"
+              matchStrategy="matchAll" 
matchType="uima.tcas.DocumentAnnotation"/>
+    </rules>
+    <createAnnotations>
+      <annotation id="emailAnnot"
+                  type="org.apache.uima.EmailAddress">
+        <begin group="0"/>
+        <end group="0"/>
+        <setFeature name="localPart" type="String"
+                    normalization="ToLowerCase">
+          $1
+        </setFeature>
+        <setFeature name="domainPart" type="String"
+                    normalization="ToLowerCase">
+          $2
+        </setFeature>
+        <setFeature name="normalizedEmail" type="String"
+                    normalization="ToLowerCase">
+          $0
+        </setFeature>
+      </annotation>
+    </createAnnotations>
+  </concept>
+
+  <concept name="isbnNumberDetection">
+    <rules>
+      <rule regEx="(97(8|9))?-?(\d{9}|(\d|-){11})-?(\d|X)"
+            matchStrategy="matchAll" matchType="uima.tcas.DocumentAnnotation"
+            confidence="1.0"/>
+    </rules>
+    <createAnnotations>
+      <annotation id="isbnNumber"
+                  type="org.apache.uima.ISBNNumber"
+                  
validate="org.apache.uima.annotator.regex.extension.impl.ISBNNumberValidator">
+        <begin group="0"/>
+        <end group="0"/>
+        <setFeature name="confidence" type="Confidence"/>
+      </annotation>
+    </createAnnotations>
+  </concept>
+
+  <concept name="creditCardNumberDetection" processAllRules="true">
+    <rules>
+      <rule ruleId="AmericanExpress"
+            regEx="(((34|37)\d{2}[- ]?)(\d{6}[- ]?)\d{5})"
+            matchStrategy="matchAll" matchType="uima.tcas.DocumentAnnotation"
+            confidence="1.0"/>
+      <rule ruleId="Visa"
+            regEx="((4\d{3}[- ]?)(\d{4}[- ]?){2}\d{4})" 
matchStrategy="matchAll"
+            matchType="uima.tcas.DocumentAnnotation" confidence="1.0"/>
+      <rule ruleId="MasterCard"
+            regEx="((5[1-5]\d{2}[- ]?)(\d{4}[- ]?){2}\d{4})"
+            matchStrategy="matchAll" matchType="uima.tcas.DocumentAnnotation"
+            confidence="1.0"/>
+      <rule ruleId="unknown"
+            regEx="(([1-6]\d{3}[- ])(\d{4}[- ]){2}\d{4})|([1-6]\d{13,18})|([1-6]\d{3}[- ]\d{6}[- ]\d{5})"
+            matchStrategy="matchAll" matchType="uima.tcas.DocumentAnnotation"
+            confidence="1.0"/>
+    </rules>
+    <createAnnotations>
+      <annotation id="creditCardNumber"
+                  type="org.apache.uima.CreditCardNumber"
+                  
validate="org.apache.uima.annotator.regex.extension.impl.CreditCardNumberValidator">
+        <begin group="0"/>
+        <end group="0"/>
+        <setFeature name="confidence" type="Confidence"/>
+        <setFeature name="cardType" type="RuleId"/>
+      </annotation>
+    </createAnnotations>
+  </concept>
+
+  <concept name="MoneyAmountDetection" processAllRules="true">
+    <!-- \p{Sc} -> currentySymbol -->
+    <!-- (?i) -> case insensitive match -->
+    <!-- \s -> whitespace character -->
+    <rules>
+      <rule 
regEx="\m{currency}(\p{Sc}\s?|(?i)USD\s?|(?i)Dollars\s?|(?i)Dollar\s?|(?i)CNY\s?|(?i)CAD\s?|(?i)GBP\s?|(?i)Pounds\s?|(?i)Pound\s?|(?i)Euros\s?|(?i)Euro\s?|(?i)Yen\s?|(?i)EUR\s?)\m{amount}(\d+(,\d\d\d)*(\.\d\d?)?)\m{amountText}(\s?(?i)million|\s?(?i)billion)?"
+            matchStrategy="matchAll" matchType="uima.tcas.DocumentAnnotation"/>
+      <rule 
regEx="\m{amount}(\d+(,\d\d\d)*(\.\d\d?\d?)?)\m{amountText}(\s?(?i)million|\s?(?i)billion)?\m{currency}(\s?\p{Sc}|\s?(?i)USD\b|\s?(?i)Dollars\b|\s?(?i)Dollar\b|\s?(?i)CNY\b|\s?(?i)CAD\b|\s?(?i)GBP\b|\s?(?i)Pounds\b|\s?(?i)Pound\b|\s?(?i)Euros\b|\s?(?i)Euro\b|\s?(?i)Yen\b|\s?(?i)EUR\b)"
+            matchStrategy="matchAll" matchType="uima.tcas.DocumentAnnotation"/>
+    </rules>
+    <createAnnotations>
+      <annotation type="org.apache.uima.MoneyAmount">
+        <begin group="0"/>
+        <end group="0"/>
+        <setFeature name="currency" type="String" 
normalization="Trim">${currency}</setFeature>
+        <setFeature name="amount" type="Float">${amount}</setFeature>
+        <setFeature name="amountText" type="String" 
normalization="Trim">${amountText}</setFeature>
+      </annotation>
+    </createAnnotations>
+  </concept>
+</conceptSet>

Added: 
incubator/clerezza/trunk/parent/uima/uima.samples/src/test/java/org/apache/clerezza/uima/samples/services/RegExAnnotatorAOServiceTest.java
URL: 
http://svn.apache.org/viewvc/incubator/clerezza/trunk/parent/uima/uima.samples/src/test/java/org/apache/clerezza/uima/samples/services/RegExAnnotatorAOServiceTest.java?rev=1199190&view=auto
==============================================================================
--- 
incubator/clerezza/trunk/parent/uima/uima.samples/src/test/java/org/apache/clerezza/uima/samples/services/RegExAnnotatorAOServiceTest.java
 (added)
+++ 
incubator/clerezza/trunk/parent/uima/uima.samples/src/test/java/org/apache/clerezza/uima/samples/services/RegExAnnotatorAOServiceTest.java
 Tue Nov  8 11:04:23 2011
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.clerezza.uima.samples.services;
+
+import org.apache.clerezza.rdf.core.Graph;
+import org.junit.Test;
+
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.fail;
+
+/**
+ * Testcase for {@link RegExAnnotatorAOService}
+ */
+public class RegExAnnotatorAOServiceTest {
+
+  @Test
+  public void serviceExecutionTest() {
+    try {
+      RegExAnnotatorAOService service = new RegExAnnotatorAOService();
+      Graph graph = service.enrichUri("http://www.apache.org/foundation/sponsorship.html");
+      assertNotNull(graph);
+    } catch (Exception e) {
+      e.printStackTrace();
+      fail(e.getLocalizedMessage());
+    }
+  }
+}


Reply via email to