Repository: opennlp-sandbox
Updated Branches:
  refs/heads/master dce84c0a6 -> 4350f64c0


Move brat annotator to opennlp.git

OPENNLP-867


Project: http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/commit/4350f64c
Tree: http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/tree/4350f64c
Diff: http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/diff/4350f64c

Branch: refs/heads/master
Commit: 4350f64c009141bd80234113c39d30c61fa47020
Parents: dce84c0
Author: Jörn Kottmann <[email protected]>
Authored: Wed Oct 19 23:42:13 2016 +0200
Committer: Jörn Kottmann <[email protected]>
Committed: Wed Oct 19 23:42:13 2016 +0200

----------------------------------------------------------------------
 opennlp-brat-annotator/pom.xml                  |  88 -----------
 .../opennlp/bratann/NameFinderAnnService.java   | 119 ---------------
 .../opennlp/bratann/NameFinderResource.java     | 148 -------------------
 3 files changed, 355 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/4350f64c/opennlp-brat-annotator/pom.xml
----------------------------------------------------------------------
diff --git a/opennlp-brat-annotator/pom.xml b/opennlp-brat-annotator/pom.xml
deleted file mode 100644
index 93e3620..0000000
--- a/opennlp-brat-annotator/pom.xml
+++ /dev/null
@@ -1,88 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-
-<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
-       license agreements. See the NOTICE file distributed with this work for additional
-       information regarding copyright ownership. The ASF licenses this file to
-       you under the Apache License, Version 2.0 (the "License"); you may not use
-       this file except in compliance with the License. You may obtain a copy of
-       the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
-       by applicable law or agreed to in writing, software distributed under the
-       License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
-       OF ANY KIND, either express or implied. See the License for the specific
-       language governing permissions and limitations under the License. -->
-
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-       xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-       <modelVersion>4.0.0</modelVersion>
-
-       <groupId>org.apache.opennlp</groupId>
-       <artifactId>opennlp-brat-annotator</artifactId>
-       <version>1.0-SNAPSHOT</version>
-       <packaging>jar</packaging>
-
-       <name>opennlp-brat-annotator</name>
-
-       <properties>
-               <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
-       </properties>
-
-       <dependencies>
-               <dependency>
-                       <groupId>org.eclipse.jetty</groupId>
-                       <artifactId>jetty-server</artifactId>
-                       <version>9.2.3.v20140905</version>
-               </dependency>
-
-               <dependency>
-                       <groupId>org.eclipse.jetty</groupId>
-                       <artifactId>jetty-servlet</artifactId>
-                       <version>9.2.3.v20140905</version>
-               </dependency>
-
-               <dependency>
-                       <groupId>com.sun.jersey</groupId>
-                       <artifactId>jersey-bundle</artifactId>
-                       <version>1.18.1</version>
-               </dependency>
-
-               <dependency>
-                       <groupId>com.sun.jersey</groupId>
-                       <artifactId>jersey-json</artifactId>
-                       <version>1.18.1</version>
-               </dependency>
-
-               <dependency>
-                       <groupId>org.apache.opennlp</groupId>
-                       <artifactId>opennlp-tools</artifactId>
-                       <version>1.6.0</version>
-               </dependency>
-
-               <dependency>
-                       <groupId>junit</groupId>
-                       <artifactId>junit</artifactId>
-                       <version>3.8.1</version>
-                       <scope>test</scope>
-               </dependency>
-       </dependencies>
-       <build>
-               <plugins>
-                       <plugin>
-                               <artifactId>maven-assembly-plugin</artifactId>
-                               <configuration>
-                                       <descriptorRefs>
-                                               <descriptorRef>jar-with-dependencies</descriptorRef>
-                                       </descriptorRefs>
-                               </configuration>
-                               <executions>
-                                       <execution>
-                                               <id>make-assembly</id>
-                                               <phase>package</phase>
-                                               <goals>
-                                                       <goal>single</goal>
-                                               </goals>
-                                       </execution>
-                               </executions>
-                       </plugin>
-               </plugins>
-       </build>
-</project>

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/4350f64c/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderAnnService.java
----------------------------------------------------------------------
diff --git a/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderAnnService.java b/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderAnnService.java
deleted file mode 100644
index 60d2a1b..0000000
--- a/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderAnnService.java
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.bratann;
-
-import java.io.File;
-import java.util.Arrays;
-import java.util.List;
-
-import org.eclipse.jetty.server.Server;
-import org.eclipse.jetty.servlet.ServletContextHandler;
-import org.eclipse.jetty.servlet.ServletHolder;
-
-import opennlp.tools.namefind.NameFinderME;
-import opennlp.tools.namefind.TokenNameFinder;
-import opennlp.tools.namefind.TokenNameFinderModel;
-import opennlp.tools.sentdetect.NewlineSentenceDetector;
-import opennlp.tools.sentdetect.SentenceDetector;
-import opennlp.tools.sentdetect.SentenceDetectorME;
-import opennlp.tools.sentdetect.SentenceModel;
-import opennlp.tools.tokenize.SimpleTokenizer;
-import opennlp.tools.tokenize.Tokenizer;
-import opennlp.tools.tokenize.TokenizerME;
-import opennlp.tools.tokenize.TokenizerModel;
-import opennlp.tools.tokenize.WhitespaceTokenizer;
-
-public class NameFinderAnnService {
-
-  public static SentenceDetector sentenceDetector = new NewlineSentenceDetector();;
-  public static Tokenizer tokenizer = WhitespaceTokenizer.INSTANCE;
-  public static TokenNameFinder nameFinders[];
-
-  public static void main(String[] args) throws Exception {
-
-    if (args.length == 0) {
-      System.out.println(
-          "[-serverPort port] [-tokenizerModel file] [-ruleBasedTokenizer whitespace|simple] [-sentenceDetectorModel file] "
-              + "namefinderFile|nameFinderURI");
-      return;
-    }
-
-    List<String> argList = Arrays.asList(args);
-
-    int serverPort = 8080;
-    int serverPortIndex = argList.indexOf("-serverPort") + 1;
-
-    if (serverPortIndex > 0 && serverPortIndex < args.length) {
-      serverPort = Integer.parseInt(args[serverPortIndex]);
-    }
-
-    int sentenceModelIndex = argList.indexOf("-sentenceDetectorModel")
-        + 1;
-    if (sentenceModelIndex > 0 && sentenceModelIndex < args.length) {
-      sentenceDetector = new SentenceDetectorME(
-          new SentenceModel(new File(args[sentenceModelIndex])));
-    }
-
-    int ruleBasedTokenizerIndex = argList.indexOf("-ruleBasedTokenizer") + 1;
-
-    if (ruleBasedTokenizerIndex > 0 && ruleBasedTokenizerIndex < args.length) {
-      if ("whitespace".equals(args[ruleBasedTokenizerIndex])) {
-        tokenizer = WhitespaceTokenizer.INSTANCE;
-      } else if ("simple".equals(args[ruleBasedTokenizerIndex])) {
-        tokenizer = SimpleTokenizer.INSTANCE;
-      } else {
-        System.out
-        .println("unkown tokenizer: " + args[ruleBasedTokenizerIndex]);
-        return;
-      }
-    }
-
-    int tokenizerModelIndex = argList.indexOf("-tokenizerModel") + 1;
-    if (tokenizerModelIndex > 0 && tokenizerModelIndex < args.length) {
-      tokenizer = new TokenizerME(
-          new TokenizerModel(new File(args[tokenizerModelIndex])));
-    }
-
-    nameFinders = new TokenNameFinder[] { new NameFinderME(
-        new TokenNameFinderModel(new File(args[args.length - 1]))) };
-
-    ServletContextHandler context = new ServletContextHandler(
-        ServletContextHandler.SESSIONS);
-    context.setContextPath("/");
-
-    Server jettyServer = new Server(serverPort);
-    jettyServer.setHandler(context);
-
-    ServletHolder jerseyServlet = context
-        .addServlet(com.sun.jersey.spi.container.servlet.ServletContainer.class, "/*");
-    jerseyServlet.setInitParameter("com.sun.jersey.config.property.packages",
-        "opennlp.bratann");
-    jerseyServlet.setInitParameter("com.sun.jersey.api.json.POJOMappingFeature", "true");
-    jerseyServlet.setInitOrder(0);
-
-    jerseyServlet.setInitParameter("jersey.config.server.provider.classnames",
-        NameFinderResource.class.getCanonicalName());
-
-    try {
-      jettyServer.start();
-      jettyServer.join();
-    } finally {
-      jettyServer.destroy();
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/4350f64c/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderResource.java
----------------------------------------------------------------------
diff --git a/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderResource.java b/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderResource.java
deleted file mode 100644
index 39cec0e..0000000
--- a/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderResource.java
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.bratann;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import javax.ws.rs.Consumes;
-import javax.ws.rs.POST;
-import javax.ws.rs.Path;
-import javax.ws.rs.Produces;
-import javax.ws.rs.QueryParam;
-import javax.ws.rs.core.MediaType;
-
-import opennlp.tools.namefind.TokenNameFinder;
-import opennlp.tools.sentdetect.SentenceDetector;
-import opennlp.tools.tokenize.Tokenizer;
-import opennlp.tools.util.Span;
-
-@Path("/ner")
-public class NameFinderResource {
-
-  public static class NameAnn {
-    public int[][] offsets;
-    public String[] texts;
-    public String type;
-  }
-
-  private SentenceDetector sentDetect = NameFinderAnnService.sentenceDetector;
-  private Tokenizer tokenizer = NameFinderAnnService.tokenizer;
-  private TokenNameFinder nameFinders[] = NameFinderAnnService.nameFinders;
-
-  private static int findNextNonWhitespaceChar(CharSequence s, int beginOffset,
-      int endOffset) {
-
-    for (int i = beginOffset; i < endOffset; i++) {
-      if (!Character.isSpaceChar(s.charAt(i))) {
-        return i;
-      }
-    }
-
-    return -1;
-  }
-
-  @POST
-  @Consumes(MediaType.TEXT_PLAIN)
-  @Produces(MediaType.APPLICATION_JSON)
-  public Map<String, NameAnn> findNames(@QueryParam("model") String modelName,
-      String text) {
-
-    Span sentenceSpans[] = sentDetect.sentPosDetect(text);
-
-    Map<String, NameAnn> map = new HashMap<String, NameAnn>();
-
-    int indexCounter = 0;
-
-    for (int i = 0; i < sentenceSpans.length; i++) {
-      
-      String sentenceText = sentenceSpans[i].getCoveredText(text).toString();
-      
-      // offset of sentence gets lost here!
-      Span tokenSpans[] = tokenizer
-          .tokenizePos(sentenceText);
-
-      String tokens[] = Span.spansToStrings(tokenSpans, sentenceText);
-
-      for (TokenNameFinder nameFinder : nameFinders) {
-        Span names[] = nameFinder.find(tokens);
-
-        for (Span name : names) {
-          
-          int beginOffset = tokenSpans[name.getStart()].getStart()
-              + sentenceSpans[i].getStart();
-          int endOffset = tokenSpans[name.getEnd() - 1].getEnd()
-              + sentenceSpans[i].getStart();
-
-          // create a list of new line indexes
-          List<Integer> newLineIndexes = new ArrayList<Integer>();
-
-          // TODO: Code needs to handle case that there are multiple new lines
-          // in a row
-
-          boolean inNewLineSequence = false;
-          for (int ci = beginOffset; ci < endOffset; ci++) {
-            if (text.charAt(ci) == '\n' || text.charAt(ci) == '\r') {
-              if (!inNewLineSequence) {
-                newLineIndexes.add(ci);
-              }
-              inNewLineSequence = true;
-            } else {
-              inNewLineSequence = false;
-            }
-          }
-
-          List<String> textSegments = new ArrayList<String>();
-          List<int[]> spanSegments = new ArrayList<int[]>();
-
-          int segmentBegin = beginOffset;
-
-          for (int newLineOffset : newLineIndexes) {
-            // create segment from begin to offset
-            textSegments.add(text.substring(segmentBegin, newLineOffset));
-            spanSegments.add(new int[] { segmentBegin, newLineOffset });
-
-            segmentBegin = findNextNonWhitespaceChar(text, newLineOffset + 1,
-                endOffset);
-
-            if (segmentBegin == -1) {
-              break;
-            }
-          }
-
-          // create left over segment
-          if (segmentBegin != -1) {
-            textSegments.add(text.substring(segmentBegin, endOffset));
-            spanSegments.add(new int[] { segmentBegin, endOffset });
-          }
-
-          NameAnn ann = new NameAnn();
-          ann.texts = textSegments.toArray(new String[textSegments.size()]);
-          ann.offsets = spanSegments.toArray(new int[spanSegments.size()][]);
-          ann.type = name.getType();
-
-          map.put(Integer.toString(indexCounter++), ann);
-        }
-      }
-    }
-
-    return map;
-  }
-}

Reply via email to