Repository: opennlp-sandbox Updated Branches: refs/heads/master dce84c0a6 -> 4350f64c0
Move brat annotator to opennlp.git OPENNLP-867 Project: http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/commit/4350f64c Tree: http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/tree/4350f64c Diff: http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/diff/4350f64c Branch: refs/heads/master Commit: 4350f64c009141bd80234113c39d30c61fa47020 Parents: dce84c0 Author: Jörn Kottmann <[email protected]> Authored: Wed Oct 19 23:42:13 2016 +0200 Committer: Jörn Kottmann <[email protected]> Committed: Wed Oct 19 23:42:13 2016 +0200 ---------------------------------------------------------------------- opennlp-brat-annotator/pom.xml | 88 ----------- .../opennlp/bratann/NameFinderAnnService.java | 119 --------------- .../opennlp/bratann/NameFinderResource.java | 148 ------------------- 3 files changed, 355 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/4350f64c/opennlp-brat-annotator/pom.xml ---------------------------------------------------------------------- diff --git a/opennlp-brat-annotator/pom.xml b/opennlp-brat-annotator/pom.xml deleted file mode 100644 index 93e3620..0000000 --- a/opennlp-brat-annotator/pom.xml +++ /dev/null @@ -1,88 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> - -<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor - license agreements. See the NOTICE file distributed with this work for additional - information regarding copyright ownership. The ASF licenses this file to - you under the Apache License, Version 2.0 (the "License"); you may not use - this file except in compliance with the License. You may obtain a copy of - the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required - by applicable law or agreed to in writing, software distributed under the - License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS - OF ANY KIND, either express or implied. See the License for the specific - language governing permissions and limitations under the License. --> - -<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" - xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> - <modelVersion>4.0.0</modelVersion> - - <groupId>org.apache.opennlp</groupId> - <artifactId>opennlp-brat-annotator</artifactId> - <version>1.0-SNAPSHOT</version> - <packaging>jar</packaging> - - <name>opennlp-brat-annotator</name> - - <properties> - <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> - </properties> - - <dependencies> - <dependency> - <groupId>org.eclipse.jetty</groupId> - <artifactId>jetty-server</artifactId> - <version>9.2.3.v20140905</version> - </dependency> - - <dependency> - <groupId>org.eclipse.jetty</groupId> - <artifactId>jetty-servlet</artifactId> - <version>9.2.3.v20140905</version> - </dependency> - - <dependency> - <groupId>com.sun.jersey</groupId> - <artifactId>jersey-bundle</artifactId> - <version>1.18.1</version> - </dependency> - - <dependency> - <groupId>com.sun.jersey</groupId> - <artifactId>jersey-json</artifactId> - <version>1.18.1</version> - </dependency> - - <dependency> - <groupId>org.apache.opennlp</groupId> - <artifactId>opennlp-tools</artifactId> - <version>1.6.0</version> - </dependency> - - <dependency> - <groupId>junit</groupId> - <artifactId>junit</artifactId> - <version>3.8.1</version> - <scope>test</scope> - </dependency> - </dependencies> - <build> - <plugins> - <plugin> - <artifactId>maven-assembly-plugin</artifactId> - <configuration> - <descriptorRefs> - <descriptorRef>jar-with-dependencies</descriptorRef> - </descriptorRefs> - </configuration> - <executions> - <execution> - <id>make-assembly</id> - <phase>package</phase> - <goals> - <goal>single</goal> - </goals> - </execution> - </executions> - </plugin> - </plugins> - </build> -</project> http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/4350f64c/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderAnnService.java ---------------------------------------------------------------------- diff --git a/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderAnnService.java b/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderAnnService.java deleted file mode 100644 index 60d2a1b..0000000 --- a/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderAnnService.java +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package opennlp.bratann; - -import java.io.File; -import java.util.Arrays; -import java.util.List; - -import org.eclipse.jetty.server.Server; -import org.eclipse.jetty.servlet.ServletContextHandler; -import org.eclipse.jetty.servlet.ServletHolder; - -import opennlp.tools.namefind.NameFinderME; -import opennlp.tools.namefind.TokenNameFinder; -import opennlp.tools.namefind.TokenNameFinderModel; -import opennlp.tools.sentdetect.NewlineSentenceDetector; -import opennlp.tools.sentdetect.SentenceDetector; -import opennlp.tools.sentdetect.SentenceDetectorME; -import opennlp.tools.sentdetect.SentenceModel; -import opennlp.tools.tokenize.SimpleTokenizer; -import opennlp.tools.tokenize.Tokenizer; -import opennlp.tools.tokenize.TokenizerME; -import opennlp.tools.tokenize.TokenizerModel; -import opennlp.tools.tokenize.WhitespaceTokenizer; - -public class NameFinderAnnService { - - public static SentenceDetector sentenceDetector = new NewlineSentenceDetector();; - public static Tokenizer tokenizer = WhitespaceTokenizer.INSTANCE; - public static TokenNameFinder nameFinders[]; - - public static void main(String[] args) throws Exception { - - if (args.length == 0) { - System.out.println( - "[-serverPort port] [-tokenizerModel file] [-ruleBasedTokenizer whitespace|simple] [-sentenceDetectorModel file] " - + "namefinderFile|nameFinderURI"); - return; - } - - List<String> argList = Arrays.asList(args); - - int serverPort = 8080; - int serverPortIndex = argList.indexOf("-serverPort") + 1; - - if (serverPortIndex > 0 && serverPortIndex < args.length) { - serverPort = Integer.parseInt(args[serverPortIndex]); - } - - int sentenceModelIndex = argList.indexOf("-sentenceDetectorModel") - + 1; - if (sentenceModelIndex > 0 && sentenceModelIndex < args.length) { - sentenceDetector = new SentenceDetectorME( - new SentenceModel(new File(args[sentenceModelIndex]))); - } - - int ruleBasedTokenizerIndex = argList.indexOf("-ruleBasedTokenizer") + 1; - - if (ruleBasedTokenizerIndex > 0 && ruleBasedTokenizerIndex < args.length) { - if ("whitespace".equals(args[ruleBasedTokenizerIndex])) { - tokenizer = WhitespaceTokenizer.INSTANCE; - } else if ("simple".equals(args[ruleBasedTokenizerIndex])) { - tokenizer = SimpleTokenizer.INSTANCE; - } else { - System.out - .println("unkown tokenizer: " + args[ruleBasedTokenizerIndex]); - return; - } - } - - int tokenizerModelIndex = argList.indexOf("-tokenizerModel") + 1; - if (tokenizerModelIndex > 0 && tokenizerModelIndex < args.length) { - tokenizer = new TokenizerME( - new TokenizerModel(new File(args[tokenizerModelIndex]))); - } - - nameFinders = new TokenNameFinder[] { new NameFinderME( - new TokenNameFinderModel(new File(args[args.length - 1]))) }; - - ServletContextHandler context = new ServletContextHandler( - ServletContextHandler.SESSIONS); - context.setContextPath("/"); - - Server jettyServer = new Server(serverPort); - jettyServer.setHandler(context); - - ServletHolder jerseyServlet = context - .addServlet(com.sun.jersey.spi.container.servlet.ServletContainer.class, "/*"); - jerseyServlet.setInitParameter("com.sun.jersey.config.property.packages", - "opennlp.bratann"); - jerseyServlet.setInitParameter("com.sun.jersey.api.json.POJOMappingFeature", "true"); - jerseyServlet.setInitOrder(0); - - jerseyServlet.setInitParameter("jersey.config.server.provider.classnames", - NameFinderResource.class.getCanonicalName()); - - try { - jettyServer.start(); - jettyServer.join(); - } finally { - jettyServer.destroy(); - } - } -} http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/4350f64c/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderResource.java ---------------------------------------------------------------------- diff --git a/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderResource.java b/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderResource.java deleted file mode 100644 index 39cec0e..0000000 --- a/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderResource.java +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package opennlp.bratann; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import javax.ws.rs.Consumes; -import javax.ws.rs.POST; -import javax.ws.rs.Path; -import javax.ws.rs.Produces; -import javax.ws.rs.QueryParam; -import javax.ws.rs.core.MediaType; - -import opennlp.tools.namefind.TokenNameFinder; -import opennlp.tools.sentdetect.SentenceDetector; -import opennlp.tools.tokenize.Tokenizer; -import opennlp.tools.util.Span; - -@Path("/ner") -public class NameFinderResource { - - public static class NameAnn { - public int[][] offsets; - public String[] texts; - public String type; - } - - private SentenceDetector sentDetect = NameFinderAnnService.sentenceDetector; - private Tokenizer tokenizer = NameFinderAnnService.tokenizer; - private TokenNameFinder nameFinders[] = NameFinderAnnService.nameFinders; - - private static int findNextNonWhitespaceChar(CharSequence s, int beginOffset, - int endOffset) { - - for (int i = beginOffset; i < endOffset; i++) { - if (!Character.isSpaceChar(s.charAt(i))) { - return i; - } - } - - return -1; - } - - @POST - @Consumes(MediaType.TEXT_PLAIN) - @Produces(MediaType.APPLICATION_JSON) - public Map<String, NameAnn> findNames(@QueryParam("model") String modelName, - String text) { - - Span sentenceSpans[] = sentDetect.sentPosDetect(text); - - Map<String, NameAnn> map = new HashMap<String, NameAnn>(); - - int indexCounter = 0; - - for (int i = 0; i < sentenceSpans.length; i++) { - - String sentenceText = sentenceSpans[i].getCoveredText(text).toString(); - - // offset of sentence gets lost here! - Span tokenSpans[] = tokenizer - .tokenizePos(sentenceText); - - String tokens[] = Span.spansToStrings(tokenSpans, sentenceText); - - for (TokenNameFinder nameFinder : nameFinders) { - Span names[] = nameFinder.find(tokens); - - for (Span name : names) { - - int beginOffset = tokenSpans[name.getStart()].getStart() - + sentenceSpans[i].getStart(); - int endOffset = tokenSpans[name.getEnd() - 1].getEnd() - + sentenceSpans[i].getStart(); - - // create a list of new line indexes - List<Integer> newLineIndexes = new ArrayList<Integer>(); - - // TODO: Code needs to handle case that there are multiple new lines - // in a row - - boolean inNewLineSequence = false; - for (int ci = beginOffset; ci < endOffset; ci++) { - if (text.charAt(ci) == '\n' || text.charAt(ci) == '\r') { - if (!inNewLineSequence) { - newLineIndexes.add(ci); - } - inNewLineSequence = true; - } else { - inNewLineSequence = false; - } - } - - List<String> textSegments = new ArrayList<String>(); - List<int[]> spanSegments = new ArrayList<int[]>(); - - int segmentBegin = beginOffset; - - for (int newLineOffset : newLineIndexes) { - // create segment from begin to offset - textSegments.add(text.substring(segmentBegin, newLineOffset)); - spanSegments.add(new int[] { segmentBegin, newLineOffset }); - - segmentBegin = findNextNonWhitespaceChar(text, newLineOffset + 1, - endOffset); - - if (segmentBegin == -1) { - break; - } - } - - // create left over segment - if (segmentBegin != -1) { - textSegments.add(text.substring(segmentBegin, endOffset)); - spanSegments.add(new int[] { segmentBegin, endOffset }); - } - - NameAnn ann = new NameAnn(); - ann.texts = textSegments.toArray(new String[textSegments.size()]); - ann.offsets = spanSegments.toArray(new int[spanSegments.size()][]); - ann.type = name.getType(); - - map.put(Integer.toString(indexCounter++), ann); - } - } - } - - return map; - } -}
