This is an automated email from the ASF dual-hosted git repository. rzo1 pushed a commit to branch OPENNLP-1634-Move-OpenNLP-Brat-Annotator-back-to-Sandbox in repository https://gitbox.apache.org/repos/asf/opennlp.git
commit 3e1d1c5453becb44393922759b4cc2f582237fdb Author: Richard Zowalla <[email protected]> AuthorDate: Tue Oct 29 13:57:07 2024 +0100 OPENNLP-1634 - Move OpenNLP Brat Annotator back to Sandbox --- README.md | 1 - opennlp-brat-annotator/pom.xml | 117 ----------------- .../src/main/bin/brat-annotation-service | 56 --------- .../src/main/bin/brat-annotation-service.bat | 51 -------- .../java/opennlp/bratann/NameFinderAnnService.java | 102 --------------- .../java/opennlp/bratann/NameFinderResource.java | 138 --------------------- opennlp-distr/pom.xml | 6 +- opennlp-distr/src/main/assembly/bin.xml | 14 --- pom.xml | 11 +- 9 files changed, 2 insertions(+), 494 deletions(-) diff --git a/README.md b/README.md index 495b2f2a..25d38206 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,6 @@ Currently, the library has different packages: * `opennlp-tools` : The core toolkit. * `opennlp-tools-models` : A set of classes to load [OpenNLP models](https://github.com/apache/opennlp-models) from the classpath. * `opennlp-uima` : A set of [Apache UIMA](https://uima.apache.org) annotators. -* `opennlp-brat-annotator` : A set of annotators for [BRAT](http://brat.nlplab.org/) * `opennlp-morfologik-addon` : An addon for Morfologik * `opennlp-dl` : OpenNLP interface implementations for ONNX models using the `onnxruntime` dependency. * `opennlp-dl-gpu` : Replaces `onnxruntime` with the `onnxruntime_gpu` dependency to support GPU acceleration. diff --git a/opennlp-brat-annotator/pom.xml b/opennlp-brat-annotator/pom.xml deleted file mode 100644 index 58426af5..00000000 --- a/opennlp-brat-annotator/pom.xml +++ /dev/null @@ -1,117 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> - -<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor - license agreements. See the NOTICE file distributed with this work for additional - information regarding copyright ownership. The ASF licenses this file to - you under the Apache License, Version 2.0 (the "License"); you may not use - this file except in compliance with the License. You may obtain a copy of - the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required - by applicable law or agreed to in writing, software distributed under the - License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS - OF ANY KIND, either express or implied. See the License for the specific - language governing permissions and limitations under the License. --> - -<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> - <modelVersion>4.0.0</modelVersion> - - <parent> - <groupId>org.apache.opennlp</groupId> - <artifactId>opennlp</artifactId> - <version>2.4.1-SNAPSHOT</version> - <relativePath>../pom.xml</relativePath> - </parent> - - <artifactId>opennlp-brat-annotator</artifactId> - <packaging>jar</packaging> - - <name>Apache OpenNLP Brat Annotator</name> - - <properties> - <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> - </properties> - - <dependencies> - <dependency> - <groupId>org.slf4j</groupId> - <artifactId>slf4j-api</artifactId> - </dependency> - - <dependency> - <groupId>org.glassfish.jersey.containers</groupId> - <artifactId>jersey-container-grizzly2-http</artifactId> - <version>${jersey.version}</version> - </dependency> - - <dependency> - <groupId>org.glassfish.jersey.media</groupId> - <artifactId>jersey-media-json-jackson</artifactId> - <version>${jersey.version}</version> - <scope>runtime</scope> - </dependency> - - <dependency> - <groupId>com.fasterxml.jackson.core</groupId> - <artifactId>jackson-annotations</artifactId> - <version>${jackson.version}</version> - <scope>runtime</scope> - </dependency> - - <dependency> - <groupId>com.fasterxml.jackson.core</groupId> - <artifactId>jackson-databind</artifactId> - <version>${jackson.version}</version> - <scope>runtime</scope> - </dependency> - - <dependency> - <groupId>com.fasterxml.jackson.module</groupId> - <artifactId>jackson-module-jaxb-annotations</artifactId> - <version>${jackson.version}</version> - <scope>runtime</scope> - </dependency> - - <dependency> - <groupId>org.apache.opennlp</groupId> - <artifactId>opennlp-tools</artifactId> - </dependency> - - <dependency> - <groupId>org.junit.jupiter</groupId> - <artifactId>junit-jupiter-api</artifactId> - <scope>test</scope> - </dependency> - - <dependency> - <groupId>org.junit.jupiter</groupId> - <artifactId>junit-jupiter-engine</artifactId> - <scope>test</scope> - </dependency> - - <dependency> - <groupId>org.slf4j</groupId> - <artifactId>slf4j-simple</artifactId> - <scope>test</scope> - </dependency> - </dependencies> - <build> - <plugins> - <plugin> - <artifactId>maven-assembly-plugin</artifactId> - <configuration> - <descriptorRefs> - <descriptorRef>jar-with-dependencies</descriptorRef> - </descriptorRefs> - </configuration> - <executions> - <execution> - <id>make-assembly</id> - <phase>package</phase> - <goals> - <goal>single</goal> - </goals> - </execution> - </executions> - </plugin> - </plugins> - </build> -</project> diff --git a/opennlp-brat-annotator/src/main/bin/brat-annotation-service b/opennlp-brat-annotator/src/main/bin/brat-annotation-service deleted file mode 100755 index eac95668..00000000 --- a/opennlp-brat-annotator/src/main/bin/brat-annotation-service +++ /dev/null @@ -1,56 +0,0 @@ -#!/bin/sh - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Note: Do not output anything in this script file, any output -# may be inadvertantly placed in any output files if -# output redirection is used. - -# determine OPENNLP_HOME - $0 may be a symlink to OpenNLP's home -PRG="$0" - -while [ -h "$PRG" ] ; do - ls=`ls -ld "$PRG"` - link=`expr "$ls" : '.*-> \(.*\)$'` - if expr "$link" : '/.*' > /dev/null; then - PRG="$link" - else - PRG="`dirname "$PRG"`/$link" - fi -done - -saveddir=`pwd` - -OPENNLP_HOME=`dirname "$PRG"`/.. - -# make it fully qualified -OPENNLP_HOME=`cd "$OPENNLP_HOME" && pwd` - -cd "$saveddir" - -if [ -z "$JAVACMD" ] ; then - if [ -n "$JAVA_HOME" ] ; then - JAVACMD="$JAVA_HOME/bin/java" - else - JAVACMD="`which java`" - fi -fi - -CLASSPATH=$(echo $OPENNLP_HOME/lib/*.jar | tr ' ' ':') - -$JAVACMD -Xmx1024m -Dlog4j.configurationFile="$OPENNLP_HOME/conf/log4j2.xml" -cp "$CLASSPATH" opennlp.bratann.NameFinderAnnService $@ diff --git a/opennlp-brat-annotator/src/main/bin/brat-annotation-service.bat b/opennlp-brat-annotator/src/main/bin/brat-annotation-service.bat deleted file mode 100755 index 289248b4..00000000 --- a/opennlp-brat-annotator/src/main/bin/brat-annotation-service.bat +++ /dev/null @@ -1,51 +0,0 @@ -@ECHO off - -REM # Licensed to the Apache Software Foundation (ASF) under one -REM # or more contributor license agreements. See the NOTICE file -REM # distributed with this work for additional information -REM # regarding copyright ownership. The ASF licenses this file -REM # to you under the Apache License, Version 2.0 (the -REM # "License"); you may not use this file except in compliance -REM # with the License. You may obtain a copy of the License at -REM # -REM # http://www.apache.org/licenses/LICENSE-2.0 -REM # -REM # Unless required by applicable law or agreed to in writing, -REM # software distributed under the License is distributed on an -REM # # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -REM # KIND, either express or implied. See the License for the -REM # specific language governing permissions and limitations -REM # under the License. - -REM # Note: Do not output anything in this script file, any output -REM # may be inadvertantly placed in any output files if -REM # output redirection is used. -SETLOCAL - -IF "%JAVA_CMD%" == "" ( - IF "%JAVA_HOME%" == "" ( - SET JAVA_CMD=java - ) ELSE ( - REM # Keep JAVA_HOME to short-name without spaces - FOR %%A IN ("%JAVA_HOME%") DO SET JAVA_CMD=%%~sfA\bin\java - ) -) - -REM # Should work with Windows XP and greater. If not, specify the path to where it is installed. -IF "%OPENNLP_HOME%" == "" ( - SET OPENNLP_HOME=%~sp0.. -) ELSE ( - REM # Keep OPENNLP_HOME to short-name without spaces - FOR %%A IN ("%OPENNLP_HOME%") DO SET OPENNLP_HOME=%%~sfA -) -setLocal EnableDelayedExpansion -set CLASSPATH=" - -FOR %%A IN ("%OPENNLP_HOME%\lib\*.jar") DO ( - set CLASSPATH=!CLASSPATH!;%%A -) -set CLASSPATH=!CLASSPATH!" - -%JAVA_CMD% -Xmx1024m "-Dlog4j.configurationFile=%OPENNLP_HOME%\conf\log4j2.xml" -cp %CLASSPATH% opennlp.bratann.NameFinderAnnService %* - -ENDLOCAL \ No newline at end of file diff --git a/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderAnnService.java b/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderAnnService.java deleted file mode 100644 index 1735cb88..00000000 --- a/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderAnnService.java +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package opennlp.bratann; - -import java.io.File; -import java.net.URI; -import java.util.Arrays; -import java.util.List; - -import jakarta.ws.rs.core.UriBuilder; -import org.glassfish.jersey.grizzly2.httpserver.GrizzlyHttpServerFactory; -import org.glassfish.jersey.server.ResourceConfig; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import opennlp.tools.namefind.NameFinderME; -import opennlp.tools.namefind.TokenNameFinder; -import opennlp.tools.namefind.TokenNameFinderModel; -import opennlp.tools.sentdetect.NewlineSentenceDetector; -import opennlp.tools.sentdetect.SentenceDetector; -import opennlp.tools.sentdetect.SentenceDetectorME; -import opennlp.tools.sentdetect.SentenceModel; -import opennlp.tools.tokenize.SimpleTokenizer; -import opennlp.tools.tokenize.Tokenizer; -import opennlp.tools.tokenize.TokenizerME; -import opennlp.tools.tokenize.TokenizerModel; -import opennlp.tools.tokenize.WhitespaceTokenizer; - -public class NameFinderAnnService { - - private static final Logger LOG = LoggerFactory.getLogger(NameFinderAnnService.class); - static SentenceDetector sentenceDetector = new NewlineSentenceDetector(); - static Tokenizer tokenizer = WhitespaceTokenizer.INSTANCE; - static TokenNameFinder[] nameFinders; - - public static void main(String[] args) throws Exception { - - if (args.length == 0) { - LOG.info("Usage:"); - LOG.info("[NameFinderAnnService -serverPort port] [-tokenizerModel file] " - + "[-ruleBasedTokenizer whitespace|simple] " - + "[-sentenceDetectorModel file] namefinderFile|nameFinderURI"); - return; - } - - List<String> argList = Arrays.asList(args); - - int serverPort = 8080; - int serverPortIndex = argList.indexOf("-serverPort") + 1; - - if (serverPortIndex > 0 && serverPortIndex < args.length) { - serverPort = Integer.parseInt(args[serverPortIndex]); - } - - int sentenceModelIndex = argList.indexOf("-sentenceDetectorModel") + 1; - if (sentenceModelIndex > 0 && sentenceModelIndex < args.length) { - sentenceDetector = new SentenceDetectorME( - new SentenceModel(new File(args[sentenceModelIndex]))); - } - - int ruleBasedTokenizerIndex = argList.indexOf("-ruleBasedTokenizer") + 1; - - if (ruleBasedTokenizerIndex > 0 && ruleBasedTokenizerIndex < args.length) { - if ("whitespace".equals(args[ruleBasedTokenizerIndex])) { - tokenizer = WhitespaceTokenizer.INSTANCE; - } else if ("simple".equals(args[ruleBasedTokenizerIndex])) { - tokenizer = SimpleTokenizer.INSTANCE; - } else { - LOG.error("unknown tokenizer: {}", args[ruleBasedTokenizerIndex]); - return; - } - } - - int tokenizerModelIndex = argList.indexOf("-tokenizerModel") + 1; - if (tokenizerModelIndex > 0 && tokenizerModelIndex < args.length) { - tokenizer = new TokenizerME( - new TokenizerModel(new File(args[tokenizerModelIndex]))); - } - - nameFinders = new TokenNameFinder[] {new NameFinderME( - new TokenNameFinderModel(new File(args[args.length - 1])))}; - - URI baseUri = UriBuilder.fromUri("http://localhost/").port(serverPort).build(); - ResourceConfig config = new ResourceConfig(NameFinderResource.class); - GrizzlyHttpServerFactory.createHttpServer(baseUri, config); - } -} diff --git a/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderResource.java b/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderResource.java deleted file mode 100644 index f824c18c..00000000 --- a/opennlp-brat-annotator/src/main/java/opennlp/bratann/NameFinderResource.java +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package opennlp.bratann; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import jakarta.ws.rs.Consumes; -import jakarta.ws.rs.POST; -import jakarta.ws.rs.Path; -import jakarta.ws.rs.Produces; -import jakarta.ws.rs.QueryParam; -import jakarta.ws.rs.core.MediaType; - -import opennlp.tools.namefind.TokenNameFinder; -import opennlp.tools.sentdetect.SentenceDetector; -import opennlp.tools.tokenize.Tokenizer; -import opennlp.tools.util.Span; - -@Path("/ner") -public class NameFinderResource { - - private final SentenceDetector sentDetect = NameFinderAnnService.sentenceDetector; - private final Tokenizer tokenizer = NameFinderAnnService.tokenizer; - private final TokenNameFinder[] nameFinders = NameFinderAnnService.nameFinders; - - private static int findNextNonWhitespaceChar(CharSequence s, int beginOffset, int endOffset) { - for (int i = beginOffset; i < endOffset; i++) { - if (!Character.isSpaceChar(s.charAt(i))) { - return i; - } - } - return -1; - } - - @POST - @Consumes(MediaType.TEXT_PLAIN) - @Produces(MediaType.APPLICATION_JSON) - public Map<String, NameAnn> findNames(@QueryParam("model") String modelName, String text) { - Span[] sentenceSpans = sentDetect.sentPosDetect(text); - Map<String, NameAnn> map = new HashMap<>(); - - int indexCounter = 0; - - for (Span sentenceSpan : sentenceSpans) { - - String sentenceText = sentenceSpan.getCoveredText(text).toString(); - - // offset of sentence gets lost here! - Span[] tokenSpans = tokenizer.tokenizePos(sentenceText); - - String[] tokens = Span.spansToStrings(tokenSpans, sentenceText); - - for (TokenNameFinder nameFinder : nameFinders) { - Span[] names = nameFinder.find(tokens); - - for (Span name : names) { - - int beginOffset = tokenSpans[name.getStart()].getStart() + sentenceSpan.getStart(); - int endOffset = tokenSpans[name.getEnd() - 1].getEnd() + sentenceSpan.getStart(); - - // create a list of new line indexes - List<Integer> newLineIndexes = new ArrayList<>(); - - // TODO: Code needs to handle case that there are multiple new lines - // in a row - - boolean inNewLineSequence = false; - for (int ci = beginOffset; ci < endOffset; ci++) { - if (text.charAt(ci) == '\n' || text.charAt(ci) == '\r') { - if (!inNewLineSequence) { - newLineIndexes.add(ci); - } - inNewLineSequence = true; - } else { - inNewLineSequence = false; - } - } - - List<String> textSegments = new ArrayList<>(); - List<int[]> spanSegments = new ArrayList<>(); - - int segmentBegin = beginOffset; - - for (int newLineOffset : newLineIndexes) { - // create segment from begin to offset - textSegments.add(text.substring(segmentBegin, newLineOffset)); - spanSegments.add(new int[] {segmentBegin, newLineOffset}); - - segmentBegin = findNextNonWhitespaceChar(text, newLineOffset + 1, - endOffset); - - if (segmentBegin == -1) { - break; - } - } - - // create left over segment - if (segmentBegin != -1) { - textSegments.add(text.substring(segmentBegin, endOffset)); - spanSegments.add(new int[] {segmentBegin, endOffset}); - } - - NameAnn ann = new NameAnn(); - ann.texts = textSegments.toArray(new String[0]); - ann.offsets = spanSegments.toArray(new int[spanSegments.size()][]); - ann.type = name.getType(); - - map.put(Integer.toString(indexCounter++), ann); - } - } - } - return map; - } - - public static class NameAnn { - public int[][] offsets; - public String[] texts; - public String type; - } -} diff --git a/opennlp-distr/pom.xml b/opennlp-distr/pom.xml index cb5bfcb1..9d87ca91 100644 --- a/opennlp-distr/pom.xml +++ b/opennlp-distr/pom.xml @@ -50,11 +50,7 @@ <groupId>org.apache.opennlp</groupId> <artifactId>opennlp-morfologik-addon</artifactId> </dependency> - <dependency> - <groupId>org.apache.opennlp</groupId> - <artifactId>opennlp-brat-annotator</artifactId> - </dependency> - <!-- ship the dist with a logging impl for cli users --> + <!-- ship the dist with a logging impl for cli users --> <dependency> <groupId>org.slf4j</groupId> <artifactId>slf4j-api</artifactId> diff --git a/opennlp-distr/src/main/assembly/bin.xml b/opennlp-distr/src/main/assembly/bin.xml index 1a92bfd7..1c8d7d8b 100644 --- a/opennlp-distr/src/main/assembly/bin.xml +++ b/opennlp-distr/src/main/assembly/bin.xml @@ -100,13 +100,6 @@ <outputDirectory>bin</outputDirectory> </fileSet> - <fileSet> - <directory>../opennlp-brat-annotator/src/main/bin</directory> - <fileMode>755</fileMode> - <directoryMode>755</directoryMode> - <outputDirectory>bin</outputDirectory> - </fileSet> - <fileSet> <directory>../opennlp-tools/lang</directory> <fileMode>644</fileMode> @@ -128,13 +121,6 @@ <outputDirectory>docs/apidocs/opennlp-tools</outputDirectory> </fileSet> - <fileSet> - <directory>../opennlp-brat-annotator/target/apidocs</directory> - <fileMode>644</fileMode> - <directoryMode>755</directoryMode> - <outputDirectory>docs/apidocs/opennlp-brat-annotator</outputDirectory> - </fileSet> - <fileSet> <directory>../opennlp-morfologik-addon/target/apidocs</directory> <fileMode>644</fileMode> diff --git a/pom.xml b/pom.xml index 871442cb..d9fd7476 100644 --- a/pom.xml +++ b/pom.xml @@ -147,13 +147,7 @@ <version>${project.version}</version> </dependency> - <dependency> - <artifactId>opennlp-brat-annotator</artifactId> - <groupId>${project.groupId}</groupId> - <version>${project.version}</version> - </dependency> - - <dependency> + <dependency> <artifactId>opennlp-morfologik-addon</artifactId> <groupId>${project.groupId}</groupId> <version>${project.version}</version> @@ -168,8 +162,6 @@ <maven.compiler.release>${java.version}</maven.compiler.release> <maven.compiler.target>${java.version}</maven.compiler.target> <maven.version>3.3.9</maven.version> - <jackson.version>2.18.1</jackson.version> - <jersey.version>3.1.9</jersey.version> <junit.version>5.11.3</junit.version> <junit5-system-exit.version>2.0.2</junit5-system-exit.version> <uimaj.version>3.5.0</uimaj.version> @@ -545,7 +537,6 @@ <modules> <module>opennlp-tools</module> <module>opennlp-uima</module> - <module>opennlp-brat-annotator</module> <module>opennlp-morfologik-addon</module> <module>opennlp-docs</module> <module>opennlp-distr</module>
