Revision: 17888
http://sourceforge.net/p/gate/code/17888
Author: markagreenwood
Date: 2014-04-21 09:58:53 +0000 (Mon, 21 Apr 2014)
Log Message:
-----------
Applied patch #40 from SourceForge to give us a wrapper for the OpenNLP parser.
I had to modify the patch slightly due to changes in OpenNLP since it was
submitted in 2010, but the output it produces looks reasonable. I haven't
included the parser in the default OpenNLP app, though.
Added Paths:
-----------
gate/trunk/plugins/OpenNLP/src/gate/opennlp/OpenNlpParser.java
Added: gate/trunk/plugins/OpenNLP/src/gate/opennlp/OpenNlpParser.java
===================================================================
--- gate/trunk/plugins/OpenNLP/src/gate/opennlp/OpenNlpParser.java
(rev 0)
+++ gate/trunk/plugins/OpenNLP/src/gate/opennlp/OpenNlpParser.java
2014-04-21 09:58:53 UTC (rev 17888)
@@ -0,0 +1,178 @@
+package gate.opennlp;
+
+import gate.Annotation;
+import gate.AnnotationSet;
+import gate.FeatureMap;
+import gate.Resource;
+import gate.creole.AbstractLanguageAnalyser;
+import gate.creole.ExecutionException;
+import gate.creole.ResourceInstantiationException;
+import gate.creole.metadata.CreoleParameter;
+import gate.creole.metadata.CreoleResource;
+import gate.creole.metadata.Optional;
+import gate.creole.metadata.RunTime;
+
+import java.io.InputStream;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import opennlp.tools.parser.Parse;
+import opennlp.tools.parser.Parser;
+import opennlp.tools.parser.ParserFactory;
+import opennlp.tools.parser.ParserModel;
+import opennlp.tools.util.Span;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.log4j.Logger;
+
+/**
+ * Wrapper for the opennlp parser
+ */
+@CreoleResource(name="OpenNLP Parser")
+public class OpenNlpParser extends AbstractLanguageAnalyser {
+
+ public static final long serialVersionUID = 1L;
+
+ private static final Logger logger = Logger.getLogger(OpenNlpChunker.class);
+
+ String inputASName;
+
+ public String getInputASName() {
+ return inputASName;
+ }
+
+ @CreoleParameter
+ @RunTime
+ @Optional
+ public void setInputASName(String inputASName) {
+ this.inputASName = inputASName;
+ }
+
+ Parser parser = null;
+
+ private URL model;
+
+ private AnnotationSet annotations;
+
+ @Override
+ public void execute() throws ExecutionException {
+ // text doc annotations
+ if(inputASName != null && inputASName.length() > 0) {
+ annotations = document.getAnnotations(inputASName);
+ } else {
+ annotations = document.getAnnotations();
+ }
+
+ // get token and sentence annotations
+ AnnotationSet sentences = annotations.get("Sentence");
+ AnnotationSet tokensAS = annotations.get("Token");
+
+ if(sentences != null && sentences.size() > 0 && tokensAS != null &&
+ tokensAS.size() > 0) {
+
+ List<Annotation> sentList = new ArrayList<Annotation>(sentences);
+ java.util.Collections.sort(sentList, new gate.util.OffsetComparator());
+
+ try {
+ for(Annotation annotation : sentList) {
+ AnnotationSet sentenceTokens =
+ annotations.get("Token", annotation.getStartNode().getOffset(),
+ annotation.getEndNode().getOffset());
+
+ List<Annotation> annList = new ArrayList<Annotation>(sentenceTokens);
+ Collections.sort(annList, new gate.util.OffsetComparator());
+
+ Long sentStart = annotation.getStartNode().getOffset();
+ Long sentEnd = annotation.getEndNode().getOffset();
+ String text =
+ document.getContent().getContent(sentStart, sentEnd).toString();
+ Parse parse =
+ new Parse(text, new Span(0, text.length()), "INC", 1, null);
+
+ for(Annotation ann : annList) {
+ Long start = ann.getStartNode().getOffset() - sentStart;
+ Long end = ann.getEndNode().getOffset() - sentStart;
+ parse.insert(new Parse(text, new Span(start.intValue(), end
+ .intValue()), "TK", 0, 0));
+ }
+
+ Parse result = parser.parse(parse);
+
+ annotate(result, sentStart);
+ }
+ } catch(gate.util.InvalidOffsetException e) {
+ e.printStackTrace();
+ throw new ExecutionException(e);
+ }
+ } else {
+ throw new ExecutionException("No sentences or tokens to process!\n"
+ + "Please run a sentence splitter " + "and tokeniser first!");
+ }
+ }
+
+ private Integer annotate(Parse p, Long sentStart)
+ throws gate.util.InvalidOffsetException {
+
+ List<Integer> childIDs = new ArrayList<Integer>();
+ Parse[] children = p.getChildren();
+ for(Parse cp : children) {
+ Integer childID = annotate(cp, sentStart);
+ if(childID >= 0) childIDs.add(childID);
+ }
+
+ String type = p.getType();
+ if(type.equals("TK")) return -1;
+
+ Span span = p.getSpan();
+ Long start = sentStart + span.getStart();
+ Long end = sentStart + span.getEnd();
+
+ FeatureMap fm = gate.Factory.newFeatureMap();
+ String text = document.getContent().getContent(start, end).toString();
+ fm.put("text", text);
+ fm.put("cat", p.getType());
+ if(!childIDs.isEmpty()) fm.put("consists", childIDs);
+
+ return annotations.add(start, end, "SyntaxTreeNode", fm);
+ }
+
+ public URL getModel() {
+ return model;
+ }
+
+ /* getters and setters for the PR */
+ /* public members */
+
+ @Override
+ public Resource init() throws ResourceInstantiationException {
+ InputStream modelIn = null;
+ try {
+ modelIn = model.openStream();
+
+ ParserModel model = new ParserModel(modelIn);
+
+ parser = ParserFactory.create(model);
+ } catch(Exception e) {
+ e.printStackTrace();
+ logger.error("Parser can not be initialized!");
+ throw new RuntimeException("Parser cannot be initialized!", e);
+ } finally {
+ IOUtils.closeQuietly(modelIn);
+ }
+ return this;
+ }
+
+ @Override
+ public void reInit() throws ResourceInstantiationException {
+ init();
+ }
+
+ @CreoleParameter(defaultValue = "models/english/en-parser-chunking.bin",
+ comment = "location of the parser model")
+ public void setModel(URL model) {
+ this.model = model;
+ }
+
+}
\ No newline at end of file
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Start Your Social Network Today - Download eXo Platform
Build your Enterprise Intranet with eXo Platform Software
Java Based Open Source Intranet - Social, Extensible, Cloud Ready
Get Started Now And Turn Your Intranet Into A Collaboration Platform
http://p.sf.net/sfu/ExoPlatform
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs