Repository: opennlp
Updated Branches:
  refs/heads/881 [created] 124202d8a

Improve error reporting on parser crashes

Parsing errors are now caught and wrapped in
an IOException containing the document id for
tracking down the actual file.

See issue OPENNLP-881


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/124202d8
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/124202d8
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/124202d8

Branch: refs/heads/881
Commit: 124202d8a25aa08914f4793ef460f2f6e74c26aa
Parents: 92e541c
Author: Jörn Kottmann <[email protected]>
Authored: Thu Nov 10 01:07:14 2016 +0100
Committer: Jörn Kottmann <[email protected]>
Committed: Thu Nov 10 01:07:14 2016 +0100

----------------------------------------------------------------------
 .../formats/brat/BratAnnotationStream.java      | 25 +++++++++++++-------
 1 file changed, 16 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/124202d8/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratAnnotationStream.java
----------------------------------------------------------------------
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratAnnotationStream.java
 
b/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratAnnotationStream.java
index 9d4b0f2..6e6cb11 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratAnnotationStream.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/formats/brat/BratAnnotationStream.java
@@ -82,9 +82,14 @@ public class BratAnnotationStream implements 
ObjectStream<BratAnnotation> {
         String coveredText = 
line.subSequence(values[firstTextTokenIndex].getStart(),
             values[values.length - 1].getEnd()).toString();
 
-        return new SpanAnnotation(id, type,
-            new Span(parseInt(values[BEGIN_OFFSET]
-                .getCoveredText(line).toString()), endOffset, type), 
coveredText);
+        try {
+          return new SpanAnnotation(id, type,
+                  new Span(parseInt(values[BEGIN_OFFSET]
+                          .getCoveredText(line).toString()), endOffset, type), 
coveredText);
+        }
+        catch (IllegalArgumentException e) {
+          throw new InvalidFormatException(e);
+        }
       }
       else {
         throw new InvalidFormatException("Line must have at least 5 fields");
@@ -173,18 +178,20 @@ public class BratAnnotationStream implements 
ObjectStream<BratAnnotation> {
         BratAnnotationParser parser = parsers.get(typeClass);
 
         if (parser == null) {
-          throw new IOException("Failed to parse ann document with id " + id +
+          throw new IOException("Failed to parse ann document with id " + id + 
".ann" +
               " type class, no parser registered: " + 
tokens[BratAnnotationParser.TYPE_OFFSET]
               .getCoveredText(line).toString());
         }
 
-        return parser.parse(tokens, line);
+        try {
+          return parser.parse(tokens, line);
+        }
+        catch (IOException e)  {
+          throw new IOException(String.format("Failed to parse ann document 
with id [%s.ann]", id), e);
+        }
       }
     }
-    else {
-      return null;
-    }
-
+    
     return null;
   }
 

Reply via email to