Author: rwesten
Date: Wed Jun  5 08:14:45 2013
New Revision: 1489737

URL: http://svn.apache.org/r1489737
Log:
Implementation of STANBOL-1091

Modified:
    
stanbol/trunk/enhancement-engines/entitylinking/engine/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/impl/ProcessingState.java

Modified: 
stanbol/trunk/enhancement-engines/entitylinking/engine/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/impl/ProcessingState.java
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/entitylinking/engine/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/impl/ProcessingState.java?rev=1489737&r1=1489736&r2=1489737&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/entitylinking/engine/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/impl/ProcessingState.java (original)
+++ stanbol/trunk/enhancement-engines/entitylinking/engine/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/impl/ProcessingState.java Wed Jun  5 08:14:45 2013
@@ -86,6 +86,12 @@ public class ProcessingState {
      */
     private int consumedIndex = -1;
     /**
+     * Ensures that Tokens are not processed twice in case of multiple
+     * overlapping Sentence Annotations (e.g. if two NLP frameworks contributing
+     * Sentences do not agree with each other).
+     */
+    private int consumedSectionIndex = -1;
+    /**
      * The language of the text
      */
     private String language;
@@ -136,7 +142,7 @@ public class ProcessingState {
         Iterator<Sentence> sentences = at.getSentences();
         this.sections = sentences.hasNext() ? sentences : Collections.singleton(at).iterator();
         //init the first sentence
-        initNextSentence();
+        //initNextSentence();
     }
     /**
      * Getter for the current section. This is typically a {@link Sentence}
@@ -237,6 +243,14 @@ public class ProcessingState {
         boolean foundLinkableToken = false;
         while(!foundLinkableToken && sections.hasNext()){
             section = sections.next();
+            if(consumedSectionIndex > section.getStart()){
+                log.debug(" > skipping {} because an other section until Index {} " +
+                               "was already processed. This is not an error, but indicates that" +
+                               "multiple NLP framewords do contribute divergating Sentence annotations",
+                               section, consumedSectionIndex);
+                continue; //ignore this section
+            }
+            consumedSectionIndex = section.getEnd();
             tokens.clear(); //clear token for each section (STANBOL-818)
             Iterator<Span> enclosed = section.getEnclosed(enclosedSpanTypes);
             ChunkData activeChunk = null;


Reply via email to