Revision: 17769
          http://sourceforge.net/p/gate/code/17769
Author:   ian_roberts
Date:     2014-04-03 15:03:12 +0000 (Thu, 03 Apr 2014)
Log Message:
-----------
Ignore (with a warning) entity annotations that are not covered by a context
annotation, rather than throwing an impenetrable error.

Modified Paths:
--------------
    gate/trunk/plugins/Crowd_Sourcing/src/gate/crowdsource/classification/EntityClassificationJobBuilder.java

Modified: gate/trunk/plugins/Crowd_Sourcing/src/gate/crowdsource/classification/EntityClassificationJobBuilder.java
===================================================================
--- gate/trunk/plugins/Crowd_Sourcing/src/gate/crowdsource/classification/EntityClassificationJobBuilder.java	2014-04-03 14:37:54 UTC (rev 17768)
+++ gate/trunk/plugins/Crowd_Sourcing/src/gate/crowdsource/classification/EntityClassificationJobBuilder.java	2014-04-03 15:03:12 UTC (rev 17769)
@@ -36,10 +36,14 @@
 import gate.crowdsource.rest.CrowdFlowerClient;
 import gate.gui.ActionsPublisher;
 
+import org.apache.log4j.Logger;
+
 @CreoleResource(name = "Entity Classification Job Builder",
    comment = "Build a CrowdFlower job asking users to select the right label 
for entities")
 public class EntityClassificationJobBuilder extends AbstractLanguageAnalyser 
implements ActionsPublisher {
 
+  private static final Logger log = 
Logger.getLogger(EntityClassificationJobBuilder.class);
+
   private static final long serialVersionUID = -1584716901194104888L;
 
   private String apiKey;
@@ -148,13 +152,20 @@
         fireProgressChanged((100 * entityIdx++) / allEntities.size());
         if(isInterrupted()) throw new ExecutionInterruptedException();
         AnnotationSet thisEntityContext = 
Utils.getCoveringAnnotations(contextAnnotations, entity);
-        // get the "closest" context, i.e. the shortest annotation in the 
covering set.
-        // usually we'd expect this set to contain just one annotation
-        Annotation context = Collections.min(thisEntityContext, 
ANNOTATION_LENGTH_COMPARATOR);
-        crowdFlowerClient.createClassificationUnit(jobId, getDocument(), 
entityASName, context, entity);
+        if(thisEntityContext.isEmpty()) {
+          log.warn(entityAnnotationType + " with ID " + entity.getId() +
+              " at offsets (" + Utils.start(entity) + ":" + Utils.end(entity) +
+              ") in document " + getDocument().getName() + 
+              " has no surrounding " + contextAnnotationType + " - ignored");
+        } else {
+          // get the "closest" context, i.e. the shortest annotation in the 
covering set.
+          // usually we'd expect this set to contain just one annotation
+          Annotation context = Collections.min(thisEntityContext, 
ANNOTATION_LENGTH_COMPARATOR);
+          crowdFlowerClient.createClassificationUnit(jobId, getDocument(), 
entityASName, context, entity);
+        }
       }
       fireProcessFinished();
-      fireStatusChanged(allEntities.size() + " units created");
+      fireStatusChanged(entityIdx + " units created");
     } finally {
       interrupted = false;
     }

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs

Reply via email to