Revision: 17796
http://sourceforge.net/p/gate/code/17796
Author: ian_roberts
Date: 2014-04-10 12:28:34 +0000 (Thu, 10 Apr 2014)
Log Message:
-----------
Added "detail" support for annotation jobs.
Modified Paths:
--------------
gate/trunk/plugins/Crowd_Sourcing/src/gate/crowdsource/ne/EntityAnnotationJobBuilder.java
gate/trunk/plugins/Crowd_Sourcing/src/gate/crowdsource/rest/CrowdFlowerClient.java
Modified:
gate/trunk/plugins/Crowd_Sourcing/src/gate/crowdsource/ne/EntityAnnotationJobBuilder.java
===================================================================
--- gate/trunk/plugins/Crowd_Sourcing/src/gate/crowdsource/ne/EntityAnnotationJobBuilder.java	2014-04-10 12:00:46 UTC (rev 17795)
+++ gate/trunk/plugins/Crowd_Sourcing/src/gate/crowdsource/ne/EntityAnnotationJobBuilder.java	2014-04-10 12:28:34 UTC (rev 17796)
@@ -33,35 +33,37 @@
import javax.swing.Action;
-@CreoleResource(name = "Entity Annotation Job Builder",
- comment = "Build a CrowdFlower job asking users to annotate entities within a snippet of text")
-public class EntityAnnotationJobBuilder extends AbstractLanguageAnalyser implements ActionsPublisher {
+@CreoleResource(name = "Entity Annotation Job Builder", comment = "Build a CrowdFlower job asking users to annotate entities within a snippet of text")
+public class EntityAnnotationJobBuilder extends AbstractLanguageAnalyser
+ implements
+ ActionsPublisher {
private static final long serialVersionUID = -1584716901194104888L;
private String apiKey;
-
+
private Long jobId;
-
+
private String snippetAnnotationType;
-
+
private String snippetASName;
-
+
private String tokenAnnotationType;
-
+
private String tokenASName;
-
+
+ private String detailFeatureName;
+
private String goldFeatureName;
-
+
private String goldFeatureValue;
-
+
private String goldReasonFeatureName;
private String entityAnnotationType;
-
+
private String entityASName;
-
protected CrowdFlowerClient crowdFlowerClient;
public String getApiKey() {
@@ -88,9 +90,8 @@
}
@RunTime
- @CreoleParameter(defaultValue = "Sentence",
- comment = "Annotation type for the \"snippet\" annotations. " +
- "One snippet = one CrowdFlower unit")
+ @CreoleParameter(defaultValue = "Sentence", comment = "Annotation type for the \"snippet\" annotations. "
+ + "One snippet = one CrowdFlower unit")
public void setSnippetAnnotationType(String contextAnnotationType) {
this.snippetAnnotationType = contextAnnotationType;
}
@@ -111,9 +112,9 @@
}
@RunTime
- @CreoleParameter(defaultValue = "Token",
- comment = "Annotation type representing the \"tokens\" - the atomic " +
- "units that workers will have to select to mark entity annotations.")
+ @CreoleParameter(defaultValue = "Token", comment = "Annotation type "
+ + "representing the \"tokens\" - the atomic units that "
+ + "workers will have to select to mark entity annotations.")
public void setTokenAnnotationType(String tokenAnnotationType) {
this.tokenAnnotationType = tokenAnnotationType;
}
@@ -129,14 +130,29 @@
this.tokenASName = tokenASName;
}
+ public String getDetailFeatureName() {
+ return detailFeatureName;
+ }
+
+ @Optional
+ @RunTime
+ @CreoleParameter(defaultValue = "detail", comment = "Feature on the "
+ + "snippet annotations containing additional details to be shown "
+ + "to the annotators. This is interpreted as HTML, and can be "
+ + "used for example to show a list of clickable links extracted "
+ + "from the snippet.")
+ public void setDetailFeatureName(String detailFeatureName) {
+ this.detailFeatureName = detailFeatureName;
+ }
+
public String getEntityAnnotationType() {
return entityAnnotationType;
}
@RunTime
- @CreoleParameter(comment = "Annotation type representing the gold " +
- "standard annotations, i.e. the kind of entities that you want " +
- "workers to find.")
+ @CreoleParameter(comment = "Annotation type representing the gold "
+ + "standard annotations, i.e. the kind of entities that you want "
+ + "workers to find.")
public void setEntityAnnotationType(String entityAnnotationType) {
this.entityAnnotationType = entityAnnotationType;
}
@@ -157,8 +173,7 @@
}
@RunTime
- @CreoleParameter(defaultValue = "gold",
- comment = "Name of a feature that marks a snippet as \"gold\"")
+ @CreoleParameter(defaultValue = "gold", comment = "Name of a feature that marks a snippet as \"gold\"")
public void setGoldFeatureName(String goldFeatureName) {
this.goldFeatureName = goldFeatureName;
}
@@ -168,8 +183,7 @@
}
@RunTime
- @CreoleParameter(defaultValue = "yes",
- comment = "Value of the feature that marks a snippet as \"gold\"")
+ @CreoleParameter(defaultValue = "yes", comment = "Value of the feature that marks a snippet as \"gold\"")
public void setGoldFeatureValue(String goldFeatureValue) {
this.goldFeatureValue = goldFeatureValue;
}
@@ -180,9 +194,8 @@
@Optional
@RunTime
- @CreoleParameter(defaultValue = "reason",
- comment = "Feature on gold snippet annotations explaining " +
- "why the snippet's entities are correct")
+ @CreoleParameter(defaultValue = "reason", comment = "Feature on gold snippet annotations explaining "
+ + "why the snippet's entities are correct")
public void setGoldReasonFeatureName(String goldReasonFeatureName) {
this.goldReasonFeatureName = goldReasonFeatureName;
}
@@ -204,38 +217,58 @@
if(jobId == null || jobId.longValue() <= 0) {
throw new ExecutionException("Job ID must be provided");
}
-
- AnnotationSet tokens = getDocument().getAnnotations(tokenASName).get(tokenAnnotationType);
- AnnotationSet snippetAnnotations = getDocument().getAnnotations(snippetASName)
- .get(snippetAnnotationType);
- AnnotationSet goldAS = getDocument().getAnnotations(entityASName).get(entityAnnotationType);
-
+
+ AnnotationSet tokens =
+ getDocument().getAnnotations(tokenASName)
+ .get(tokenAnnotationType);
+ AnnotationSet snippetAnnotations =
+ getDocument().getAnnotations(snippetASName).get(
+ snippetAnnotationType);
+ AnnotationSet goldAS =
+ getDocument().getAnnotations(entityASName).get(
+ entityAnnotationType);
+
List<Annotation> allSnippets = Utils.inDocumentOrder(snippetAnnotations);
- fireStatusChanged("Creating CrowdFlower units for " + allSnippets.size() + " "
- + snippetAnnotationType + " annotations for " + entityAnnotationType
- + " annotation task");
-
+ fireStatusChanged("Creating CrowdFlower units for " + allSnippets.size()
+ + " " + snippetAnnotationType + " annotations for "
+ + entityAnnotationType + " annotation task");
+
int snippetIdx = 0;
for(Annotation snippet : allSnippets) {
fireProgressChanged((100 * snippetIdx++) / allSnippets.size());
if(isInterrupted()) throw new ExecutionInterruptedException();
- AnnotationSet snippetTokens = Utils.getContainedAnnotations(tokens, snippet);
+ AnnotationSet snippetTokens =
+ Utils.getContainedAnnotations(tokens, snippet);
+ String detail = null;
+ if(detailFeatureName != null) {
+ Object detailObj = snippet.getFeatures().get(detailFeatureName);
+ if(detailObj != null) {
+ detail = detailObj.toString();
+ }
+ }
AnnotationSet goldAnnots = null;
String goldReason = null;
if(goldFeatureValue.equals(snippet.getFeatures().get(goldFeatureName))) {
goldAnnots = Utils.getContainedAnnotations(goldAS, snippet);
if(goldReasonFeatureName != null) {
- Object goldReasonValue = snippet.getFeatures().get(goldReasonFeatureName);
- if(goldReasonValue != null) goldReason = goldReasonValue.toString();
+ Object goldReasonValue =
+ snippet.getFeatures().get(goldReasonFeatureName);
+ if(goldReasonValue != null)
+ goldReason = goldReasonValue.toString();
}
}
-
- long unitId = crowdFlowerClient.createAnnotationUnit(
- jobId, getDocument(), snippetASName, snippet, snippetTokens, goldAnnots, goldReason);
- // store the unit ID - we use the entity annotation type as part of this feature
- // name so the same sentences can hold units for different annotation types
+
+ long unitId =
+ crowdFlowerClient.createAnnotationUnit(jobId, getDocument(),
+ snippetASName, snippet, detail, snippetTokens,
+ goldAnnots, goldReason);
+ // store the unit ID - we use the entity annotation type as part
+ // of this feature
+ // name so the same sentences can hold units for different
+ // annotation types
// e.g. Person, Location, Organization
- snippet.getFeatures().put(entityAnnotationType + "_unit_id", Long.valueOf(unitId));
+ snippet.getFeatures().put(entityAnnotationType + "_unit_id",
+ Long.valueOf(unitId));
}
fireProcessFinished();
fireStatusChanged(allSnippets.size() + " units created");
@@ -243,9 +276,9 @@
interrupted = false;
}
}
-
+
private List<Action> actions = null;
-
+
public List<Action> getActions() {
if(actions == null) {
actions = new ArrayList<Action>();
Modified:
gate/trunk/plugins/Crowd_Sourcing/src/gate/crowdsource/rest/CrowdFlowerClient.java
===================================================================
--- gate/trunk/plugins/Crowd_Sourcing/src/gate/crowdsource/rest/CrowdFlowerClient.java	2014-04-10 12:00:46 UTC (rev 17795)
+++ gate/trunk/plugins/Crowd_Sourcing/src/gate/crowdsource/rest/CrowdFlowerClient.java	2014-04-10 12:28:34 UTC (rev 17796)
@@ -98,15 +98,14 @@
// construct the CML with the specified caption and common radio
// options
- cml.append("<h2 id=\"unit_text\">{{text}}</h2>\n\n"
- + "<cml:radios validates=\"required\" label=\"");
+ cml.append("<h2 id=\"unit_text\">{{text}}</h2>\n\n" + "{% if detail %}\n"
+ + " <div class=\"well\">{{detail}}</div>\n"
+ + "{% endif %}\n" + "<cml:radios validates=\"required\" label=\"");
StringEscapeUtils.escapeXml(cml, caption);
- cml.append("\" name=\"answer\">\n"
- + " {% for opt in options %}\n"
+ cml.append("\" name=\"answer\">\n" + " {% for opt in options %}\n"
+ " {% if opt.description %}\n"
+ " {% assign desc = opt.description %}\n"
- + " {% else %}\n"
- + " {% assign desc = opt.value %}\n"
+ + " {% else %}\n" + " {% assign desc = opt.value %}\n"
+ " {% endif %}\n"
+ " <cml:radio value=\"{{opt.value}}\" label=\"{{desc}}\" />\n"
+ " {% endfor %}\n");
@@ -157,13 +156,12 @@
* </p>
*
* <p>
- * If the target annotation has a feature named "correct" then it
- * will be treated as a gold-standard unit. The "correct" feature
- * must match one of the "options" (i.e. one of the <i>keys</i> if
- * options is a Map) or one of the common options defined when
- * the job was created - typically things like "none" (none of the
- * available options is correct) or "nae" (the target is not an
- * entity).
+ * If the target annotation has a feature named "correct" then it will
+ * be treated as a gold-standard unit. The "correct" feature must
+ * match one of the "options" (i.e. one of the <i>keys</i> if options
+ * is a Map) or one of the common options defined when the job was
+ * created - typically things like "none" (none of the available
+ * options is correct) or "nae" (the target is not an entity).
* </p>
*
* @param jobId the CrowdFlower job ID
@@ -200,6 +198,11 @@
formDataSize += (2 * ((Collection<?>)options).size());
}
}
+
+ if(target.getFeatures().get("detail") != null) {
+ formDataSize += 2;
+ }
+
String correctAnswer = (String)target.getFeatures().get("correct");
String reason = (String)target.getFeatures().get("reason");
if(correctAnswer != null) {
@@ -236,6 +239,12 @@
}
}
}
+
+ if(target.getFeatures().get("detail") != null) {
+ formData[i++] = "unit[data][detail]";
+ formData[i++] = target.getFeatures().get("detail").toString();
+ }
+
if(correctAnswer != null) {
formData[i++] = "unit[golden]";
formData[i++] = "true";
@@ -258,10 +267,10 @@
+ target, e);
}
}
-
+
/**
- * Get the list of judgments for the given unit. If there are no judgments,
- * null is returned.
+ * Get the list of judgments for the given unit. If there are no
+ * judgments, null is returned.
*
* @param jobId the CrowdFlower job identifier
* @param unitId the unit identifier
@@ -272,7 +281,8 @@
String uri = "/jobs/" + jobId + "/units/" + unitId;
JsonElement unitResponse = get(uri);
if(!unitResponse.isJsonObject()) {
- throw new GateRuntimeException("Response from " + uri + " was not a JSON object");
+ throw new GateRuntimeException("Response from " + uri
+ + " was not a JSON object");
}
JsonElement results = unitResponse.getAsJsonObject().get("results");
if(!results.isJsonObject()) {
@@ -289,16 +299,16 @@
throw new GateRuntimeException("Could not retrieve unit details", e);
}
}
-
+
/**
* Create a named entity annotation job on CrowdFlower.
*
* @param title the job title
* @param instructions the instructions
- * @param caption a caption for the answer form, which should include the
- * entity type to be annotated.
+ * @param caption a caption for the answer form, which should include
+ * the entity type to be annotated.
* @param noEntitiesCaption a caption for the "there are no entities"
- * checkbox.
+ * checkbox.
* @return the newly created job ID.
* @throws IOException
*/
@@ -308,7 +318,7 @@
log.debug("title: " + title);
log.debug("instructions: " + instructions);
log.debug("caption: " + caption);
-
+
// load the CSS that makes highlighting work
InputStream cssStream =
CrowdFlowerClient.class.getResourceAsStream("gate-crowdflower.css");
@@ -319,7 +329,8 @@
cssStream.close();
}
- // load the JavaScript that toggles the colour of tokens when clicked
+ // load the JavaScript that toggles the colour of tokens when
+ // clicked
InputStream jsStream =
CrowdFlowerClient.class.getResourceAsStream("gate-crowdflower.js");
String js = null;
@@ -337,11 +348,12 @@
cml.append("\" name=\"answer\">\n"
+ " {% for tok in tokens %}\n"
+ " <cml:checkbox label=\"{{ tok }}\" value=\"{{ forloop.index0 }}\" />\n"
- + " {% endfor %}\n"
- + " </cml:checkboxes>\n"
- + "</div>\n"
- + "<div class=\"gate-no-entities\">\n"
- // TODO work out how to customize the validation error message
+ + " {% endfor %}\n" + " </cml:checkboxes>\n" + "</div>\n"
+ + "{% if detail %}\n"
+ + " <div class=\"well\">{{detail}}</div>\n"
+ + "{% endif %}\n" + "<div class=\"gate-no-entities\">\n"
+ // TODO work out how to customize the validation error
+ // message
+ " <cml:checkbox name=\"noentities\" label=\"");
StringEscapeUtils.escapeXml(cml, noEntitiesCaption);
cml.append("\" value=\"1\"\n"
@@ -361,41 +373,47 @@
throw new GateRuntimeException("Failed to create CF job");
}
}
-
+
/**
* Create a single unit for an entity annotation job.
*
* @param jobId the CrowdFlower job ID
* @param doc the document containing the annotation
* @param asName the annotation set containing the snippet annotation
- * @param snippet an annotation covering the snippet of text that
- * will be presented for annotation, typically a Sentence or
- * Tweet
- * @param tokens annotations representing the individual substrings
- * of the snippet that will be the atomic units of annotation.
- * Typically these will be Token annotations. The supplied
- * "tokens" should completely cover the non-whitespace content
- * of the snippet, but need not cover all the intervening
- * spaces.
+ * @param snippet an annotation covering the snippet of text that will
+ * be presented for annotation, typically a Sentence or Tweet
+ * @param detail additional details to be shown to the annotator below
+ * the snippet, e.g. a list of URL links that they might want
+ * to follow for more information. May be null, in which case
+ * no detail section will be added.
+ * @param tokens annotations representing the individual substrings of
+ * the snippet that will be the atomic units of annotation.
+ * Typically these will be Token annotations. The supplied
+ * "tokens" should completely cover the non-whitespace
+ * content of the snippet, but need not cover all the
+ * intervening spaces.
* @param correctAnnotations annotations representing the "correct"
- * answer - if this parameter is not <code>null</code> then
- * the unit will be considered as gold-standard data. This
- * includes the case where an empty annotation set is
- * provided, as this represents a gold snippet where the
- * correct answer is that this snippet contains no entities.
- * @param goldReason for a gold-standard unit, the <em>reason</em>
- * why the annotations should be considered correct. This
- * will be shown to users as feedback if they get the gold
- * unit wrong. Ignored for non-gold units.
+ * answer - if this parameter is not <code>null</code> then
+ * the unit will be considered as gold-standard data. This
+ * includes the case where an empty annotation set is
+ * provided, as this represents a gold snippet where the
+ * correct answer is that this snippet contains no entities.
+ * @param goldReason for a gold-standard unit, the <em>reason</em> why
+ * the annotations should be considered correct. This will be
+ * shown to users as feedback if they get the gold unit
+ * wrong. Ignored for non-gold units.
* @return the ID of the newly-created unit.
*/
public long createAnnotationUnit(long jobId, Document doc, String asName,
- Annotation snippet, AnnotationSet tokens, AnnotationSet correctAnnotations,
- String goldReason) {
+ Annotation snippet, String detail, AnnotationSet tokens,
+ AnnotationSet correctAnnotations, String goldReason) {
String documentId = String.valueOf(doc.getLRPersistenceId());
int formDataSize = 6; // docId + asName + annId
List<Annotation> tokensList = Utils.inDocumentOrder(tokens);
- formDataSize += 2*tokensList.size();
+ formDataSize += 2 * tokensList.size();
+ if(detail != null) {
+ formDataSize += 2;
+ }
Set<Integer> answerGold = null;
if(correctAnnotations != null) {
// gold unit
@@ -403,7 +421,8 @@
for(Annotation a : correctAnnotations) {
for(int i = 0; i < tokensList.size(); i++) {
Annotation tokenI = tokensList.get(i);
- if(Utils.start(tokenI) >= Utils.start(a) && Utils.end(tokenI) <= Utils.end(a)) {
+ if(Utils.start(tokenI) >= Utils.start(a)
+ && Utils.end(tokenI) <= Utils.end(a)) {
answerGold.add(i);
}
}
@@ -412,13 +431,15 @@
if(answerGold.size() == 0) {
formDataSize += 2; // noentities=1
} else {
- formDataSize += 2*answerGold.size(); // answer=N for each token
+ formDataSize += 2 * answerGold.size(); // answer=N for each
+ // token
}
if(goldReason != null) {
- formDataSize += 2; // answer_gold_reason or noentities_gold_reason
+ formDataSize += 2; // answer_gold_reason or
+ // noentities_gold_reason
}
}
-
+
String[] formData = new String[formDataSize];
int i = 0;
formData[i++] = "unit[data][documentId]";
@@ -431,6 +452,10 @@
formData[i++] = "unit[data][tokens][]";
formData[i++] = Utils.stringFor(doc, tok);
}
+ if(detail != null) {
+ formData[i++] = "unit[data][detail]";
+ formData[i++] = detail;
+ }
if(answerGold != null) {
formData[i++] = "unit[golden]";
formData[i++] = "true";
@@ -442,7 +467,8 @@
formData[i++] = goldReason;
}
} else {
- Integer[] goldArray = answerGold.toArray(new Integer[answerGold.size()]);
+ Integer[] goldArray =
+ answerGold.toArray(new Integer[answerGold.size()]);
Arrays.sort(goldArray);
for(Integer tokIndex : goldArray) {
formData[i++] = "unit[data][answer_gold][]";
@@ -468,7 +494,7 @@
protected JsonElement post(String uri, String... formData) throws IOException {
return request("POST", uri, formData);
}
-
+
protected JsonElement get(String uri) throws IOException {
return request("GET", uri);
}
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Put Bad Developers to Shame
Dominate Development with Jenkins Continuous Integration
Continuously Automate Build, Test & Deployment
Start a new project now. Try Jenkins in the cloud.
http://p.sf.net/sfu/13600_Cloudbees
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs