Author: rwesten
Date: Wed Jun 5 08:26:31 2013
New Revision: 1489739
URL: http://svn.apache.org/r1489739
Log:
STANBOL-1070: local fix for the incident described in STANBOL-1091; added
debug-level logging; added svn ignores
Modified:
stanbol/trunk/enhancement-engines/entitycomention/ (props changed)
stanbol/trunk/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/EntityCoMentionEngine.java
stanbol/trunk/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/impl/ContentItemMentionBuilder.java
stanbol/trunk/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/impl/EntityMention.java
stanbol/trunk/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/impl/InMemoryEntityIndex.java
Propchange: stanbol/trunk/enhancement-engines/entitycomention/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Wed Jun 5 08:26:31 2013
@@ -0,0 +1,7 @@
+.project
+
+.classpath
+
+target
+
+.settings
Modified:
stanbol/trunk/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/EntityCoMentionEngine.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/EntityCoMentionEngine.java?rev=1489739&r1=1489738&r2=1489739&view=diff
==============================================================================
---
stanbol/trunk/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/EntityCoMentionEngine.java
(original)
+++
stanbol/trunk/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/EntityCoMentionEngine.java
Wed Jun 5 08:26:31 2013
@@ -153,7 +153,7 @@ import org.slf4j.LoggerFactory;
@Service(value=EnhancementEngine.class)
public class EntityCoMentionEngine extends
AbstractEnhancementEngine<RuntimeException,RuntimeException> implements
ServiceProperties {
- private static final Integer ENGINE_ORDERING =
ServiceProperties.ORDERING_POST_PROCESSING - 90;
+ private static final Integer ENGINE_ORDERING =
ServiceProperties.ORDERING_POST_PROCESSING + 90;
private static final Map<String,Object> SERVICE_PROPERTIES =
Collections.unmodifiableMap(Collections.singletonMap(
ServiceProperties.ENHANCEMENT_ENGINE_ORDERING,
@@ -170,7 +170,7 @@ public class EntityCoMentionEngine exten
@Reference
protected LabelTokenizer labelTokenizer;
- private BundleContext bundleContext;
+// private BundleContext bundleContext;
/**
* EntityLinking configuration used for Co-Mention extractions
*/
@@ -193,7 +193,7 @@ public class EntityCoMentionEngine exten
super.activate(ctx);
log.info("activate {}[name:{}]",getClass().getSimpleName(),getName());
Dictionary<String,Object> properties = ctx.getProperties();
- bundleContext = ctx.getBundleContext();
+// bundleContext = ctx.getBundleContext();
//extract TextProcessing and EnityLinking config from the provided
properties
textProcessingConfig = TextProcessingConfig.createInstance(properties);
linkerConfig =
EntityLinkerConfig.createInstance(properties,prefixService);
@@ -353,8 +353,6 @@ public class EntityCoMentionEngine exten
}
//now process initial mention(s) for the co-mention
for(UriRef initialMention : initialMentions){
- //link the co-mentation with the initial one
- metadata.add(new TripleImpl(textAnnotation,
DC_RELATION, initialMention));
//check confidence of the initial one
Double confidnece =
EnhancementEngineHelper.get(metadata, initialMention,
ENHANCER_CONFIDENCE, Double.class, literalFactory);
@@ -382,6 +380,9 @@ public class EntityCoMentionEngine exten
metadata.add(new
TripleImpl((NonLiteral)suggestion, DC_RELATION, textAnnotation));
}
+ //finally link the co-mentation with the initial one
+ metadata.add(new TripleImpl(textAnnotation,
DC_RELATION, initialMention));
+ //metadata.add(new TripleImpl(initialMention,
DC_RELATION, textAnnotation));
}
//TODO: support also Entities
if(maxConfidence != null){ //set the confidence value (if
known)
Modified:
stanbol/trunk/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/impl/ContentItemMentionBuilder.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/impl/ContentItemMentionBuilder.java?rev=1489739&r1=1489738&r2=1489739&view=diff
==============================================================================
---
stanbol/trunk/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/impl/ContentItemMentionBuilder.java
(original)
+++
stanbol/trunk/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/impl/ContentItemMentionBuilder.java
Wed Jun 5 08:26:31 2013
@@ -82,8 +82,8 @@ public class ContentItemMentionBuilder e
}
private void registerMention(EntityMention entityMention){
+ log.debug(" > register {} ",entityMention);
if(entityMention.getStart() == null || entityMention.getStart() < 0){
- log.debug(" > add global Mention[entity:
{}]",entityMention.getId());
addEntity(entityMention);
} else {
Collection<EntityMention> mentions =
mentionIndex.get(entityMention.getEnd());
@@ -102,15 +102,18 @@ public class ContentItemMentionBuilder e
*/
@Override
public void startToken(Token token) {
- Integer actIndex = token.getStart();
- for(Collection<EntityMention> mentions :
mentionIndex.subMap(lastIndex, actIndex).values()){
- for(EntityMention mention : mentions){
- log.debug(" > add Mention[index: [{},{}], entity: {}]",new
Object[]{
- mention.getStart(),mention.getEnd(), mention.getId()});
- addEntity(mention);
+ log.debug(" > start token: {}",token);
+ final Integer actIndex = token.getStart();
+ if(actIndex > lastIndex){
+ for(Collection<EntityMention> mentions :
mentionIndex.subMap(lastIndex, actIndex).values()){
+ for(EntityMention mention : mentions){
+ addEntity(mention);
+ }
}
- }
- lastIndex = actIndex;
+ lastIndex = actIndex;
+ } else if(lastIndex > actIndex){
+ log.warn("Token {} has earlier start index as the last one {}!",
token, lastIndex);
+ } // else the same index ... ignore
}
@Override
Modified:
stanbol/trunk/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/impl/EntityMention.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/impl/EntityMention.java?rev=1489739&r1=1489738&r2=1489739&view=diff
==============================================================================
---
stanbol/trunk/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/impl/EntityMention.java
(original)
+++
stanbol/trunk/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/impl/EntityMention.java
Wed Jun 5 08:26:31 2013
@@ -5,9 +5,11 @@ import java.util.Iterator;
import org.apache.clerezza.rdf.core.MGraph;
import org.apache.clerezza.rdf.core.PlainLiteral;
import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.commons.collections.IteratorUtils;
import org.apache.stanbol.enhancer.engines.entitycomention.CoMentionConstants;
import
org.apache.stanbol.enhancer.engines.entitycomention.EntityCoMentionEngine;
import org.apache.stanbol.enhancer.engines.entitylinking.Entity;
+import
org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig;
import org.apache.stanbol.enhancer.engines.entitylinking.impl.EntityLinker;
/**
@@ -24,7 +26,7 @@ public class EntityMention extends Entit
/**
* The label field of this Entity
*/
- public final UriRef labelField;
+ private final UriRef nameField;
/**
* The type field of this Entity
*/
@@ -52,7 +54,7 @@ public class EntityMention extends Entit
if(labelField == null){
throw new IllegalArgumentException("The LabelField MUST NOT be
NULL!");
}
- this.labelField = labelField;
+ this.nameField = labelField;
if(typeField == null){
throw new IllegalArgumentException("The TypeFeild MUST NOT be
NULL!");
}
@@ -82,7 +84,7 @@ public class EntityMention extends Entit
public Iterator<PlainLiteral> getText(UriRef field) {
if(CO_MENTION_FIELD_HASH == field.hashCode() && //avoid calling equals
CoMentionConstants.CO_MENTION_LABEL_FIELD.equals(field)){
- return super.getText(labelField);
+ return super.getText(nameField);
} else if(CO_MENTION_TYPE_HASH == field.hashCode() && //avoid calling
equals
CoMentionConstants.CO_MENTION_TYPE_FIELD.equals(field)){
return super.getText(typeField);
@@ -95,7 +97,7 @@ public class EntityMention extends Entit
public Iterator<UriRef> getReferences(UriRef field) {
if(CO_MENTION_FIELD_HASH == field.hashCode() && //avoid calling equals
CoMentionConstants.CO_MENTION_LABEL_FIELD.equals(field)){
- return super.getReferences(labelField);
+ return super.getReferences(nameField);
} else if(CO_MENTION_TYPE_HASH == field.hashCode() && //avoid calling
equals
CoMentionConstants.CO_MENTION_TYPE_FIELD.equals(field)){
return super.getReferences(typeField);
@@ -103,14 +105,62 @@ public class EntityMention extends Entit
return super.getReferences(field);
}
}
-
+ /**
+ * Checks if this mention does have a span assigned. EntityMentions without
+ * a span are considered to be valid from the begin of the document.
Examples
+ * could be manually tagged entities or entities extracted from the
metadata
+ * of an document.
+ * @return if this entity has a span or not.
+ */
public boolean hasSpan(){
return span != null;
}
+ /**
+ * The start of the span selected by this mention or <code>null</code> if
this
+ * mention does not have a span assigned.
+ * @return the start char position of the mention or <code>null</code> if
none
+ */
public Integer getStart(){
return span != null ? span[0] : null;
}
+ /**
+ * The end of the span selected by this mention or <code>null</code> if
this
+ * mention does not have a span assigned.
+ * @return the end char position of the mention or <code>null</code> if
none
+ */
public Integer getEnd(){
return span != null ? span[1] : null;
}
+ /**
+ * The field used to obtain the names of the entities. For EntityMentions
+ * this is set on a per instance base, as the field my differ between
+ * different {@link EntityMention}s
+ * @return the field (property) used to obtain the labels of this mention
+ * @see EntityLinkerConfig#getNameField()
+ */
+ public UriRef getNameField() {
+ return nameField;
+ }
+ /**
+ * The field used to obtain the types of entities. For EntityMentions
+ * this is set on a per instance base, as the field my differ between
+ * different {@link EntityMention}s
+ * @return the field (property) used to obtain the type of this mention
+ * @see EntityLinkerConfig#getTypeField()
+ */
+ public UriRef getTypeField() {
+ return typeField;
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new
StringBuilder(EntityMention.class.getSimpleName());
+ sb.append(' ').append(getId()).append(" [labels: ");
+ sb.append(IteratorUtils.toList(getText(nameField)).toString());
+ if(hasSpan()){
+ sb.append(" |
span:[").append(getStart()).append(',').append(getEnd()).append(']');
+ }
+ sb.append(']');
+ return sb.toString();
+ }
}
Modified:
stanbol/trunk/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/impl/InMemoryEntityIndex.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/impl/InMemoryEntityIndex.java?rev=1489739&r1=1489738&r2=1489739&view=diff
==============================================================================
---
stanbol/trunk/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/impl/InMemoryEntityIndex.java
(original)
+++
stanbol/trunk/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/impl/InMemoryEntityIndex.java
Wed Jun 5 08:26:31 2013
@@ -43,6 +43,8 @@ import org.apache.stanbol.enhancer.engin
import org.apache.stanbol.enhancer.engines.entitylinking.EntitySearcher;
import org.apache.stanbol.enhancer.engines.entitylinking.LabelTokenizer;
import org.apache.stanbol.enhancer.servicesapi.rdf.NamespaceEnum;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* EntitySearch implementation that does hold Entity information of mentioned
* Entities in memory.
@@ -51,6 +53,8 @@ import org.apache.stanbol.enhancer.servi
*/
public class InMemoryEntityIndex implements EntitySearcher {
+ private final Logger log =
LoggerFactory.getLogger(InMemoryEntityIndex.class);
+
protected final LabelTokenizer tokenizer;
//Holds Entity data
private SortedMap<String,Collection<Entity>> index = new
TreeMap<String,Collection<Entity>>(String.CASE_INSENSITIVE_ORDER);
@@ -72,6 +76,9 @@ public class InMemoryEntityIndex impleme
public void addEntity(Entity entity){
+ if(log.isDebugEnabled()){
+ log.debug(" > register {}",entity);
+ }
entities.put(entity.getUri(), entity);
Iterator<PlainLiteral> labels = entity.getText(nameField);
while(labels.hasNext()){