Modified: incubator/stanbol/branches/disambiguation-engine/engines/disambiguation-mlt/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/mlt/Suggestion.java URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/disambiguation-engine/engines/disambiguation-mlt/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/mlt/Suggestion.java?rev=1379385&r1=1379384&r2=1379385&view=diff ============================================================================== --- incubator/stanbol/branches/disambiguation-engine/engines/disambiguation-mlt/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/mlt/Suggestion.java (original) +++ incubator/stanbol/branches/disambiguation-engine/engines/disambiguation-mlt/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/mlt/Suggestion.java Fri Aug 31 09:39:57 2012 @@ -16,91 +16,216 @@ */ package org.apache.stanbol.enhancer.engine.disambiguation.mlt; +import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_CONFIDENCE; +import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_REFERENCE; +import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTED_TEXT; + +import java.util.SortedMap; +import java.util.SortedSet; + +import org.apache.clerezza.rdf.core.LiteralFactory; +import org.apache.clerezza.rdf.core.TripleCollection; +import org.apache.clerezza.rdf.core.UriRef; +import org.apache.stanbol.enhancer.servicesapi.ContentItem; +import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper; +import org.apache.stanbol.enhancer.servicesapi.rdf.Properties; import org.apache.stanbol.entityhub.servicesapi.model.Entity; import org.apache.stanbol.entityhub.servicesapi.model.Text; +import org.apache.stanbol.entityhub.servicesapi.model.rdf.RdfResourceEnum; +import org.apache.stanbol.entityhub.servicesapi.site.Site; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * A suggestion of an {@link Entity} 
for a fise:TextAnnotation processed * by the NamedEntityTaggingEngine - * @author Rupert Westenthaler */ public class Suggestion implements Comparable<Suggestion>{ - private final Entity entity; - private double levenshtein = -1; - private Double score; - private Text matchedLabel; - private String URI; - protected Suggestion(Entity entity){ - this.entity = entity; + + private static final Logger log = LoggerFactory.getLogger(Suggestion.class); + + private static final LiteralFactory lf = LiteralFactory.getInstance(); + + private static final UriRef ENTITYHUB_SITE = new UriRef(RdfResourceEnum.site.getUri()); + + private UriRef entityAnnotation; + private UriRef entityUri; + private Double originalConfidnece; + + private Entity entity; + private Double normalizedDisambiguationScore; + private Double disambiguatedConfidence; + private String site; + + + private Suggestion(UriRef entityAnnotation){ + this.entityAnnotation = entityAnnotation; } + public Suggestion(Entity entity){ + this.entity = entity; + this.entityUri = new UriRef(entity.getId()); + this.site = entity.getSite(); + } /** - * @return the levenshtein + * Creates a Suggestion from an existing fise:EntityAnnotation contained + * in the metadata of the processed {@link ContentItem} + * @param graph + * @param entityAnnotation + * @return the Suggestion or <code>null</code> if the entity reference is missing */ - public final double getLevenshtein() { - return levenshtein; + public static Suggestion createFromEntityAnnotation(TripleCollection graph, UriRef entityAnnotation){ + Suggestion suggestion = new Suggestion(entityAnnotation); + suggestion.entityUri = EnhancementEngineHelper.getReference( + graph, entityAnnotation, ENHANCER_ENTITY_REFERENCE); + if(suggestion.entityUri == null){ + //most likely not a fise:EntityAnnotation + log.debug("Unable to create Suggestion for EntityAnnotation {} " + + "because property {} is not present", entityAnnotation, ENHANCER_ENTITY_REFERENCE); + return null; + } + suggestion.originalConfidnece = EnhancementEngineHelper.get( + graph, 
entityAnnotation, ENHANCER_CONFIDENCE, Double.class, lf); + if(suggestion.originalConfidnece == null){ + log.warn("EntityAnnotation {} does not define a value for " + + "property {}. Will use '0' as fallback", entityAnnotation, ENHANCER_CONFIDENCE); + suggestion.originalConfidnece = 0.0; + } + suggestion.site = EnhancementEngineHelper.getString(graph, entityAnnotation, ENTITYHUB_SITE); + //NOTE: site might be NULL + return suggestion; } - /** - * @param levenshtein the levenshtein to set + * The URI of the fise:EntityAnnotation representing this suggestion in the + * {@link ContentItem#getMetadata() metadata} of the processed + * {@link ContentItem}. This will be <code>null</code> if this Suggestion + * was created as part of the Disambiguation process and was not present + * in the metadata of the content item before the disambiguation. + * @return the URI of the fise:EntityAnnotation or <code>null</code> if + * not present. */ - protected final void setLevenshtein(double levenshtein) { - this.levenshtein = levenshtein; + public UriRef getEntityAnnotation() { + return entityAnnotation; } - - /** - * @return the score + * Allows to set the URI of the fise:EntityAnnotation. This is required + * if the original enhancement structure shared one fise:EntityAnnotation + * instance for two fise:TextAnnotations (e.g. because both TextAnnotations + * had the exact same value for fise:selected-text). After + * disambiguation it is necessary to 'clone' fise:EntityAnnotations like + * that to give them different fise:confidence values. Because of that + * it is supported to set the new URI of the cloned fise:EntityAnnotation. 
+ * @param uri the uri of the cloned fise:EntityAnnotation */ - public final Double getScore() { - return score; + public void setEntityAnnotation(UriRef uri) { + this.entityAnnotation = uri; } - /** - * @param score the score to set + * The URI of the Entity (MUST NOT be <code>null</code>) + * @return the URI */ - protected final void setScore(Double score) { - this.score = score; + public UriRef getEntityUri() { + return entityUri; } - /** - * @return the matchedLabel + * The original confidence of the fise:EntityAnnotation or <code>null</code> + * if not available. + * @return */ - public final Text getMatchedLabel() { - return matchedLabel; + public Double getOriginalConfidnece() { + return originalConfidnece; } - /** - * @param matchedLabel the matchedLabel to set + * The {@link Entity} or <code>null</code> if not available. For + * Suggestions that are created based on fise:EntityAnnotations the Entity + * is not available. Entities might be loaded as part of the + * Disambiguation process. + * @return the {@link Entity} or <code>null</code> if not available + */ + public Entity getEntity() { + return entity; + } + + /** + * The score of the disambiguation. This is just the score of the + * disambiguation that is not yet combined with the + * {@link #getOriginalConfidnece()} to become the + * {@link #getDisambiguatedConfidence()} + * @return the disambiguation score */ - protected final void setMatchedLabel(Text matchedLabel) { - this.matchedLabel = matchedLabel; + public Double getNormalizedDisambiguationScore() { + return normalizedDisambiguationScore; } - protected final String getURI() { - return this.URI; + /** + * The confidence after disambiguation. 
Will be <code>null</code> at the + * beginning + * @return the disambiguated confidence or <code>null</code> if not yet + * disambiguated + */ + public Double getDisambiguatedConfidence() { + return disambiguatedConfidence; } - protected final void setURI(String URI) { - this.URI = URI; + /** + * The name of the Entityhub {@link Site} the suggested Entity is + * managed. + * @return the name of the Entityhub {@link Site} + */ + public String getSite() { + return site; } - /** - * @return the entity + * Setter for the normalized [0..1] score of the disambiguation + * @param normalizedDisambiguationScore */ - public final Entity getEntity() { - return entity; + public void setNormalizedDisambiguationScore(Double normalizedDisambiguationScore) { + this.normalizedDisambiguationScore = normalizedDisambiguationScore; + } + /** + * Setter for the confidence after disambiguation + * @param disambiguatedConfidence + */ + public void setDisambiguatedConfidence(Double disambiguatedConfidence) { + this.disambiguatedConfidence = disambiguatedConfidence; } - @Override - public int compareTo(Suggestion other) { - return other.score.compareTo(score); + public int hashCode() { + return entityUri.hashCode(); + } + + @Override + public boolean equals(Object obj) { + return obj instanceof Suggestion && ((Suggestion)obj).entityUri.equals(entityUri); } + /** + * Compares based on the {@link #getDisambiguatedConfidence()} (if present) + * and falls back to the {@link #getOriginalConfidnece()}. If the + * original confidence value is not present or both Suggestions do have + * the same confidence the natural order of the Entities URI is used. 
This + * also ensures <code>(x.compareTo(y)==0) == (x.equals(y))</code> and + * allows to use this class with {@link SortedMap} and {@link SortedSet} + * implementations.<p> + */ + @Override + public int compareTo(Suggestion other) { + int result; + if(disambiguatedConfidence != null && other.disambiguatedConfidence != null){ + result = other.disambiguatedConfidence.compareTo(disambiguatedConfidence); + } else if(other.originalConfidnece != null && originalConfidnece != null){ + result = other.originalConfidnece.compareTo(originalConfidnece); + } else { + result = 0; + } + //ensure (x.compareTo(y)==0) == (x.equals(y)) + return result == 0 ? entityUri.getUnicodeString().compareTo( + other.entityUri.getUnicodeString()) : result; + } }
Modified: incubator/stanbol/branches/disambiguation-engine/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/disambiguation-engine/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java?rev=1379385&r1=1379384&r2=1379385&view=diff ============================================================================== --- incubator/stanbol/branches/disambiguation-engine/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java (original) +++ incubator/stanbol/branches/disambiguation-engine/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java Fri Aug 31 09:39:57 2012 @@ -533,7 +533,7 @@ public class NamedEntityTaggingEngine ex // and labels in the same language as the content (language != null && label.getLanguage().startsWith(language))) { double actMatch = levenshtein( - casesensitive ? label.getText().toLowerCase() : label.getText(), namedEntityLabel); + casesensitive ? 
label.getText() : label.getText().toLowerCase(), namedEntityLabel); if (actMatch > match.getLevenshtein()) { match.setLevenshtein(actMatch); match.setMatchedLabel(label); Modified: incubator/stanbol/branches/disambiguation-engine/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/disambiguation-engine/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java?rev=1379385&r1=1379384&r2=1379385&view=diff ============================================================================== --- incubator/stanbol/branches/disambiguation-engine/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java (original) +++ incubator/stanbol/branches/disambiguation-engine/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java Fri Aug 31 09:39:57 2012 @@ -741,7 +741,7 @@ public class KeywordLinkingEngine if(minSearchTokenLength < 1){ throw new ConfigurationException(MIN_SEARCH_TOKEN_LENGTH, "Values MUST be valid Integer values > 0"); } - linkerConfig.setMaxSuggestions(minSearchTokenLength); + linkerConfig.setMinSearchTokenLength(minSearchTokenLength); } //init the REDIRECT_PROCESSING_MODE value = configuration.get(REDIRECT_PROCESSING_MODE); Modified: incubator/stanbol/branches/disambiguation-engine/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/EntityLinker.java URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/disambiguation-engine/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/EntityLinker.java?rev=1379385&r1=1379384&r2=1379385&view=diff ============================================================================== --- 
incubator/stanbol/branches/disambiguation-engine/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/EntityLinker.java (original) +++ incubator/stanbol/branches/disambiguation-engine/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/EntityLinker.java Fri Aug 31 09:39:57 2012 @@ -410,7 +410,9 @@ public class EntityLinker { //ensure the correct order of the tokens in the suggested entity boolean search = true; int firstFoundIndex = -1; + int firstProcessableFoundIndex = -1; int lastFoundIndex = -1; + int lastProcessableFoundIndex = -1; int firstFoundLabelIndex = -1; int lastfoundLabelIndex = -1; Token currentToken; @@ -462,6 +464,10 @@ public class EntityLinker { if(found){ //found if(isProcessable){ foundProcessableTokens++; //only count processable Tokens + if(firstProcessableFoundIndex < 0){ + firstProcessableFoundIndex = currentIndex; + } + lastProcessableFoundIndex = currentIndex; } foundTokens++; foundTokenMatch = foundTokenMatch + matchFactor; //sum up the matches @@ -512,6 +518,7 @@ public class EntityLinker { if(found){ //found if(isProcessable){ foundProcessableTokens++; //only count processable Tokens + firstProcessableFoundIndex = currentIndex; } foundTokens++; foundTokenMatch = foundTokenMatch + matchFactor; //sum up the matches @@ -533,6 +540,7 @@ public class EntityLinker { //e.g. if given and family name of persons are switched MATCH labelMatch; int coveredTokens = lastFoundIndex-firstFoundIndex+1; + int coveredProcessableTokens = lastProcessableFoundIndex-firstProcessableFoundIndex+1; float labelMatchScore = (foundTokenMatch/(float)labelTokens.length); //Matching rules // - if less than config#minTokenFound() than accept only EXACT @@ -552,8 +560,8 @@ public class EntityLinker { // Tokens are found, but if all Tokens of the Label are // matched! 
(STANBOL-622) //foundTokens == coveredTokens) && - foundTokens >= labelTokens.length) && - labelMatchScore >= 0.6f){ + foundTokens >= labelTokens.length)){ //&& + //labelMatchScore >= 0.6f){ //same as above //if(foundTokens == coveredTokens){ if(foundTokens == labelTokens.length && foundTokens == coveredTokens){ @@ -568,7 +576,9 @@ public class EntityLinker { if(match.getMatchCount() < foundProcessableTokens || match.getMatchCount() == foundProcessableTokens && labelMatch.ordinal() > match.getMatch().ordinal()){ - match.updateMatch(labelMatch, firstFoundIndex, coveredTokens, foundTokens, +// match.updateMatch(labelMatch, firstFoundIndex, coveredTokens, foundTokens, +// foundTokenMatch/foundTokens,label,labelTokens.length); + match.updateMatch(labelMatch, firstProcessableFoundIndex, coveredProcessableTokens, foundProcessableTokens, foundTokenMatch/foundTokens,label,labelTokens.length); } //else this match is not better as the existing one } //else ignore labels with MATCH.NONE Modified: incubator/stanbol/branches/disambiguation-engine/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/EnhancementEngineHelper.java URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/disambiguation-engine/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/EnhancementEngineHelper.java?rev=1379385&r1=1379384&r2=1379385&view=diff ============================================================================== --- incubator/stanbol/branches/disambiguation-engine/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/EnhancementEngineHelper.java (original) +++ incubator/stanbol/branches/disambiguation-engine/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/EnhancementEngineHelper.java Fri Aug 31 09:39:57 2012 @@ -16,11 +16,13 @@ */ package org.apache.stanbol.enhancer.servicesapi.helper; +import static java.util.Collections.singleton; import static 
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_LANGUAGE; import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE; import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_TEXTANNOTATION; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.Date; @@ -35,6 +37,7 @@ import org.apache.clerezza.rdf.core.Lite import org.apache.clerezza.rdf.core.LiteralFactory; import org.apache.clerezza.rdf.core.MGraph; import org.apache.clerezza.rdf.core.NonLiteral; +import org.apache.clerezza.rdf.core.Resource; import org.apache.clerezza.rdf.core.Triple; import org.apache.clerezza.rdf.core.TripleCollection; import org.apache.clerezza.rdf.core.TypedLiteral; @@ -205,6 +208,23 @@ public class EnhancementEngineHelper { return enhancement; } /** + * Adds the parsed {@link EnhancementEngine} as dc:contributor to the + * enhancement and also sets the dc:modified property accordingly + * @param metadata the {@link ContentItem#getMetadata()} + * @param enhancement the enhancement + * @param engine the engine + */ + public static void addContributingEngine(MGraph metadata, UriRef enhancement, + EnhancementEngine engine){ + LiteralFactory literalFactory = LiteralFactory.getInstance(); + // TODO: use a public dereferencing URI instead? + metadata.add(new TripleImpl(enhancement, Properties.DC_CONTRIBUTOR, + literalFactory.createTypedLiteral(engine.getClass().getName()))); + //set the modification date to the current date. 
+ set(metadata,enhancement,Properties.DC_MODIFIED,new Date(),literalFactory); + } + + /** * Create a new extraction instance in the metadata-graph of the content * item along with default properties (dc:creator and dc:created) and return * the UriRef of the extraction so that engines can further add @@ -288,6 +308,76 @@ public class EnhancementEngineHelper { } } /** + * Replaces all current values of the property for the resource + * with the parsed value + * @param graph the graph + * @param resource the resource + * @param property the property + * @param value the value + */ + public static void set(MGraph graph, NonLiteral resource, UriRef property, Resource value){ + set(graph,resource,property,value == null ? null : singleton(value),null); + } + /** + * Replaces all current values of the property for the resource + * with the parsed values + * @param graph the graph + * @param resource the resource + * @param property the property + * @param value the value + */ + public static void set(MGraph graph, NonLiteral resource, UriRef property, Collection<Resource> values){ + set(graph,resource,property,values,null); + } + + /** + * Replaces all current values of the property for the resource + * with the parsed value + * @param graph the graph + * @param resource the resource + * @param property the property + * @param value the value. In case it is an instance of {@link Resource} it + * is directly added to the graph. Otherwise the parsed {@link LiteralFactory} + * is used to create a {@link TypedLiteral} for the parsed value. + * @param literalFactory the {@link LiteralFactory} used in case the parsed + * value is not an {@link Resource} + */ + public static void set(MGraph graph, NonLiteral resource, UriRef property, + Object value, LiteralFactory literalFactory){ + set(graph,resource,property,value == null ? 
null : singleton(value),literalFactory); + } + /** + * Replaces all current values of the property for the resource + * with the parsed values + * @param graph the graph + * @param resource the resource + * @param property the property + * @param value the value. In case it is an instance of {@link Resource} it + * is directly added to the graph. Otherwise the parsed {@link LiteralFactory} + * is used to create a {@link TypedLiteral} for the parsed value. + * @param literalFactory the {@link LiteralFactory} used in case the parsed + * value is not an {@link Resource} + */ + public static void set(MGraph graph, NonLiteral resource, UriRef property, + Collection<?> values, LiteralFactory literalFactory){ + Iterator<Triple> currentValues = graph.filter(resource, property, null); + while(currentValues.hasNext()){ + currentValues.next(); + currentValues.remove(); + } + if(values != null){ + for(Object value : values){ + if(value instanceof Resource){ + graph.add(new TripleImpl(resource, property, (Resource) value)); + } else if (value != null){ + graph.add(new TripleImpl(resource, property, + literalFactory.createTypedLiteral(value))); + } + } + } + } + + /** * Getter for the typed literal values of the property for a resource * @param <T> the java class the literal value needs to be converted to. 
* Note that the parsed LiteralFactory needs to support this conversion Modified: incubator/stanbol/branches/disambiguation-engine/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/rdf/Properties.java URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/disambiguation-engine/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/rdf/Properties.java?rev=1379385&r1=1379384&r2=1379385&view=diff ============================================================================== --- incubator/stanbol/branches/disambiguation-engine/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/rdf/Properties.java (original) +++ incubator/stanbol/branches/disambiguation-engine/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/rdf/Properties.java Fri Aug 31 09:39:57 2012 @@ -61,11 +61,27 @@ public class Properties { + "created"); /** + * Modification date of a resource. Used by Stanbol Enhancer to annotate the + * modification date of the enhancement if it was changed by an + * enhancement engine other than the one that created it. Multiple changes by the + * creating enhancement engine are not considered as modifications. + */ + public static final UriRef DC_MODIFIED = new UriRef(NamespaceEnum.dc + + "modified"); + + /** * The entity responsible for the creation of a resource. Used by Stanbol Enhancer to * annotate the enhancement engine that created an enhancement */ public static final UriRef DC_CREATOR = new UriRef(NamespaceEnum.dc + "creator"); + /** + * An entity that contributed to a resource. Used by Stanbol Enhancer to + * annotate the enhancement engine that changed an enhancement originally + * created by another enhancement engine + */ + public static final UriRef DC_CONTRIBUTOR = new UriRef(NamespaceEnum.dc + + "contributor"); /** * The nature or genre of the resource. 
Stanbol Enhancer uses this property to refer to Modified: incubator/stanbol/branches/disambiguation-engine/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/resource/ContentItemResource.java URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/disambiguation-engine/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/resource/ContentItemResource.java?rev=1379385&r1=1379384&r2=1379385&view=diff ============================================================================== --- incubator/stanbol/branches/disambiguation-engine/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/resource/ContentItemResource.java (original) +++ incubator/stanbol/branches/disambiguation-engine/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/resource/ContentItemResource.java Fri Aug 31 09:39:57 2012 @@ -28,8 +28,10 @@ import static org.apache.stanbol.enhance import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_RELATION; import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE; import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_CONFIDENCE; +import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_END; import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_LABEL; import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_REFERENCE; +import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_START; import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.GEO_LAT; import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.GEO_LONG; import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_ENTITYANNOTATION; @@ -93,6 +95,7 @@ import org.apache.stanbol.enhancer.servi import org.apache.stanbol.enhancer.servicesapi.helper.execution.Execution; import org.apache.stanbol.enhancer.servicesapi.rdf.ExecutionMetadata; import 
org.apache.stanbol.enhancer.servicesapi.rdf.Properties; +import org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.CONFIDENCE_LEVEL_ENUM; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -136,8 +139,8 @@ public class ContentItemResource extends * {@link Properties#ENHANCER_SELECTED_TEXT}. * This map is initialised by {@link #initOccurrences()}. */ - protected Map<UriRef,Map<String,EntityExtractionSummary>> extractionsByTypeMap = - new HashMap<UriRef,Map<String,EntityExtractionSummary>>(); + protected Map<UriRef,Map<EntityExtractionSummary,EntityExtractionSummary>> extractionsByTypeMap = + new HashMap<UriRef,Map<EntityExtractionSummary,EntityExtractionSummary>>(); private MGraph executionMetadata; @@ -247,7 +250,7 @@ public class ContentItemResource extends * Checks if there are Occurrences */ public boolean hasOccurrences(){ - for(Map<String,EntityExtractionSummary> occ : extractionsByTypeMap.values()){ + for(Map<EntityExtractionSummary,EntityExtractionSummary> occ : extractionsByTypeMap.values()){ if(!occ.isEmpty()){ return true; } @@ -278,7 +281,7 @@ public class ContentItemResource extends } } public Collection<EntityExtractionSummary> getOccurrences(UriRef type){ - Map<String,EntityExtractionSummary> typeMap = extractionsByTypeMap.get(type); + Map<EntityExtractionSummary,EntityExtractionSummary> typeMap = extractionsByTypeMap.get(type); Collection<EntityExtractionSummary> typeOccurrences; if(typeMap != null){ typeOccurrences = typeMap.values(); @@ -343,31 +346,33 @@ public class ContentItemResource extends Iterator<Triple> textAnnotations = graph.filter(null, RDF.type, ENHANCER_TEXTANNOTATION); while(textAnnotations.hasNext()){ NonLiteral textAnnotation = textAnnotations.next().getSubject(); - if (graph.filter(textAnnotation, DC_RELATION, null).hasNext()) { - // this is not the most specific occurrence of this name: skip - continue; - } + //if (graph.filter(textAnnotation, DC_RELATION, null).hasNext()) { + // // this is not the most 
specific occurrence of this name: skip + // continue; + //} String text = getString(graph, textAnnotation, Properties.ENHANCER_SELECTED_TEXT); if(text == null){ //ignore text annotations without text continue; } + Integer start = EnhancementEngineHelper.get(graph,textAnnotation, + ENHANCER_START,Integer.class,lf); + Integer end = EnhancementEngineHelper.get(graph,textAnnotation, + ENHANCER_END,Integer.class,lf); + Double confidence = EnhancementEngineHelper.get(graph, textAnnotation, + ENHANCER_CONFIDENCE, Double.class, lf); Iterator<UriRef> types = getReferences(graph, textAnnotation, DC_TYPE); if(!types.hasNext()){ //create an iterator over null in case no types are present types = Collections.singleton((UriRef)null).iterator(); } while(types.hasNext()){ UriRef type = types.next(); - Map<String,EntityExtractionSummary> occurrenceMap = extractionsByTypeMap.get(type); + Map<EntityExtractionSummary,EntityExtractionSummary> occurrenceMap = extractionsByTypeMap.get(type); if(occurrenceMap == null){ - occurrenceMap = new TreeMap<String,EntityExtractionSummary>(String.CASE_INSENSITIVE_ORDER); + occurrenceMap = new TreeMap<EntityExtractionSummary,EntityExtractionSummary>(); extractionsByTypeMap.put(type, occurrenceMap); } - EntityExtractionSummary entity = occurrenceMap.get(text); - if(entity == null){ - entity = new EntityExtractionSummary(text, type, defaultThumbnails); - occurrenceMap.put(text, entity); - } + EntityExtractionSummary entity = new EntityExtractionSummary(text, type, start,end,confidence,defaultThumbnails); Collection<NonLiteral> suggestions = suggestionMap.get(textAnnotation); if(suggestions != null){ for(NonLiteral entityAnnotation : suggestions){ @@ -379,10 +384,106 @@ public class ContentItemResource extends graph); } } + EntityExtractionSummary existingSummary = occurrenceMap.get(entity); + if(existingSummary == null){//new extraction summary + occurrenceMap.put(entity, entity); + } else { + //extraction summary with this text and suggestions 
already + //present ... only add a mention to the existing + existingSummary.addMention(new Mention(text, start, end, confidence)); + } } } } - + /** + * Mentions of {@link EntityExtractionSummary EntityExtractionSummaries}. + * @author Rupert Westenthaler + * + */ + public static class Mention implements Comparable<Mention>{ + private String name; + private Integer start; + private Integer end; + private Double conf; + + Mention(String name,Integer start, Integer end, Double confidence){ + if(name == null){ + throw new IllegalStateException("The name for a Mention MUST NOT be NULL!"); + } + this.name = name; + this.start = start; + this.end = end; + this.conf = confidence; + } + + public String getName() { + return name; + } + public Integer getStart() { + return start; + } + public Integer getEnd() { + return end; + } + public Double getConfidence() { + return conf; + } + public boolean hasOccurrence() { + return start != null && end != null; + } + public boolean hasConfidence(){ + return conf != null; + } + @Override + public int hashCode() { + return name.hashCode() + + (start != null ? start.hashCode() : 0) + + (end != null ? 
end.hashCode() : 0); + } + + @Override + public boolean equals(Object obj) { + if(obj instanceof Mention){ + Mention o = (Mention)obj; + if(o.name.equals(name)){ + if((o.start != null && o.start.equals(start)) || + (o.start == null && start == null)){ + if(o.end != null && o.end.equals(end)){ + return true; + } else { + return o.end == null && end == null; + } + } + } + } + return false; + } + + @Override + public int compareTo(Mention o) { + int c = String.CASE_INSENSITIVE_ORDER.compare(o.name, this.name); + if(c == 0){ + if(start != null && o.start != null){ + c = start.compareTo(o.start); + } else if(o.start != null){ + c = 1; + } else if(start != null){ + c = -1; + } + if(c == 0){ + if(o.end != null && end != null){ + c = end.compareTo(o.end); + } else if(o.end != null){ + c = -1; + } else if(end != null){ + c = 1; + } + } + } + return c; + } + } + public ChainExecution getChainExecution(){ return chainExecution; } @@ -439,29 +540,49 @@ public class ContentItemResource extends protected final String name; + protected final UriRef type; protected List<EntitySuggestion> suggestions = new ArrayList<EntitySuggestion>(); + protected Set<UriRef> suggestionSet = new HashSet<UriRef>(); - protected List<String> mentions = new ArrayList<String>(); + protected List<Mention> mentions = new ArrayList<Mention>(); public final Map<UriRef,String> defaultThumbnails; - public EntityExtractionSummary(String name, UriRef type, Map<UriRef,String> defaultThumbnails) { + + private Integer start; + + private Integer end; + + + private Double confidence; + + public EntityExtractionSummary(String name, UriRef type, Integer start, Integer end, Double confidence, Map<UriRef,String> defaultThumbnails) { this.name = name; this.type = type; - mentions.add(name); + mentions.add(new Mention(name, start, end, confidence)); this.defaultThumbnails = defaultThumbnails; + this.start = start; + this.end = end; + this.confidence = confidence; } public void addSuggestion(UriRef uri, String label, 
Double confidence, TripleCollection properties) { EntitySuggestion suggestion = new EntitySuggestion(uri, type, label, confidence, properties, defaultThumbnails); + suggestionSet.add(uri); if (!suggestions.contains(suggestion)) { suggestions.add(suggestion); Collections.sort(suggestions); } } + public void addMention(Mention mention){ + if(!mentions.contains(mention)){ + mentions.add(mention); + Collections.sort(mentions); + } + } public String getName() { EntitySuggestion bestGuess = getBestGuess(); @@ -470,7 +591,9 @@ public class ContentItemResource extends } return name; } - + public String getSelected(){ + return name; + } public String getUri() { EntitySuggestion bestGuess = getBestGuess(); if (bestGuess != null) { @@ -478,6 +601,13 @@ public class ContentItemResource extends } return null; } + public Double getConfidence(){ + EntitySuggestion bestGuess = getBestGuess(); + if (bestGuess != null) { + return bestGuess.getConfidence(); + } + return confidence; + } public String getSummary() { if (suggestions.isEmpty()) { @@ -485,7 +615,15 @@ public class ContentItemResource extends } return suggestions.get(0).getSummary(); } - + public Integer getStart() { + return start; + } + public Integer getEnd() { + return end; + } + public boolean hasOccurrence(){ + return start != null && end != null; + } public String getThumbnailSrc() { if (suggestions.isEmpty()) { return getMissingThumbnailSrc(); @@ -507,18 +645,41 @@ public class ContentItemResource extends } return suggestions.get(0); } - + public List<EntitySuggestion> getSuggestions() { return suggestions; } - public List<String> getMentions() { + public List<Mention> getMentions() { return mentions; } @Override public int compareTo(EntityExtractionSummary o) { - return getName().compareTo(o.getName()); + int c = String.CASE_INSENSITIVE_ORDER.compare(getName(),o.getName()); + if(c == 0){ + if(suggestionSet.equals(o.suggestionSet)){ + return 0; //assume as equals if name and suggestionSet is the same + } else { 
//sort by mention + if(start != null && o.start != null){ + c = start.compareTo(o.start); + } else if(o.start != null){ + c = 1; + } else if(start != null){ + c = -1; + } + if(c == 0){ + if(o.end != null && end != null){ + c = end.compareTo(o.end); + } else if(o.end != null){ + c = -1; + } else if(end != null){ + c = 1; + } + } + } + } + return c; } @Override @@ -529,10 +690,14 @@ public class ContentItemResource extends if (o == null || getClass() != o.getClass()) { return false; } - EntityExtractionSummary that = (EntityExtractionSummary) o; - - return !(name != null ? !name.equals(that.name) : that.name != null); + //equal only if this name matches the other summary's name (ignoring case) and both hold the same suggestions + if(getName().equalsIgnoreCase(that.getName())){ + return suggestionSet.equals(that.suggestionSet); + } else { + return false; + } + //return !(name != null ? !name.equals(that.name) : that.name != null); } @Override Modified: incubator/stanbol/branches/disambiguation-engine/jersey/src/main/resources/org/apache/stanbol/enhancer/jersey/templates/imports/entities.ftl URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/disambiguation-engine/jersey/src/main/resources/org/apache/stanbol/enhancer/jersey/templates/imports/entities.ftl?rev=1379385&r1=1379384&r2=1379385&view=diff ============================================================================== --- incubator/stanbol/branches/disambiguation-engine/jersey/src/main/resources/org/apache/stanbol/enhancer/jersey/templates/imports/entities.ftl (original) +++ incubator/stanbol/branches/disambiguation-engine/jersey/src/main/resources/org/apache/stanbol/enhancer/jersey/templates/imports/entities.ftl Fri Aug 31 09:39:57 2012 @@ -31,6 +31,12 @@ <#else> ${entity.name} </#if> + <br><span class="metadata"> + <#if entity.name != entity.selected>for:'${entity.selected}',</#if> + <#if entity.mentions?size gt 1><#-- 'gt' because a bare '>' would end the FTL tag -->${entity.mentions?size} mentions + <#else> + <#if entity.hasOccurrence()>pos:[${entity.start},${entity.end}]</#if></#if>, +
<#-- skip when no confidence value is available (getConfidence() returns a nullable Double) --><#if entity.confidence??>conf:${entity.confidence?string("0.##")}</#if></span> </th> </tr> </thead> @@ -44,7 +50,9 @@ <tr> <td class="thumb"><img src="${suggestion.thumbnailSrc}" onerror="$(this).attr('src', '${suggestion.missingThumbnailSrc}');" alt="${suggestion.label}" /></td> - <td><a href="${suggestion.uri}" title="${suggestion.summary}" class="external">${suggestion.label}</a></td> + <td><a href="${suggestion.uri}" title="${suggestion.summary}" class="external"> + ${suggestion.label}</a><br> + <span class="metadata">conf:${suggestion.confidence?string("0.##")}</span></td> </tr> </#list> <#if entity.mentions?size != 0> @@ -55,7 +63,12 @@ <#list entity.mentions as mention> <tr> <td></td> - <td>${mention}</td> + <td>${mention.name}<br><span class="metadata"> + <#if mention.hasOccurrence()> + pos:[${mention.start},${mention.end}] + </#if> + <#if mention.hasConfidence()> + , conf: ${mention.confidence}</#if></span></td> </tr> </#list> </tbody>
