Author: rwesten
Date: Mon Jul 23 07:19:09 2012
New Revision: 1364535
URL: http://svn.apache.org/viewvc?rev=1364535&view=rev
Log:
fixes STANBOL-697:
The code change is limited to SparqlQueryUtils#addDataTypeValueConstraint(..).
This method supports multiple values. In "any" mode (entities only need to
have one of the parsed values) it adds UNIONs to the SPARQL query. If only a
single value is parsed, the opening '{' was added but the closing '}' never
was. In other words, the fix is not to add the missing '}' but to stop adding
the unintended '{' that was emitted for ValueConstraints (and
ReferenceConstraints) with a single value.
see http://markmail.org/message/rgc27umxg3mmcsjx for details
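For illustration only (not part of this commit; the property and values are
made up): with a single-valued Value/ReferenceConstraint in "any" mode the
generated pattern looked roughly like the first fragment below, leaving an
unbalanced '{'. After the fix the grouping braces are only emitted when a
UNION over multiple values is actually needed.

  # before the fix (single value): opening '{' without a matching '}'
  { ?entity rdf:type <http://example.org/Person> .

  # after the fix (single value): no grouping braces at all
  ?entity rdf:type <http://example.org/Person> .

  # multiple values ("any" mode): balanced groups combined via UNION
  { ?entity rdf:type <http://example.org/Person> . } UNION
  { ?entity rdf:type <http://example.org/Place> . }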
Other changes are related to the patch provided by David:
* Adding trace-level logging
* toString() methods for some classes
* Code formatting
I modified some of the logging calls to use Logger.trace(String, Object[])
instead of Logger.trace(String.format(..))
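As a minimal sketch of that change (illustrative lines, not copied from the
patch): String.format(..) builds the message eagerly even when TRACE is
disabled, while the parameterized form lets SLF4J skip the formatting work:

  // before: the message is always formatted, even if TRACE is disabled
  log.trace(String.format("created query of type [%s]:%n%s", query.getClass(), query));
  // after: the {} placeholders are only substituted when TRACE is enabled
  log.trace("created query of type [{}]:\n{}", query.getClass(), query);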
Modified:
incubator/stanbol/trunk/enhancer/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java
incubator/stanbol/trunk/entityhub/generic/core/src/main/java/org/apache/stanbol/entityhub/core/impl/ReferencedSiteImpl.java
incubator/stanbol/trunk/entityhub/generic/core/src/main/java/org/apache/stanbol/entityhub/core/query/FieldQueryImpl.java
incubator/stanbol/trunk/entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/query/Constraint.java
incubator/stanbol/trunk/entityhub/query/clerezza/src/main/java/org/apache/stanbol/entityhub/query/clerezza/SparqlFieldQuery.java
incubator/stanbol/trunk/entityhub/query/clerezza/src/main/java/org/apache/stanbol/entityhub/query/clerezza/SparqlFieldQueryFactory.java
incubator/stanbol/trunk/entityhub/query/clerezza/src/main/java/org/apache/stanbol/entityhub/query/clerezza/SparqlQueryUtils.java
incubator/stanbol/trunk/entityhub/site/linkeddata/src/main/java/org/apache/stanbol/entityhub/site/linkeddata/impl/CoolUriDereferencer.java
incubator/stanbol/trunk/entityhub/site/linkeddata/src/main/java/org/apache/stanbol/entityhub/site/linkeddata/impl/LarqSearcher.java
incubator/stanbol/trunk/entityhub/site/linkeddata/src/main/java/org/apache/stanbol/entityhub/site/linkeddata/impl/SparqlDereferencer.java
incubator/stanbol/trunk/entityhub/site/linkeddata/src/main/java/org/apache/stanbol/entityhub/site/linkeddata/impl/SparqlEndpointUtils.java
incubator/stanbol/trunk/entityhub/site/linkeddata/src/main/java/org/apache/stanbol/entityhub/site/linkeddata/impl/SparqlSearcher.java
incubator/stanbol/trunk/entityhub/site/linkeddata/src/main/java/org/apache/stanbol/entityhub/site/linkeddata/impl/VirtuosoSearcher.java
Modified: incubator/stanbol/trunk/enhancer/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java?rev=1364535&r1=1364534&r2=1364535&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java Mon Jul 23 07:19:09 2012
@@ -66,6 +66,7 @@ import org.apache.stanbol.entityhub.serv
import org.apache.stanbol.entityhub.servicesapi.model.rdf.RdfResourceEnum;
import org.apache.stanbol.entityhub.servicesapi.query.Constraint;
import org.apache.stanbol.entityhub.servicesapi.query.FieldQuery;
+import org.apache.stanbol.entityhub.servicesapi.query.FieldQueryFactory;
import org.apache.stanbol.entityhub.servicesapi.query.QueryResultList;
import org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint;
import org.apache.stanbol.entityhub.servicesapi.query.TextConstraint;
@@ -79,42 +80,45 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
- * Engine that uses a {@link Site} to search for entities for existing TextAnnotations of an Content
- * Item.
+ * Engine that uses a {@link Site} to search for entities for existing TextAnnotations of an Content Item.
*
* @author ogrisel, rwesten
*/
-@Component(configurationFactory = true, policy = ConfigurationPolicy.REQUIRE, // the baseUri is required!
+@Component(configurationFactory = true, policy = ConfigurationPolicy.REQUIRE, // the
+// baseUri
+// is
+// required!
specVersion = "1.1", metatype = true, immediate = true, inherit = true)
@Service
[email protected](value={
- @Property(name=EnhancementEngine.PROPERTY_NAME)
-})
-public class NamedEntityTaggingEngine
- extends AbstractEnhancementEngine<RuntimeException,RuntimeException>
[email protected](value = {@Property(name = EnhancementEngine.PROPERTY_NAME)})
+public class NamedEntityTaggingEngine extends AbstractEnhancementEngine<RuntimeException,RuntimeException>
implements EnhancementEngine, ServiceProperties {
private final Logger log = LoggerFactory.getLogger(getClass());
- @Property//(value = "dbpedia")
+ @Property
+ // (value = "dbpedia")
public static final String REFERENCED_SITE_ID = "org.apache.stanbol.enhancer.engines.entitytagging.referencedSiteId";
@Property(boolValue = false)
public static final String PERSON_STATE = "org.apache.stanbol.enhancer.engines.entitytagging.personState";
- @Property//(value = "dbp-ont:Person")
+ @Property
+ // (value = "dbp-ont:Person")
public static final String PERSON_TYPE = "org.apache.stanbol.enhancer.engines.entitytagging.personType";
@Property(boolValue = false)
public static final String ORG_STATE = "org.apache.stanbol.enhancer.engines.entitytagging.organisationState";
- @Property//(value = "dbp-ont:Organisation")
+ @Property
+ // (value = "dbp-ont:Organisation")
public static final String ORG_TYPE = "org.apache.stanbol.enhancer.engines.entitytagging.organisationType";
@Property(boolValue = false)
public static final String PLACE_STATE = "org.apache.stanbol.enhancer.engines.entitytagging.placeState";
- @Property//(value = "dbp-ont:Place")
+ @Property
+ // (value = "dbp-ont:Place")
public static final String PLACE_TYPE = "org.apache.stanbol.enhancer.engines.entitytagging.placeType";
/**
* Use the RDFS label as default
@@ -128,31 +132,30 @@ public class NamedEntityTaggingEngine
@Property(boolValue = true)
public static final String DEREFERENCE_ENTITIES = "org.apache.stanbol.enhancer.engines.entitytagging.dereference";
- @Property(intValue=0)
+ @Property(intValue = 0)
public static final String SERVICE_RANKING = Constants.SERVICE_RANKING;
/**
- * The default language for labels included in the enhancement metadata
- * (if not available for the parsed content).
+     * The default language for labels included in the enhancement metadata (if not available for the parsed
+ * content).
*/
private static final String DEFAULT_LANGUAGE = "en";
-
+
/**
-     * Service of the Entityhub that manages all the active referenced Site. This Service is used to lookup the
-     * configured Referenced Site when we need to enhance a content item.
+     * Service of the Entityhub that manages all the active referenced Site. This Service is used to lookup
+     * the configured Referenced Site when we need to enhance a content item.
*/
@Reference
protected SiteManager siteManager;
/**
- * Used to lookup Entities if the {@link #REFERENCED_SITE_ID} property is
- * set to "entityhub" or "local"
+     * Used to lookup Entities if the {@link #REFERENCED_SITE_ID} property is set to "entityhub" or "local"
*/
@Reference
protected Entityhub entityhub;
-
+
/**
- * This holds the id of the {@link Site} used to lookup Entities
- * or <code>null</code> if the {@link Entityhub} is used.
+     * This holds the id of the {@link Site} used to lookup Entities or <code>null</code> if the
+ * {@link Entityhub} is used.
*/
protected String referencedSiteID;
@@ -162,8 +165,6 @@ public class NamedEntityTaggingEngine
*/
public static final Integer defaultOrder = ORDERING_EXTRACTION_ENHANCEMENT;
-
-
/**
* State if text annotations of type {@link OntologicalClasses#DBPEDIA_PERSON} are enhanced by this engine
*/
@@ -207,13 +208,13 @@ public class NamedEntityTaggingEngine
* The number of Suggestions to be added
*/
protected Integer numSuggestions = 3;
-
+
protected boolean dereferenceEntities = true;
/**
* The {@link OfflineMode} is used by Stanbol to indicate that no external service should be referenced.
- * For this engine that means it is necessary to check if the used {@link Site} can operate
- * offline or not.
+ * For this engine that means it is necessary to check if the used {@link Site} can operate offline or
+ * not.
*
* @see #enableOfflineMode(OfflineMode)
* @see #disableOfflineMode(OfflineMode)
@@ -240,7 +241,7 @@ public class NamedEntityTaggingEngine
}
/**
- * Returns <code>true</code> only if Stanbol operates in {@link OfflineMode}.
+ * Returns <code>true</code> only if Stanbol operates in {@link OfflineMode} .
*
* @return the offline state
*/
@@ -264,7 +265,7 @@ public class NamedEntityTaggingEngine
throw new ConfigurationException(REFERENCED_SITE_ID,
"The ID of the Referenced Site is a required Parameter and
MUST NOT be an empty String!");
}
-
if(Entityhub.ENTITYHUB_IDS.contains(this.referencedSiteID.toLowerCase())){
+ if
(Entityhub.ENTITYHUB_IDS.contains(this.referencedSiteID.toLowerCase())) {
log.debug("Init NamedEntityTaggingEngine instance for the
Entityhub");
this.referencedSiteID = null;
}
@@ -303,23 +304,25 @@ public class NamedEntityTaggingEngine
public void computeEnhancements(ContentItem ci) throws EngineException {
final Site site;
- if(referencedSiteID != null) { //lookup the referenced site
+ if (referencedSiteID != null) { // lookup the referenced site
site = siteManager.getSite(referencedSiteID);
- //ensure that it is present
+ // ensure that it is present
if (site == null) {
String msg = String.format(
- "Unable to enhance %s because Referenced Site %s is
currently not active!",
- ci.getUri().getUnicodeString(), referencedSiteID);
+ "Unable to enhance %s because Referenced Site %s is
currently not active!", ci.getUri()
+ .getUnicodeString(), referencedSiteID);
log.warn(msg);
- // TODO: throwing Exceptions is currently deactivated. We need a more clear
+ // TODO: throwing Exceptions is currently deactivated. We need a
+ // more clear
// policy what do to in such situations
// throw new EngineException(msg);
return;
}
- //and that it supports offline mode if required
+ // and that it supports offline mode if required
if (isOfflineMode() && !site.supportsLocalMode()) {
- log.warn("Unable to enhance ci {} because OfflineMode is not
supported by ReferencedSite {}.",
- ci.getUri().getUnicodeString(), site.getId());
+ log.warn(
+ "Unable to enhance ci {} because OfflineMode is not
supported by ReferencedSite {}.", ci
+ .getUri().getUnicodeString(), site.getId());
return;
}
} else { // null indicates to use the Entityhub to lookup Entities
@@ -329,7 +332,8 @@ public class NamedEntityTaggingEngine
LiteralFactory literalFactory = LiteralFactory.getInstance();
// Retrieve the existing text annotations (requires read lock)
Map<NamedEntity,List<UriRef>> textAnnotations = new HashMap<NamedEntity,List<UriRef>>();
- //the language extracted for the parsed content or NULL if not available
+ // the language extracted for the parsed content or NULL if not
+ // available
String contentLangauge;
ci.getLock().readLock().lock();
try {
@@ -338,14 +342,17 @@ public class NamedEntityTaggingEngine
.hasNext();) {
UriRef uri = (UriRef) it.next().getSubject();
if (graph.filter(uri, Properties.DC_RELATION, null).hasNext()) {
- // this is not the most specific occurrence of this name: skip
+ // this is not the most specific occurrence of this name:
+ // skip
continue;
}
NamedEntity namedEntity = NamedEntity.createFromTextAnnotation(graph, uri);
- if(namedEntity != null){
- // This is a first occurrence, collect any subsumed annotations
+ if (namedEntity != null) {
+ // This is a first occurrence, collect any subsumed
+ // annotations
List<UriRef> subsumed = new ArrayList<UriRef>();
- for (Iterator<Triple> it2 = graph.filter(null, Properties.DC_RELATION, uri); it2.hasNext();) {
+ for (Iterator<Triple> it2 = graph.filter(null, Properties.DC_RELATION, uri); it2
+ .hasNext();) {
subsumed.add((UriRef) it2.next().getSubject());
}
textAnnotations.put(namedEntity, subsumed);
@@ -354,43 +361,49 @@ public class NamedEntityTaggingEngine
} finally {
ci.getLock().readLock().unlock();
}
- //search the suggestions
- Map<NamedEntity,List<Suggestion>> suggestions = new HashMap<NamedEntity,List<Suggestion>>(textAnnotations.size());
+ // search the suggestions
+ Map<NamedEntity,List<Suggestion>> suggestions = new HashMap<NamedEntity,List<Suggestion>>(
+ textAnnotations.size());
for (Entry<NamedEntity,List<UriRef>> entry : textAnnotations.entrySet()) {
try {
- List<Suggestion> entitySuggestions = computeEntityRecommentations(
- site, entry.getKey(),entry.getValue(),contentLangauge);
- if(entitySuggestions != null && !entitySuggestions.isEmpty()){
+ List<Suggestion> entitySuggestions = computeEntityRecommentations(site, entry.getKey(),
+ entry.getValue(), contentLangauge);
+ if (entitySuggestions != null && !entitySuggestions.isEmpty()) {
suggestions.put(entry.getKey(), entitySuggestions);
}
} catch (EntityhubException e) {
throw new EngineException(this, ci, e);
}
}
- //now write the results (requires write lock)
+ // now write the results (requires write lock)
ci.getLock().writeLock().lock();
try {
RdfValueFactory factory = RdfValueFactory.getInstance();
- Map<String, Representation> entityData = new HashMap<String,Representation>();
- for(Entry<NamedEntity,List<Suggestion>> entitySuggestions : suggestions.entrySet()){
+ Map<String,Representation> entityData = new HashMap<String,Representation>();
+ for (Entry<NamedEntity,List<Suggestion>> entitySuggestions : suggestions.entrySet()) {
List<UriRef> subsumed = textAnnotations.get(entitySuggestions.getKey());
List<NonLiteral> annotationsToRelate = new ArrayList<NonLiteral>(subsumed);
annotationsToRelate.add(entitySuggestions.getKey().getEntity());
- for(Suggestion suggestion : entitySuggestions.getValue()){
- log.debug("Add Suggestion {} for {}",
suggestion.getEntity().getId(), entitySuggestions.getKey());
+ for (Suggestion suggestion : entitySuggestions.getValue()) {
+ log.debug("Add Suggestion {} for {}",
suggestion.getEntity().getId(),
+ entitySuggestions.getKey());
EnhancementRDFUtils.writeEntityAnnotation(this,
literalFactory, graph, ci.getUri(),
annotationsToRelate, suggestion, nameField,
- //TODO: maybe we want labels in a different language than the
- // language of the content (e.g. Accept-Language header)?!
+ // TODO: maybe we want labels in a different
+ // language than the
+ // language of the content (e.g. Accept-Language
+ // header)?!
contentLangauge == null ? DEFAULT_LANGUAGE : contentLangauge);
if (dereferenceEntities) {
- entityData.put(suggestion.getEntity().getId(), suggestion.getEntity().getRepresentation());
+ entityData.put(suggestion.getEntity().getId(), suggestion.getEntity()
+ .getRepresentation());
}
}
}
- //if dereferneceEntities is true the entityData will also contain all
- //Representations to add! If false entityData will be empty
- for(Representation rep : entityData.values()){
+ // if dereferneceEntities is true the entityData will also contain
+ // all
+ // Representations to add! If false entityData will be empty
+ for (Representation rep : entityData.values()) {
graph.addAll(factory.toRdfRepresentation(rep).getRdfGraph());
}
} finally {
@@ -401,38 +414,50 @@ public class NamedEntityTaggingEngine
/**
* Computes the Enhancements
- * @param site The {@link SiteException} id or <code>null</code> to
- * use the {@link Entityhub}
- * @param literalFactory the {@link LiteralFactory} used to create RDF Literals
- * @param contentItemId the id of the contentItem
- * @param textAnnotation the text annotation to enhance
- * @param subsumedAnnotations other text annotations for the same entity
- * @param language the language of the analysed text or <code>null</code>
- * if not available.
+ *
+ * @param site
+ * The {@link SiteException} id or <code>null</code> to use the {@link Entityhub}
+ * @param literalFactory
+ * the {@link LiteralFactory} used to create RDF Literals
+ * @param contentItemId
+ * the id of the contentItem
+ * @param textAnnotation
+ * the text annotation to enhance
+ * @param subsumedAnnotations
+ * other text annotations for the same entity
+ * @param language
+ * the language of the analysed text or <code>null</code> if not available.
* @return the suggestions for the parsed {@link NamedEntity}
- * @throws EntityhubException On any Error while looking up Entities via
- * the Entityhub
+ * @throws EntityhubException
+ * On any Error while looking up Entities via the Entityhub
*/
protected final List<Suggestion> computeEntityRecommentations(Site site,
- NamedEntity namedEntity,
- List<UriRef> subsumedAnnotations, String language) throws EntityhubException {
+ NamedEntity namedEntity,
+ List<UriRef> subsumedAnnotations,
+ String language) throws EntityhubException {
// First get the required properties for the parsed textAnnotation
// ... and check the values
log.debug("Process {}", namedEntity);
- FieldQuery query = site == null ? //if site is NULL use the Entityhub
- entityhub.getQueryFactory().createFieldQuery() :
- site.getQueryFactory().createFieldQuery();
- // replace spaces with plus to create an AND search for all words in the name!
+ // if site is NULL use
+ // the Entityhub
+ FieldQueryFactory queryFactory = site == null ? entityhub.getQueryFactory() : site.getQueryFactory();
+
+ log.trace("Will use a query-factory of type [{}].", queryFactory.getClass().toString());
+
+ FieldQuery query = queryFactory.createFieldQuery();
+
+ // replace spaces with plus to create an AND search for all words in the
+ // name!
Constraint labelConstraint;
- //TODO: make case sensitivity configurable
+ // TODO: make case sensitivity configurable
boolean casesensitive = false;
String namedEntityLabel = casesensitive ? namedEntity.getName() : namedEntity.getName().toLowerCase();
- if(language != null){
- //search labels in the language and without language
- labelConstraint = new TextConstraint(namedEntityLabel,casesensitive,language,null);
+ if (language != null) {
+ // search labels in the language and without language
+ labelConstraint = new TextConstraint(namedEntityLabel, casesensitive, language, null);
} else {
- labelConstraint = new TextConstraint(namedEntityLabel,casesensitive);
+ labelConstraint = new TextConstraint(namedEntityLabel, casesensitive);
}
query.setConstraint(nameField, labelConstraint);
if (OntologicalClasses.DBPEDIA_PERSON.equals(namedEntity.getType())) {
@@ -466,73 +491,85 @@ public class NamedEntityTaggingEngine
return Collections.emptyList();
}
}
- query.setLimit(Math.max(20,this.numSuggestions*3));
- QueryResultList<Entity> results = site == null? //if site is NULL
- entityhub.findEntities(query) : //use the Entityhub
- site.findEntities(query); //else the referenced site
+ query.setLimit(Math.max(20, this.numSuggestions * 3));
+
+ log.trace("A query has been created of type [{}] and the following
settings:\n{}", query.getClass()
+ .toString(), query.toString());
+
+ if (null == site) log.trace("A query will be sent to the entity-hub of
type [{}].", entityhub
+ .getClass());
+ else log.trace("A query will be sent to a site [id :: {}][type ::
{}].", site.getId(), site
+ .getClass());
+
+ QueryResultList<Entity> results = site == null ? // if site is NULL
+ entityhub.findEntities(query)
+ : // use the Entityhub
+ site.findEntities(query); // else the referenced site
log.debug(" - {} results returned by query {}", results.size(),
results.getQuery());
- if(results.isEmpty()){ //no results nothing to do
+ if (results.isEmpty()) { // no results nothing to do
return Collections.emptyList();
}
- //we need to normalise the confidence values from [0..1]
+ // we need to normalise the confidence values from [0..1]
// * levenshtein distance as absolute (1.0 for exact match)
// * Solr scores * levenshtein to rank entities relative to each other
Float maxScore = null;
Float maxExactScore = null;
List<Suggestion> matches = new ArrayList<Suggestion>(numSuggestions);
- //assumes entities are sorted by score
- for (Iterator<Entity> guesses = results.iterator();guesses.hasNext();) {
+ // assumes entities are sorted by score
+ for (Iterator<Entity> guesses = results.iterator(); guesses.hasNext();) {
Suggestion match = new Suggestion(guesses.next());
Representation rep = match.getEntity().getRepresentation();
- Float score = rep.getFirst(RdfResourceEnum.resultScore.getUri(),Float.class);
- if(maxScore == null){
+ Float score = rep.getFirst(RdfResourceEnum.resultScore.getUri(), Float.class);
+ if (maxScore == null) {
maxScore = score;
}
Iterator<Text> labels = rep.getText(nameField);
- while(labels.hasNext() && match.getLevenshtein() < 1.0){
+ while (labels.hasNext() && match.getLevenshtein() < 1.0) {
Text label = labels.next();
- if(language == null || //if the content language is unknown -> accept all labels
- label.getLanguage() == null || //accept labels with no language
- //and labels in the same language as the content
- (language != null && label.getLanguage().startsWith(language))){
+ if (language == null || // if the content language is unknown ->
+ // accept all labels
+ label.getLanguage() == null || // accept labels with no
+ // language
+ // and labels in the same language as the content
+ (language != null && label.getLanguage().startsWith(language))) {
double actMatch = levenshtein(
- casesensitive ? label.getText().toLowerCase() : label.getText(),
- namedEntityLabel);
- if(actMatch > match.getLevenshtein()){
+ casesensitive ? label.getText().toLowerCase() : label.getText(), namedEntityLabel);
+ if (actMatch > match.getLevenshtein()) {
match.setLevenshtein(actMatch);
match.setMatchedLabel(label);
}
}
}
- if(match.getMatchedLabel() != null){
- if(match.getLevenshtein() == 1.0){
- if(maxExactScore == null){
+ if (match.getMatchedLabel() != null) {
+ if (match.getLevenshtein() == 1.0) {
+ if (maxExactScore == null) {
maxExactScore = score;
}
- //normalise exact matches against the best exact score
- match.setScore(score.doubleValue()/maxExactScore.doubleValue());
+ // normalise exact matches against the best exact score
+ match.setScore(score.doubleValue() / maxExactScore.doubleValue());
} else {
- //normalise partial matches against the best match and the
- //Levenshtein similarity with the label
- match.setScore(score.doubleValue()*match.getLevenshtein()/maxScore.doubleValue());
+ // normalise partial matches against the best match and the
+ // Levenshtein similarity with the label
+ match.setScore(score.doubleValue() * match.getLevenshtein() / maxScore.doubleValue());
}
matches.add(match);
} else {
- log.debug("No value of {} for Entity
{}!",nameField,match.getEntity().getId());
+ log.debug("No value of {} for Entity {}!", nameField,
match.getEntity().getId());
}
}
- //now sort the results
+ // now sort the results
Collections.sort(matches);
- return matches.subList(0, Math.min(matches.size(),numSuggestions));
+ return matches.subList(0, Math.min(matches.size(), numSuggestions));
}
/**
- * This EnhancementEngine can enhance any ContentItem as it does consume
- * existing TextAnnotations with the configured dc:type's
+ * This EnhancementEngine can enhance any ContentItem as it does consume existing TextAnnotations with the
+ * configured dc:type's
+ *
* @see org.apache.stanbol.enhancer.servicesapi.EnhancementEngine#canEnhance(org.apache.stanbol.enhancer.servicesapi.ContentItem)
*/
public int canEnhance(ContentItem ci) {
- return ENHANCE_ASYNC; //Entity tagging now supports asyc processing
+ return ENHANCE_ASYNC; // Entity tagging now supports asyc processing
}
@Override
@@ -540,23 +577,29 @@ public class NamedEntityTaggingEngine
return Collections.unmodifiableMap(Collections.singletonMap(ENHANCEMENT_ENGINE_ORDERING,
(Object) defaultOrder));
}
+
/**
- * Compares two strings (after {@link StringUtils#trim(String) trimming})
- * by using the Levenshtein's Edit Distance of the two
- * strings. Does not return the {@link Integer} number of changes but
- * <code>1-(changes/maxStringSizeAfterTrim)</code><p>
- * @param s1 the first string
- * @param s2 the second string
+ * Compares two strings (after {@link StringUtils#trim(String) trimming}) by using the Levenshtein's Edit
+ * Distance of the two strings. Does not return the {@link Integer} number of changes but
+ * <code>1-(changes/maxStringSizeAfterTrim)</code>
+ * <p>
+ *
+ * @param s1
+ * the first string
+ * @param s2
+ * the second string
* @return the distance
- * @throws IllegalArgumentException if any of the two parsed strings is NULL
+ * @throws IllegalArgumentException
+ * if any of the two parsed strings is NULL
*/
- private static double levenshtein(String s1, String s2) {
- if(s1 == null || s2 == null){
+ private static double levenshtein(String s1, String s2) {
+ if (s1 == null || s2 == null) {
throw new IllegalArgumentException("NONE of the parsed String MUST
BE NULL!");
}
s1 = StringUtils.trim(s1);
s2 = StringUtils.trim(s2);
- return s1.isEmpty() || s2.isEmpty() ? 0 :
- 1.0 - (((double)getLevenshteinDistance(s1, s2)) / ((double)(Math.max(s1.length(), s2.length()))));
+ return s1.isEmpty() || s2.isEmpty() ? 0
+ : 1.0 - (((double) getLevenshteinDistance(s1, s2)) / ((double) (Math.max(s1.length(),
+ s2.length()))));
}
}