Author: rwesten
Date: Mon Jul 23 07:19:09 2012
New Revision: 1364535
URL: http://svn.apache.org/viewvc?rev=1364535&view=rev
Log:
fixes STANBOL-697:
The code change is limited to SparqlQueryUtils#addDataTypeValueConstraint(..).
This method supports multiple values. In "any" mode (entities only need to
have one of the parsed values) it adds UNIONs to the SPARQL query. If only a
single value is parsed, the opening '{' was added but the closing '}' never
was. In other words, the fix is not to add the missing '}' but to stop adding
the unintended '{' that was emitted for ValueConstraints (and
ReferenceConstraints) with a single value.
see http://markmail.org/message/rgc27umxg3mmcsjx for details
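For illustration only (not part of this commit; the property and values are
made up): with a single-valued Value/ReferenceConstraint in "any" mode the
generated pattern looked roughly like the first fragment below, leaving an
unbalanced '{'. After the fix the grouping braces are only emitted when a
UNION over multiple values is actually needed.

  # before the fix (single value): opening '{' without a matching '}'
  { ?entity rdf:type <http://example.org/Person> .

  # after the fix (single value): no grouping braces at all
  ?entity rdf:type <http://example.org/Person> .

  # multiple values ("any" mode): balanced groups combined via UNION
  { ?entity rdf:type <http://example.org/Person> . } UNION
  { ?entity rdf:type <http://example.org/Place> . }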
Other changes are related to the patch provided by David:
* Adding trace-level logging
* toString() methods for some classes
* Code formatting
I modified some of the logging calls to use Logger.trace(String, Object[])
instead of Logger.trace(String.format(..))
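As a minimal sketch of that change (illustrative lines, not copied from the
patch): String.format(..) builds the message eagerly even when TRACE is
disabled, while the parameterized form lets SLF4J skip the formatting work:

  // before: the message is always formatted, even if TRACE is disabled
  log.trace(String.format("created query of type [%s]:%n%s", query.getClass(), query));
  // after: the {} placeholders are only substituted when TRACE is enabled
  log.trace("created query of type [{}]:\n{}", query.getClass(), query);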
Modified:
incubator/stanbol/trunk/enhancer/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java
incubator/stanbol/trunk/entityhub/generic/core/src/main/java/org/apache/stanbol/entityhub/core/impl/ReferencedSiteImpl.java
incubator/stanbol/trunk/entityhub/generic/core/src/main/java/org/apache/stanbol/entityhub/core/query/FieldQueryImpl.java
incubator/stanbol/trunk/entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/query/Constraint.java
incubator/stanbol/trunk/entityhub/query/clerezza/src/main/java/org/apache/stanbol/entityhub/query/clerezza/SparqlFieldQuery.java
incubator/stanbol/trunk/entityhub/query/clerezza/src/main/java/org/apache/stanbol/entityhub/query/clerezza/SparqlFieldQueryFactory.java
incubator/stanbol/trunk/entityhub/query/clerezza/src/main/java/org/apache/stanbol/entityhub/query/clerezza/SparqlQueryUtils.java
incubator/stanbol/trunk/entityhub/site/linkeddata/src/main/java/org/apache/stanbol/entityhub/site/linkeddata/impl/CoolUriDereferencer.java
incubator/stanbol/trunk/entityhub/site/linkeddata/src/main/java/org/apache/stanbol/entityhub/site/linkeddata/impl/LarqSearcher.java
incubator/stanbol/trunk/entityhub/site/linkeddata/src/main/java/org/apache/stanbol/entityhub/site/linkeddata/impl/SparqlDereferencer.java
incubator/stanbol/trunk/entityhub/site/linkeddata/src/main/java/org/apache/stanbol/entityhub/site/linkeddata/impl/SparqlEndpointUtils.java
incubator/stanbol/trunk/entityhub/site/linkeddata/src/main/java/org/apache/stanbol/entityhub/site/linkeddata/impl/SparqlSearcher.java
incubator/stanbol/trunk/entityhub/site/linkeddata/src/main/java/org/apache/stanbol/entityhub/site/linkeddata/impl/VirtuosoSearcher.java
Modified: incubator/stanbol/trunk/enhancer/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java?rev=1364535&r1=1364534&r2=1364535&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java Mon Jul 23 07:19:09 2012
@@ -66,6 +66,7 @@ import org.apache.stanbol.entityhub.serv
import org.apache.stanbol.entityhub.servicesapi.model.rdf.RdfResourceEnum;
import org.apache.stanbol.entityhub.servicesapi.query.Constraint;
import org.apache.stanbol.entityhub.servicesapi.query.FieldQuery;
+import org.apache.stanbol.entityhub.servicesapi.query.FieldQueryFactory;
import org.apache.stanbol.entityhub.servicesapi.query.QueryResultList;
import org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint;
import org.apache.stanbol.entityhub.servicesapi.query.TextConstraint;
@@ -79,42 +80,45 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
- * Engine that uses a {@link Site} to search for entities for existing TextAnnotations of an Content
- * Item.
+ * Engine that uses a {@link Site} to search for entities for existing TextAnnotations of an Content Item.
*
* @author ogrisel, rwesten
*/
-@Component(configurationFactory = true, policy = ConfigurationPolicy.REQUIRE, // the baseUri is required!
+@Component(configurationFactory = true, policy = ConfigurationPolicy.REQUIRE, // the
+// baseUri
+// is
+// required!
specVersion = "1.1", metatype = true, immediate = true, inherit = true)
@Service
[email protected](value={
- @Property(name=EnhancementEngine.PROPERTY_NAME)
-})
-public class NamedEntityTaggingEngine
- extends AbstractEnhancementEngine<RuntimeException,RuntimeException>
[email protected](value = {@Property(name = EnhancementEngine.PROPERTY_NAME)})
+public class NamedEntityTaggingEngine extends AbstractEnhancementEngine<RuntimeException,RuntimeException>
implements EnhancementEngine, ServiceProperties {
private final Logger log = LoggerFactory.getLogger(getClass());
- @Property//(value = "dbpedia")
+ @Property
+ // (value = "dbpedia")
public static final String REFERENCED_SITE_ID = "org.apache.stanbol.enhancer.engines.entitytagging.referencedSiteId";
@Property(boolValue = false)
public static final String PERSON_STATE = "org.apache.stanbol.enhancer.engines.entitytagging.personState";
- @Property//(value = "dbp-ont:Person")
+ @Property
+ // (value = "dbp-ont:Person")
public static final String PERSON_TYPE = "org.apache.stanbol.enhancer.engines.entitytagging.personType";
@Property(boolValue = false)
public static final String ORG_STATE = "org.apache.stanbol.enhancer.engines.entitytagging.organisationState";
- @Property//(value = "dbp-ont:Organisation")
+ @Property
+ // (value = "dbp-ont:Organisation")
public static final String ORG_TYPE = "org.apache.stanbol.enhancer.engines.entitytagging.organisationType";
@Property(boolValue = false)
public static final String PLACE_STATE = "org.apache.stanbol.enhancer.engines.entitytagging.placeState";
- @Property//(value = "dbp-ont:Place")
+ @Property
+ // (value = "dbp-ont:Place")
public static final String PLACE_TYPE = "org.apache.stanbol.enhancer.engines.entitytagging.placeType";
/**
* Use the RDFS label as default
@@ -128,31 +132,30 @@ public class NamedEntityTaggingEngine
@Property(boolValue = true)
public static final String DEREFERENCE_ENTITIES = "org.apache.stanbol.enhancer.engines.entitytagging.dereference";
- @Property(intValue=0)
+ @Property(intValue = 0)
public static final String SERVICE_RANKING = Constants.SERVICE_RANKING;
/**
- * The default language for labels included in the enhancement metadata
- * (if not available for the parsed content).
+     * The default language for labels included in the enhancement metadata (if not available for the parsed
+ * content).
*/
private static final String DEFAULT_LANGUAGE = "en";
-
+
/**
-     * Service of the Entityhub that manages all the active referenced Site. This Service is used to lookup the
-     * configured Referenced Site when we need to enhance a content item.
+     * Service of the Entityhub that manages all the active referenced Site. This Service is used to lookup
+     * the configured Referenced Site when we need to enhance a content item.
*/
@Reference
protected SiteManager siteManager;
/**
- * Used to lookup Entities if the {@link #REFERENCED_SITE_ID} property is
- * set to "entityhub" or "local"
+     * Used to lookup Entities if the {@link #REFERENCED_SITE_ID} property is set to "entityhub" or "local"
*/
@Reference
protected Entityhub entityhub;
-
+
/**
- * This holds the id of the {@link Site} used to lookup Entities
- * or <code>null</code> if the {@link Entityhub} is used.
+     * This holds the id of the {@link Site} used to lookup Entities or <code>null</code> if the
+ * {@link Entityhub} is used.
*/
protected String referencedSiteID;
@@ -162,8 +165,6 @@ public class NamedEntityTaggingEngine
*/
public static final Integer defaultOrder = ORDERING_EXTRACTION_ENHANCEMENT;
-
-
/**
* State if text annotations of type {@link OntologicalClasses#DBPEDIA_PERSON} are enhanced by this engine
*/
@@ -207,13 +208,13 @@ public class NamedEntityTaggingEngine
* The number of Suggestions to be added
*/
protected Integer numSuggestions = 3;
-
+
protected boolean dereferenceEntities = true;
/**
* The {@link OfflineMode} is used by Stanbol to indicate that no external service should be referenced.
- * For this engine that means it is necessary to check if the used {@link Site} can operate
- * offline or not.
+ * For this engine that means it is necessary to check if the used {@link Site} can operate offline or
+ * not.
*
* @see #enableOfflineMode(OfflineMode)
* @see #disableOfflineMode(OfflineMode)
@@ -240,7 +241,7 @@ public class NamedEntityTaggingEngine
}
/**
- * Returns <code>true</code> only if Stanbol operates in {@link OfflineMode}.
+ * Returns <code>true</code> only if Stanbol operates in {@link OfflineMode} .
*
* @return the offline state
*/
@@ -264,7 +265,7 @@ public class NamedEntityTaggingEngine
throw new ConfigurationException(REFERENCED_SITE_ID,
"The ID of the Referenced Site is a required Parameter and
MUST NOT be an empty String!");
}
-
if(Entityhub.ENTITYHUB_IDS.contains(this.referencedSiteID.toLowerCase())){
+ if
(Entityhub.ENTITYHUB_IDS.contains(this.referencedSiteID.toLowerCase())) {
log.debug("Init NamedEntityTaggingEngine instance for the
Entityhub");
this.referencedSiteID = null;
}
@@ -303,23 +304,25 @@ public class NamedEntityTaggingEngine
public void computeEnhancements(ContentItem ci) throws EngineException {
final Site site;
- if(referencedSiteID != null) { //lookup the referenced site
+ if (referencedSiteID != null) { // lookup the referenced site
site = siteManager.getSite(referencedSiteID);
- //ensure that it is present
+ // ensure that it is present
if (site == null) {
String msg = String.format(
- "Unable to enhance %s because Referenced Site %s is
currently not active!",
- ci.getUri().getUnicodeString(), referencedSiteID);
+ "Unable to enhance %s because Referenced Site %s is
currently not active!", ci.getUri()
+ .getUnicodeString(), referencedSiteID);
log.warn(msg);
- // TODO: throwing Exceptions is currently deactivated. We need a more clear
+ // TODO: throwing Exceptions is currently deactivated. We need a
+ // more clear
// policy what do to in such situations
// throw new EngineException(msg);
return;
}
- //and that it supports offline mode if required
+ // and that it supports offline mode if required
if (isOfflineMode() && !site.supportsLocalMode()) {
- log.warn("Unable to enhance ci {} because OfflineMode is not
supported by ReferencedSite {}.",
- ci.getUri().getUnicodeString(), site.getId());
+ log.warn(
+ "Unable to enhance ci {} because OfflineMode is not
supported by ReferencedSite {}.", ci
+ .getUri().getUnicodeString(), site.getId());
return;
}
} else { // null indicates to use the Entityhub to lookup Entities
@@ -329,7 +332,8 @@ public class NamedEntityTaggingEngine
LiteralFactory literalFactory = LiteralFactory.getInstance();
// Retrieve the existing text annotations (requires read lock)
Map<NamedEntity,List<UriRef>> textAnnotations = new HashMap<NamedEntity,List<UriRef>>();
- //the language extracted for the parsed content or NULL if not available
+ // the language extracted for the parsed content or NULL if not
+ // available
String contentLangauge;
ci.getLock().readLock().lock();
try {
@@ -338,14 +342,17 @@ public class NamedEntityTaggingEngine
.hasNext();) {
UriRef uri = (UriRef) it.next().getSubject();
if (graph.filter(uri, Properties.DC_RELATION, null).hasNext()) {
- // this is not the most specific occurrence of this name: skip
+ // this is not the most specific occurrence of this name:
+ // skip
continue;
}
NamedEntity namedEntity = NamedEntity.createFromTextAnnotation(graph, uri);
- if(namedEntity != null){
- // This is a first occurrence, collect any subsumed annotations
+ if (namedEntity != null) {
+ // This is a first occurrence, collect any subsumed
+ // annotations
List<UriRef> subsumed = new ArrayList<UriRef>();
- for (Iterator<Triple> it2 = graph.filter(null, Properties.DC_RELATION, uri); it2.hasNext();) {
+ for (Iterator<Triple> it2 = graph.filter(null, Properties.DC_RELATION, uri); it2
+ .hasNext();) {
subsumed.add((UriRef) it2.next().getSubject());
}
textAnnotations.put(namedEntity, subsumed);
@@ -354,43 +361,49 @@ public class NamedEntityTaggingEngine
} finally {
ci.getLock().readLock().unlock();
}
- //search the suggestions
- Map<NamedEntity,List<Suggestion>> suggestions = new HashMap<NamedEntity,List<Suggestion>>(textAnnotations.size());
+ // search the suggestions
+ Map<NamedEntity,List<Suggestion>> suggestions = new HashMap<NamedEntity,List<Suggestion>>(
+ textAnnotations.size());
for (Entry<NamedEntity,List<UriRef>> entry : textAnnotations.entrySet()) {
try {
- List<Suggestion> entitySuggestions = computeEntityRecommentations(
- site, entry.getKey(),entry.getValue(),contentLangauge);
- if(entitySuggestions != null && !entitySuggestions.isEmpty()){
+ List<Suggestion> entitySuggestions = computeEntityRecommentations(site, entry.getKey(),
+ entry.getValue(), contentLangauge);
+ if (entitySuggestions != null && !entitySuggestions.isEmpty()) {
suggestions.put(entry.getKey(), entitySuggestions);
}
} catch (EntityhubException e) {
throw new EngineException(this, ci, e);
}
}
- //now write the results (requires write lock)
+ // now write the results (requires write lock)
ci.getLock().writeLock().lock();
try {
RdfValueFactory factory = RdfValueFactory.getInstance();
- Map<String, Representation> entityData = new HashMap<String,Representation>();
- for(Entry<NamedEntity,List<Suggestion>> entitySuggestions : suggestions.entrySet()){
+ Map<String,Representation> entityData = new HashMap<String,Representation>();
+ for (Entry<NamedEntity,List<Suggestion>> entitySuggestions : suggestions.entrySet()) {
List<UriRef> subsumed = textAnnotations.get(entitySuggestions.getKey());
List<NonLiteral> annotationsToRelate = new ArrayList<NonLiteral>(subsumed);
annotationsToRelate.add(entitySuggestions.getKey().getEntity());
- for(Suggestion suggestion : entitySuggestions.getValue()){
- log.debug("Add Suggestion {} for {}",
suggestion.getEntity().getId(), entitySuggestions.getKey());
+ for (Suggestion suggestion : entitySuggestions.getValue()) {
+ log.debug("Add Suggestion {} for {}",
suggestion.getEntity().getId(),
+ entitySuggestions.getKey());
EnhancementRDFUtils.writeEntityAnnotation(this,
literalFactory, graph, ci.getUri(),
annotationsToRelate, suggestion, nameField,
- //TODO: maybe we want labels in a different language than the
- // language of the content (e.g. Accept-Language header)?!
+ // TODO: maybe we want labels in a different
+ // language than the
+ // language of the content (e.g. Accept-Language
+ // header)?!
contentLangauge == null ? DEFAULT_LANGUAGE : contentLangauge);
if (dereferenceEntities) {
- entityData.put(suggestion.getEntity().getId(), suggestion.getEntity().getRepresentation());
+ entityData.put(suggestion.getEntity().getId(), suggestion.getEntity()
+ .getRepresentation());
}
}
}
- //if dereferneceEntities is true the entityData will also contain all
- //Representations to add! If false entityData will be empty
- for(Representation rep : entityData.values()){
+ // if dereferneceEntities is true the entityData will also contain
+ // all
+ // Representations to add! If false entityData will be empty
+ for (Representation rep : entityData.values()) {
graph.addAll(factory.toRdfRepresentation(rep).getRdfGraph());
}
} finally {
@@ -401,38 +414,50 @@ public class NamedEntityTaggingEngine
/**
* Computes the Enhancements
- * @param site The {@link SiteException} id or <code>null</code> to
- * use the {@link Entityhub}
- * @param literalFactory the {@link LiteralFactory} used to create RDF Literals
- * @param contentItemId the id of the contentItem
- * @param textAnnotation the text annotation to enhance
- * @param subsumedAnnotations other text annotations for the same entity
- * @param language the language of the analysed text or <code>null</code>
- * if not available.
+ *
+ * @param site
+ * The {@link SiteException} id or <code>null</code> to use the {@link Entityhub}
+ * @param literalFactory
+ * the {@link LiteralFactory} used to create RDF Literals
+ * @param contentItemId
+ * the id of the contentItem
+ * @param textAnnotation
+ * the text annotation to enhance
+ * @param subsumedAnnotations
+ * other text annotations for the same entity
+ * @param language
+ * the language of the analysed text or <code>null</code> if not available.
* @return the suggestions for the parsed {@link NamedEntity}
- * @throws EntityhubException On any Error while looking up Entities via
- * the Entityhub
+ * @throws EntityhubException
+ * On any Error while looking up Entities via the Entityhub
*/
protected final List<Suggestion> computeEntityRecommentations(Site site,
- NamedEntity namedEntity,
- List<UriRef> subsumedAnnotations, String language) throws EntityhubException {
+ NamedEntity namedEntity,
+ List<UriRef> subsumedAnnotations,
+ String language) throws EntityhubException {
// First get the required properties for the parsed textAnnotation
// ... and check the values
log.debug("Process {}", namedEntity);
- FieldQuery query = site == null ? //if site is NULL use the Entityhub
- entityhub.getQueryFactory().createFieldQuery() :
- site.getQueryFactory().createFieldQuery();
- // replace spaces with plus to create an AND search for all words in the name!
+ // if site is NULL use
+ // the Entityhub
+ FieldQueryFactory queryFactory = site == null ? entityhub.getQueryFactory() : site.getQueryFactory();
+
+ log.trace("Will use a query-factory of type [{}].", queryFactory.getClass().toString());
+
+ FieldQuery query = queryFactory.createFieldQuery();
+
+ // replace spaces with plus to create an AND search for all words in the
+ // name!
Constraint labelConstraint;
- //TODO: make case sensitivity configurable
+ // TODO: make case sensitivity configurable
boolean casesensitive = false;
String namedEntityLabel = casesensitive ? namedEntity.getName() : namedEntity.getName().toLowerCase();
- if(language != null){
- //search labels in the language and without language
- labelConstraint = new TextConstraint(namedEntityLabel,casesensitive,language,null);
+ if (language != null) {
+ // search labels in the language and without language
+ labelConstraint = new TextConstraint(namedEntityLabel, casesensitive, language, null);
} else {
- labelConstraint = new TextConstraint(namedEntityLabel,casesensitive);
+ labelConstraint = new TextConstraint(namedEntityLabel, casesensitive);
}
query.setConstraint(nameField, labelConstraint);
if (OntologicalClasses.DBPEDIA_PERSON.equals(namedEntity.getType())) {
@@ -466,73 +491,85 @@ public class NamedEntityTaggingEngine
return Collections.emptyList();
}
}
- query.setLimit(Math.max(20,this.numSuggestions*3));
- QueryResultList<Entity> results = site == null? //if site is NULL
- entityhub.findEntities(query) : //use the Entityhub
- site.findEntities(query); //else the referenced site
+ query.setLimit(Math.max(20, this.numSuggestions * 3));
+
+ log.trace("A query has been created of type [{}] and the following
settings:\n{}", query.getClass()
+ .toString(), query.toString());
+
+ if (null == site) log.trace("A query will be sent to the entity-hub of
type [{}].", entityhub
+ .getClass());
+ else log.trace("A query will be sent to a site [id :: {}][type ::
{}].", site.getId(), site
+ .getClass());
+
+ QueryResultList<Entity> results = site == null ? // if site is NULL
+ entityhub.findEntities(query)
+ : // use the Entityhub
+ site.findEntities(query); // else the referenced site
log.debug(" - {} results returned by query {}", results.size(),
results.getQuery());
- if(results.isEmpty()){ //no results nothing to do
+ if (results.isEmpty()) { // no results nothing to do
return Collections.emptyList();
}
- //we need to normalise the confidence values from [0..1]
+ // we need to normalise the confidence values from [0..1]
// * levenshtein distance as absolute (1.0 for exact match)
// * Solr scores * levenshtein to rank entities relative to each other
Float maxScore = null;
Float maxExactScore = null;
List<Suggestion> matches = new ArrayList<Suggestion>(numSuggestions);
- //assumes entities are sorted by score
- for (Iterator<Entity> guesses = results.iterator();guesses.hasNext();) {
+ // assumes entities are sorted by score
+ for (Iterator<Entity> guesses = results.iterator(); guesses.hasNext();) {
Suggestion match = new Suggestion(guesses.next());
Representation rep = match.getEntity().getRepresentation();
- Float score = rep.getFirst(RdfResourceEnum.resultScore.getUri(),Float.class);
- if(maxScore == null){
+ Float score = rep.getFirst(RdfResourceEnum.resultScore.getUri(), Float.class);
+ if (maxScore == null) {
maxScore = score;
}
Iterator<Text> labels = rep.getText(nameField);
- while(labels.hasNext() && match.getLevenshtein() < 1.0){
+ while (labels.hasNext() && match.getLevenshtein() < 1.0) {
Text label = labels.next();
- if(language == null || //if the content language is unknown -> accept all labels
- label.getLanguage() == null || //accept labels with no language
- //and labels in the same language as the content
- (language != null && label.getLanguage().startsWith(language))){
+ if (language == null || // if the content language is unknown ->
+ // accept all labels
+ label.getLanguage() == null || // accept labels with no
+ // language
+ // and labels in the same language as the content
+ (language != null && label.getLanguage().startsWith(language))) {
double actMatch = levenshtein(
- casesensitive ? label.getText().toLowerCase() : label.getText(),
- namedEntityLabel);
- if(actMatch > match.getLevenshtein()){
+ casesensitive ? label.getText().toLowerCase() : label.getText(), namedEntityLabel);
+ if (actMatch > match.getLevenshtein()) {
match.setLevenshtein(actMatch);
match.setMatchedLabel(label);
}
}
}
- if(match.getMatchedLabel() != null){
- if(match.getLevenshtein() == 1.0){
- if(maxExactScore == null){
+ if (match.getMatchedLabel() != null) {
+ if (match.getLevenshtein() == 1.0) {
+ if (maxExactScore == null) {
maxExactScore = score;
}
- //normalise exact matches against the best exact score
- match.setScore(score.doubleValue()/maxExactScore.doubleValue());
+ // normalise exact matches against the best exact score
+ match.setScore(score.doubleValue() / maxExactScore.doubleValue());
} else {
- //normalise partial matches against the best match and the
- //Levenshtein similarity with the label
- match.setScore(score.doubleValue()*match.getLevenshtein()/maxScore.doubleValue());
+ // normalise partial matches against the best match and the
+ // Levenshtein similarity with the label
+ match.setScore(score.doubleValue() * match.getLevenshtein() / maxScore.doubleValue());
}
matches.add(match);
} else {
- log.debug("No value of {} for Entity
{}!",nameField,match.getEntity().getId());
+ log.debug("No value of {} for Entity {}!", nameField,
match.getEntity().getId());
}
}
- //now sort the results
+ // now sort the results
Collections.sort(matches);
- return matches.subList(0, Math.min(matches.size(),numSuggestions));
+ return matches.subList(0, Math.min(matches.size(), numSuggestions));
}
/**
- * This EnhancementEngine can enhance any ContentItem as it does consume
- * existing TextAnnotations with the configured dc:type's
+ * This EnhancementEngine can enhance any ContentItem as it does consume existing TextAnnotations with the
+ * configured dc:type's
+ *
* @see org.apache.stanbol.enhancer.servicesapi.EnhancementEngine#canEnhance(org.apache.stanbol.enhancer.servicesapi.ContentItem)
*/
public int canEnhance(ContentItem ci) {
- return ENHANCE_ASYNC; //Entity tagging now supports asyc processing
+ return ENHANCE_ASYNC; // Entity tagging now supports asyc processing
}
@Override
@@ -540,23 +577,29 @@ public class NamedEntityTaggingEngine
return Collections.unmodifiableMap(Collections.singletonMap(ENHANCEMENT_ENGINE_ORDERING,
(Object) defaultOrder));
}
+
/**
- * Compares two strings (after {@link StringUtils#trim(String) trimming})
- * by using the Levenshtein's Edit Distance of the two
- * strings. Does not return the {@link Integer} number of changes but
- * <code>1-(changes/maxStringSizeAfterTrim)</code><p>
- * @param s1 the first string
- * @param s2 the second string
+ * Compares two strings (after {@link StringUtils#trim(String) trimming}) by using the Levenshtein's Edit
+ * Distance of the two strings. Does not return the {@link Integer} number of changes but
+ * <code>1-(changes/maxStringSizeAfterTrim)</code>
+ * <p>
+ *
+ * @param s1
+ * the first string
+ * @param s2
+ * the second string
* @return the distance
- * @throws IllegalArgumentException if any of the two parsed strings is NULL
+ * @throws IllegalArgumentException
+ * if any of the two parsed strings is NULL
*/
- private static double levenshtein(String s1, String s2) {
- if(s1 == null || s2 == null){
+ private static double levenshtein(String s1, String s2) {
+ if (s1 == null || s2 == null) {
throw new IllegalArgumentException("NONE of the parsed String MUST
BE NULL!");
}
s1 = StringUtils.trim(s1);
s2 = StringUtils.trim(s2);
- return s1.isEmpty() || s2.isEmpty() ? 0 :
- 1.0 - (((double)getLevenshteinDistance(s1, s2)) / ((double)(Math.max(s1.length(), s2.length()))));
+ return s1.isEmpty() || s2.isEmpty() ? 0
+ : 1.0 - (((double) getLevenshteinDistance(s1, s2)) / ((double) (Math.max(s1.length(),
+ s2.length()))));
}
}