Author: rwesten
Date: Thu Aug 23 07:50:45 2012
New Revision: 1376396

URL: http://svn.apache.org/viewvc?rev=1376396&view=rev
Log:
Merged revisions 1376385 and 1376046 from trunk

Modified:
    
incubator/stanbol/branches/dbpedia-spotlight-engines/chain/allactive/src/main/java/org/apache/stanbol/enhancer/chain/allactive/impl/DefaultChain.java
    
incubator/stanbol/branches/dbpedia-spotlight-engines/chain/allactive/src/main/resources/OSGI-INF/metatype/metatype.properties
    
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/langdetect/src/main/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEnhancementEngine.java
    
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/langdetect/src/main/resources/OSGI-INF/metatype/metatype.properties
    
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/langdetect/src/test/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEngineTest.java

Modified: 
incubator/stanbol/branches/dbpedia-spotlight-engines/chain/allactive/src/main/java/org/apache/stanbol/enhancer/chain/allactive/impl/DefaultChain.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/chain/allactive/src/main/java/org/apache/stanbol/enhancer/chain/allactive/impl/DefaultChain.java?rev=1376396&r1=1376395&r2=1376396&view=diff
==============================================================================
--- 
incubator/stanbol/branches/dbpedia-spotlight-engines/chain/allactive/src/main/java/org/apache/stanbol/enhancer/chain/allactive/impl/DefaultChain.java
 (original)
+++ 
incubator/stanbol/branches/dbpedia-spotlight-engines/chain/allactive/src/main/java/org/apache/stanbol/enhancer/chain/allactive/impl/DefaultChain.java
 Thu Aug 23 07:50:45 2012
@@ -29,6 +29,7 @@ import org.apache.stanbol.enhancer.servi
 import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
 import org.osgi.framework.Constants;
 import org.osgi.framework.ServiceRegistration;
+import org.osgi.service.cm.ConfigurationException;
 import org.osgi.service.component.ComponentContext;
 
 /**
@@ -51,24 +52,47 @@ public class DefaultChain {
     
     @Property(boolValue=DefaultChain.DEFAULT_STATE)
     public static final String PROPERTY_ENABLED = 
"stanbol.enhancer.chain.default.enabled";
-        
+    
+    @Property(value=DefaultChain.DEFAULT_NAME)
+    public static final String PROPERTY_NAME = 
"stanbol.enhancer.chain.default.name";
+    
+    
     public static final boolean DEFAULT_STATE = true;
+    public static final String DEFAULT_NAME = "default";
     
     private ServiceRegistration defaultChainReg;
     private AllActiveEnginesChain defaultChain;
     
     @Activate
-    protected void activate(ComponentContext ctx){
+    protected void activate(ComponentContext ctx) throws 
ConfigurationException {
         boolean enabled = DEFAULT_STATE;
         Object value = ctx.getProperties().get(PROPERTY_ENABLED);
         if(value != null){
             enabled = Boolean.parseBoolean(value.toString());
         }
+        value = ctx.getProperties().get(PROPERTY_NAME);
+        String name = value == null ? DEFAULT_NAME : value.toString();
+        if(name.isEmpty()){
+            throw new ConfigurationException(PROPERTY_NAME, "The parsed name 
for the default chain MUST NOT be empty!");
+        }
+        int ranking;
+        value = ctx.getProperties().get(Constants.SERVICE_RANKING);
+        if(value instanceof Number){
+            ranking = ((Number)value).intValue();
+        } else if(value != null){
+            try {
+                ranking = Integer.parseInt(value.toString());
+            }catch (NumberFormatException e) {
+                throw new ConfigurationException(Constants.SERVICE_RANKING, 
"Unable to pase Integer service.ranking value",e);
+            }
+        } else {
+            ranking = Integer.MIN_VALUE;
+        }
         if(enabled){
-            defaultChain = new 
AllActiveEnginesChain(ctx.getBundleContext(),"default");
+            defaultChain = new 
AllActiveEnginesChain(ctx.getBundleContext(),name);
             Dictionary<String,Object> properties = new 
Hashtable<String,Object>();
             properties.put(Chain.PROPERTY_NAME, defaultChain.getName());
-            properties.put(Constants.SERVICE_RANKING, Integer.MIN_VALUE);
+            properties.put(Constants.SERVICE_RANKING, ranking);
             defaultChainReg = ctx.getBundleContext().registerService(
                 Chain.class.getName(), defaultChain, properties);
         }

Modified: 
incubator/stanbol/branches/dbpedia-spotlight-engines/chain/allactive/src/main/resources/OSGI-INF/metatype/metatype.properties
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/chain/allactive/src/main/resources/OSGI-INF/metatype/metatype.properties?rev=1376396&r1=1376395&r2=1376396&view=diff
==============================================================================
--- 
incubator/stanbol/branches/dbpedia-spotlight-engines/chain/allactive/src/main/resources/OSGI-INF/metatype/metatype.properties
 (original)
+++ 
incubator/stanbol/branches/dbpedia-spotlight-engines/chain/allactive/src/main/resources/OSGI-INF/metatype/metatype.properties
 Thu Aug 23 07:50:45 2012
@@ -28,7 +28,12 @@ the default Chain including all currentl
 
 stanbol.enhancer.chain.default.enabled.name=Enabled
 stanbol.enhancer.chain.default.enabled.description=Allows to enable/disable 
the registration \
-the default chain. See the documentation for more information about that 
feature.
+of the default chain including all currently active Enhancement Engines. \
+See the documentation for more information about that feature.
+
+stanbol.enhancer.chain.default.name.name=Name
+stanbol.enhancer.chain.default.name.description=The name of the Enhancement 
Chain that includes \
+all active Enhancement Engines.
 
 
#===============================================================================
 # AllActiveEnginesChain

Modified: 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/langdetect/src/main/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEnhancementEngine.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/langdetect/src/main/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEnhancementEngine.java?rev=1376396&r1=1376395&r2=1376396&view=diff
==============================================================================
--- 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/langdetect/src/main/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEnhancementEngine.java
 (original)
+++ 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/langdetect/src/main/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEnhancementEngine.java
 Thu Aug 23 07:50:45 2012
@@ -77,9 +77,14 @@ public class LanguageDetectionEnhancemen
     /**
      * a configurable value of the text segment length to check
      */
-    @Property
+    @Property(intValue=LanguageDetectionEnhancementEngine.PROBE_LENGTH_DEFAULT)
     public static final String PROBE_LENGTH_PROP = 
"org.apache.stanbol.enhancer.engines.langdetect.probe-length";
 
+    /**
+     * a configurable value of the maximum number of suggested languages
+     */
+    
@Property(intValue=LanguageDetectionEnhancementEngine.DEFAULT_MAX_SUGGESTED_LANGUAGES)
+    public static final String MAX_SUGGESTED_PROP = 
"org.apache.stanbol.enhancer.engines.langdetect.max-suggested";
 
     /**
      * The default value for the Execution of this Engine. Currently set to
@@ -105,7 +110,19 @@ public class LanguageDetectionEnhancemen
      */
     private static final Logger log = 
LoggerFactory.getLogger(LanguageDetectionEnhancementEngine.class);
 
-    private static final int PROBE_LENGTH_DEFAULT = 1000;
+    /*
+     * NOTE: Checked the Documentation: The tool already supports the taking
+     * of several shorter samples randomly distributed over the parsed text
+     * to improve results and reduce noise. See
+     * http://code.google.com/p/language-detection/wiki/FrequentlyAskedQuestion
+     * "Each detected language differs for the same document" for a hint. 
+     */
+    private static final int PROBE_LENGTH_DEFAULT = -1;
+
+    /**
+     * Default value for the maximum number of suggested Languages
+     */
+    private static final int DEFAULT_MAX_SUGGESTED_LANGUAGES = 3;
 
     /**
      * How much text should be used for testing: If the value is 0 or smaller,
@@ -114,6 +131,8 @@ public class LanguageDetectionEnhancemen
      */
     private int probeLength = PROBE_LENGTH_DEFAULT;
     
+    private int maxSuggestedLanguages = DEFAULT_MAX_SUGGESTED_LANGUAGES;
+    
     /**
      * The literal factory
      */
@@ -134,8 +153,34 @@ public class LanguageDetectionEnhancemen
         if (ce != null) {
             @SuppressWarnings("unchecked")
             Dictionary<String, String> properties = ce.getProperties();
-            String lengthVal = properties.get(PROBE_LENGTH_PROP);
-            probeLength = lengthVal == null ? PROBE_LENGTH_DEFAULT : 
Integer.parseInt(lengthVal);
+            Object value = properties.get(PROBE_LENGTH_PROP);
+            if(value instanceof Number){
+                probeLength = ((Number)value).intValue();
+            } else if(value != null){
+                try {
+                    probeLength = Integer.parseInt(value.toString());
+                } catch (NumberFormatException e) {
+                    throw new ConfigurationException(PROBE_LENGTH_PROP, 
+                        "The parsed 'proble length' MUST be a valid Integer", 
e);
+                }
+            } else {
+                probeLength = PROBE_LENGTH_DEFAULT;
+            }
+            value = properties.get(MAX_SUGGESTED_PROP);
+            if(value instanceof Number){
+                maxSuggestedLanguages = ((Number)value).intValue();
+            } else if(value != null){
+                try {
+                    maxSuggestedLanguages = Integer.parseInt(value.toString());
+                } catch (NumberFormatException e) {
+                    throw new ConfigurationException(MAX_SUGGESTED_PROP, 
+                        "The parsed number of the maximum suggested lanugages "
+                        + "MUST BE a valid Integer", e);
+                }
+            }
+            if(maxSuggestedLanguages < 1){
+                maxSuggestedLanguages = DEFAULT_MAX_SUGGESTED_LANGUAGES;
+            }
         }
         languageIdentifier = new LanguageIdentifier();
     }
@@ -143,6 +188,8 @@ public class LanguageDetectionEnhancemen
     protected void deactivate(ComponentContext ce) {
         super.deactivate(ce);
         this.languageIdentifier = null;
+        this.maxSuggestedLanguages = -1;
+        this.probeLength = -1;
     }
 
     public int canEnhance(ContentItem ci) throws EngineException {
@@ -190,16 +237,20 @@ public class LanguageDetectionEnhancemen
         }
         
         // add language to metadata
-        if (languages.size() > 0) {
+        if (languages != null) {
             MGraph g = ci.getMetadata();
             ci.getLock().writeLock().lock();
-            // add best hypothesis
-            Language oneLang = languages.get(0);
             try {
-                UriRef textEnhancement = 
EnhancementEngineHelper.createTextEnhancement(ci, this);
-                g.add(new TripleImpl(textEnhancement, DC_LANGUAGE, new 
PlainLiteralImpl(oneLang.lang)));
-                g.add(new TripleImpl(textEnhancement, ENHANCER_CONFIDENCE, 
literalFactory.createTypedLiteral(oneLang.prob)));
-                g.add(new TripleImpl(textEnhancement, DC_TYPE, 
DCTERMS_LINGUISTIC_SYSTEM));
+                for(int i=0;i<maxSuggestedLanguages && i<languages.size();i++){
+                    // add a hypothesis
+                    Language hypothesis = languages.get(i);
+                    UriRef textEnhancement = 
EnhancementEngineHelper.createTextEnhancement(ci, this);
+                    g.add(new TripleImpl(textEnhancement, DC_LANGUAGE, new 
PlainLiteralImpl(hypothesis.lang)));
+                    g.add(new TripleImpl(textEnhancement, ENHANCER_CONFIDENCE, 
literalFactory.createTypedLiteral(hypothesis.prob)));
+                    g.add(new TripleImpl(textEnhancement, DC_TYPE, 
DCTERMS_LINGUISTIC_SYSTEM));
+                    g.add(new TripleImpl(textEnhancement, ENHANCER_CONFIDENCE, 
+                        literalFactory.createTypedLiteral(hypothesis.prob)));
+                }
             } finally {
                 ci.getLock().writeLock().unlock();
             }

Modified: 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/langdetect/src/main/resources/OSGI-INF/metatype/metatype.properties
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/langdetect/src/main/resources/OSGI-INF/metatype/metatype.properties?rev=1376396&r1=1376395&r2=1376396&view=diff
==============================================================================
--- 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/langdetect/src/main/resources/OSGI-INF/metatype/metatype.properties
 (original)
+++ 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/langdetect/src/main/resources/OSGI-INF/metatype/metatype.properties
 Thu Aug 23 07:50:45 2012
@@ -30,3 +30,13 @@ org.apache.stanbol.enhancer.engines.lang
 Enhancer Engine: Language Identification
 
org.apache.stanbol.enhancer.engines.langdetect.LanguageDetectionEnhancementEngine.description=Detects
 \
 the Language for parsed Text.
+
+org.apache.stanbol.enhancer.engines.langdetect.max-suggested.name=Max 
Suggested Languages
+org.apache.stanbol.enhancer.engines.langdetect.max-suggested.description=This \
+Engine supports the suggestion of multiple languages with confidence values. 
This \
+allows to configure how many languages are suggested at a maximum (default: 3).
+
+org.apache.stanbol.enhancer.engines.langdetect.probe-length.name=Probe Length
+org.apache.stanbol.enhancer.engines.langdetect.probe-length.description= The \
+maximum number of characters used for language detection. Note that the used \
+library already supports random selection of text parts (default: -1 
(deactivated))

Modified: 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/langdetect/src/test/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEngineTest.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/langdetect/src/test/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEngineTest.java?rev=1376396&r1=1376395&r2=1376396&view=diff
==============================================================================
--- 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/langdetect/src/test/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEngineTest.java
 (original)
+++ 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/langdetect/src/test/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEngineTest.java
 Thu Aug 23 07:50:45 2012
@@ -21,6 +21,7 @@ import static org.apache.stanbol.enhance
 import static 
org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper.validateAllTextAnnotations;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
 
 import java.io.IOException;
 import java.io.InputStream;
@@ -40,6 +41,7 @@ import org.apache.stanbol.enhancer.servi
 import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
 import org.apache.stanbol.enhancer.servicesapi.impl.StringSource;
 import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
+import org.junit.Assert;
 import org.junit.BeforeClass;
 import org.junit.Test;
 import org.osgi.service.cm.ConfigurationException;
@@ -122,10 +124,9 @@ public class LanguageDetectionEngineTest
         expectedValues.put(Properties.DC_CREATOR, 
LiteralFactory.getInstance().createTypedLiteral(
             langIdEngine.getClass().getName()));
         int textAnnotationCount = validateAllTextAnnotations(ci.getMetadata(), 
text, expectedValues);
-        assertEquals("A single TextAnnotation is expected", 
1,textAnnotationCount);
-        //even through this tests do not validate service quality but rather
-        //the correct integration of the CELI service as EnhancementEngine
-        //we expect the "en" is detected for the parsed text
+        assertTrue("A TextAnnotation is expected", textAnnotationCount > 0);
+        //even though this test does not validate detection quality,
+        //we expect "en" to be detected as the best guess for the parsed text
         assertEquals("The detected language for text '"+text+"' MUST BE 'en'",
             "en",EnhancementEngineHelper.getLanguage(ci));
 


Reply via email to