Author: rwesten
Date: Tue May 15 08:58:22 2012
New Revision: 1338604

URL: http://svn.apache.org/viewvc?rev=1338604&view=rev
Log:
STANBOL-583: CELI Language Identification Engine

* Engine now uses a write lock when writing the language enhancement
* IOExceptions and SOAPExceptions are now forwarded as EngineException
* activation now checks for illegally formatted Service URLs
* Unit Test now validates the Language Enhancement
* corrected a hard coded ServiceURI in the LangId HTTP client

With these changes this engine should be ready for use.

Modified:
    
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/CeliLanguageIdentifierEnhancementEngine.java
    
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/LanguageIdentifierClientHTTP.java
    
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/CeliLanguageIdentifierEnhancementEngineTest.java

Modified: 
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/CeliLanguageIdentifierEnhancementEngine.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/CeliLanguageIdentifierEnhancementEngine.java?rev=1338604&r1=1338603&r2=1338604&view=diff
==============================================================================
--- 
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/CeliLanguageIdentifierEnhancementEngine.java
 (original)
+++ 
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/CeliLanguageIdentifierEnhancementEngine.java
 Tue May 15 08:58:22 2012
@@ -4,6 +4,7 @@ import static org.apache.stanbol.enhance
 import static 
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_CONFIDENCE;
 
 import java.io.IOException;
+import java.net.MalformedURLException;
 import java.net.URL;
 import java.util.Collections;
 import java.util.Dictionary;
@@ -12,6 +13,8 @@ import java.util.Map;
 import java.util.Set;
 import java.util.Map.Entry;
 
+import javax.xml.soap.SOAPException;
+
 import org.apache.clerezza.rdf.core.LiteralFactory;
 import org.apache.clerezza.rdf.core.MGraph;
 import org.apache.clerezza.rdf.core.UriRef;
@@ -26,6 +29,7 @@ import org.apache.felix.scr.annotations.
 import org.apache.felix.scr.annotations.Service;
 import org.apache.stanbol.commons.stanboltools.offline.OnlineMode;
 import org.apache.stanbol.enhancer.servicesapi.Blob;
+import org.apache.stanbol.enhancer.servicesapi.Chain;
 import org.apache.stanbol.enhancer.servicesapi.ContentItem;
 import org.apache.stanbol.enhancer.servicesapi.EngineException;
 import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
@@ -48,9 +52,18 @@ public class CeliLanguageIdentifierEnhan
         * This ensures that no connections to external services are made if 
Stanbol is started in offline mode 
         * as the OnlineMode service will only be available if OfflineMode is 
deactivated. 
         */
-       @Reference
+       @SuppressWarnings("unused")
+    @Reference
     private OnlineMode onlineMode; 
-       
+    /**
+     * The default value for the Execution of this Engine. Currently set to
+     * {@link ServiceProperties#ORDERING_PRE_PROCESSING}-2 to ensure that it is
+     * executed before "normal" pre-processing engines.<p>
+     * NOTE: this information is used by the default and weighed {@link Chain}
+     * implementation to determine the processing order of 
+     * {@link EnhancementEngine}s. Other {@link Chain} implementation do not
+     * use this information.
+     */
        public static final Integer defaultOrder = 
ServiceProperties.ORDERING_PRE_PROCESSING -2;
 
        private Logger log = LoggerFactory.getLogger(getClass());
@@ -64,12 +77,16 @@ public class CeliLanguageIdentifierEnhan
         * Set containing the only supported mime type {@link 
#TEXT_PLAIN_MIMETYPE}
         */
        private static final Set<String> SUPPORTED_MIMTYPES = 
Collections.singleton(TEXT_PLAIN_MIMETYPE);
+       /**
+        * The literal factory
+        */
+    private final LiteralFactory literalFactory = LiteralFactory.getInstance();
 
        @Property
-       public static final String LICENSE_KEY = 
"org.apache.stanbol.enhancer.engines.celi.langid.impl.CeliLanguageIdentifierEnhancementEngine.license";
+       public static final String LICENSE_KEY = 
"org.apache.stanbol.enhancer.engines.celi.langid.license";
 
        @Property(value = "http://linguagrid.org/LSGrid/ws/language-identifier";)
-       public static final String SERVICE_URL = 
"org.apache.stanbol.enhancer.engines.celi.langid.impl.CeliLanguageIdentifierEnhancementEngine.url";
+       public static final String SERVICE_URL = 
"org.apache.stanbol.enhancer.engines.celi.langid.url";
 
        private String licenseKey;
        private URL serviceURL;
@@ -87,9 +104,19 @@ public class CeliLanguageIdentifierEnhan
                }
                String url = (String) properties.get(SERVICE_URL);
                if (url == null || url.isEmpty()) {
-                       throw new ConfigurationException(SERVICE_URL, 
String.format("%s : please configure the URL of the CELI Web Service (e.g. by" 
+ "using the 'Configuration' tab of the Apache Felix Web Console).", 
getClass().getSimpleName()));
+                       throw new ConfigurationException(SERVICE_URL, 
+                           String.format("%s : please configure the URL of the 
CELI Web "
+                                   + "Service (e.g. by" + "using the 
'Configuration' tab of "
+                                   +"the Apache Felix Web Console).", 
+                                   getClass().getSimpleName()));
                }
-               this.serviceURL = new URL(url);
+               try {
+                   this.serviceURL = new URL(url);
+               } catch (MalformedURLException e) {
+            throw new ConfigurationException(SERVICE_URL, 
+                String.format("%s : The URL of the CELI Web Service is not 
well formatted.", 
+                    getClass().getSimpleName()),e);
+        }
                this.client = new LanguageIdentifierClientHTTP(this.serviceURL, 
this.licenseKey);
        }
        
@@ -134,19 +161,27 @@ public class CeliLanguageIdentifierEnhan
                                lista = this.client.guessLanguage(text);
                        else 
                                lista = this.client.guessQueryLanguage(text);
-                       LiteralFactory literalFactory = 
LiteralFactory.getInstance();
                        
                        MGraph g = ci.getMetadata();
-                                       
-                       GuessedLanguage gl = lista.get(0);
-                       UriRef textEnhancement = 
EnhancementEngineHelper.createTextEnhancement(ci, this);
-                   g.add(new TripleImpl(textEnhancement, DC_LANGUAGE, new 
PlainLiteralImpl(gl.getLang())));
-                       g.add(new TripleImpl(textEnhancement, 
ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(gl.getConfidence())));
-               
+                       //in ENHANCE_ASYNC we need to use read/write locks on 
the ContentItem
+                       ci.getLock().writeLock().lock();
+                       try {
+                       GuessedLanguage gl = lista.get(0);
+                       UriRef textEnhancement = 
EnhancementEngineHelper.createTextEnhancement(ci, this);
+                   g.add(new TripleImpl(textEnhancement, DC_LANGUAGE, new 
PlainLiteralImpl(gl.getLang())));
+                       g.add(new TripleImpl(textEnhancement, 
ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(gl.getConfidence())));
+                       } finally {
+                           ci.getLock().writeLock().unlock();
+                       }
                        
-               } catch (Exception e) {
-                       log.error(e.getMessage(),e);
-               }
+               } catch (IOException e) {
+                   throw new EngineException("Error while calling the CELI 
language"
+                       +" identifier service (configured URL: "
+                       +serviceURL+")!",e);
+        } catch (SOAPException e) {
+            throw new EngineException("Error wile encoding/decoding the 
request/"
+                +"response to the CELI language identifier service!",e);
+        } 
 
        }
        

Modified: 
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/LanguageIdentifierClientHTTP.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/LanguageIdentifierClientHTTP.java?rev=1338604&r1=1338603&r2=1338604&view=diff
==============================================================================
--- 
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/LanguageIdentifierClientHTTP.java
 (original)
+++ 
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/LanguageIdentifierClientHTTP.java
 Tue May 15 08:58:22 2012
@@ -6,11 +6,13 @@ import java.io.OutputStreamWriter;
 import java.net.HttpURLConnection;
 import java.net.URI;
 import java.net.URL;
+import java.util.Collections;
 import java.util.List;
 import java.util.Vector;
 
 import javax.xml.soap.MessageFactory;
 import javax.xml.soap.SOAPBody;
+import javax.xml.soap.SOAPException;
 import javax.xml.soap.SOAPMessage;
 import javax.xml.soap.SOAPPart;
 import javax.xml.transform.stream.StreamSource;
@@ -70,94 +72,90 @@ public class LanguageIdentifierClientHTT
        }
 
 
-       
-       public List<GuessedLanguage> guessQueryLanguage(String text){
+       //NOTE (rwesten): I rather do the error handling in the 
EnhancementEngine!
+       public List<GuessedLanguage> guessQueryLanguage(String text) throws 
IOException, SOAPException{
+           if(text == null || text.isEmpty()){ // no text
+               return Collections.emptyList(); //no language
+           }
                List<GuessedLanguage> guesses = new Vector<GuessedLanguage>();
 
-               try {
-                       String txt = StringEscapeUtils.escapeXml(text);
-                       String xmldata = "<soapenv:Envelope 
xmlns:soapenv=\"http://schemas.xmlsoap.org/soap/envelope/\"; 
xmlns:lan=\"http://research.celi.it/LanguageIdentifierWS\";><soapenv:Header/><soapenv:Body>"
-                                       
+"<lan:guessQueryLanguage><textToGuess>"+txt+"</textToGuess></lan:guessQueryLanguage></soapenv:Body></soapenv:Envelope>";
-                       
-                       
-                       String responseXml = doPostRequest(this.serviceEP, 
xmldata);
-                       log.debug(responseXml);
-
-                       // Create SoapMessage
-                       MessageFactory msgFactory = 
MessageFactory.newInstance();
-                       SOAPMessage message = msgFactory.createMessage();
-                       SOAPPart soapPart = message.getSOAPPart();
-
-                       // Load the SOAP text into a stream source
-                       ByteArrayInputStream stream = new 
ByteArrayInputStream(responseXml.getBytes("UTF-8"));
-                       StreamSource source = new StreamSource(stream);
-
-                       // Set contents of message
-                       soapPart.setContent(source);
-
-                       SOAPBody soapBody = message.getSOAPBody();
-                       NodeList nlist = 
soapBody.getElementsByTagNameNS("*","return");
-                       for (int i = 0; i < nlist.getLength(); i++) {
-                               try {
-                                       Element result = (Element) 
nlist.item(i);
-                                       String lang = 
result.getAttribute("language");
-                                       double 
d=Double.parseDouble(result.getAttribute("guessConfidence"));
-                                       
-                                       guesses.add(new GuessedLanguage(lang, 
d));
-                               } catch (Exception e) {
-                                       e.printStackTrace();
-                               }
+               String txt = StringEscapeUtils.escapeXml(text);
+               String xmldata = "<soapenv:Envelope 
xmlns:soapenv=\"http://schemas.xmlsoap.org/soap/envelope/\"; 
xmlns:lan=\"http://research.celi.it/LanguageIdentifierWS\";><soapenv:Header/><soapenv:Body>"
+                               
+"<lan:guessQueryLanguage><textToGuess>"+txt+"</textToGuess></lan:guessQueryLanguage></soapenv:Body></soapenv:Envelope>";
+               
+               
+               String responseXml = doPostRequest(this.serviceEP, xmldata);
+               log.debug(responseXml);
 
+               // Create SoapMessage
+               MessageFactory msgFactory = MessageFactory.newInstance();
+               SOAPMessage message = msgFactory.createMessage();
+               SOAPPart soapPart = message.getSOAPPart();
+
+               // Load the SOAP text into a stream source
+               ByteArrayInputStream stream = new 
ByteArrayInputStream(responseXml.getBytes("UTF-8"));
+               StreamSource source = new StreamSource(stream);
+
+               // Set contents of message
+               soapPart.setContent(source);
+
+               SOAPBody soapBody = message.getSOAPBody();
+               NodeList nlist = soapBody.getElementsByTagNameNS("*","return");
+               for (int i = 0; i < nlist.getLength(); i++) {
+                       try {
+                               Element result = (Element) nlist.item(i);
+                               String lang = result.getAttribute("language");
+                               double 
d=Double.parseDouble(result.getAttribute("guessConfidence"));
+                               
+                               guesses.add(new GuessedLanguage(lang, d));
+                       } catch (Exception e) {
+                               e.printStackTrace();
                        }
-               } catch (Exception e) {
-                       e.printStackTrace();
+
                }
 
                return guesses;
        }
        
-       public List<GuessedLanguage> guessLanguage(String text) {
+    //NOTE (rwesten): I rather do the error handling in the EnhancementEngine!
+       public List<GuessedLanguage> guessLanguage(String text) throws 
IOException,SOAPException {
 
                List<GuessedLanguage> guesses = new Vector<GuessedLanguage>();
 
-               try {
-                       String txt = StringEscapeUtils.escapeXml(text);
-                       String xmldata = "<soapenv:Envelope 
xmlns:soapenv=\"http://schemas.xmlsoap.org/soap/envelope/\"; 
xmlns:lan=\"http://research.celi.it/LanguageIdentifierWS\";><soapenv:Header/><soapenv:Body>"
-                                       
+"<lan:guessLanguage><textToGuess>"+txt+"</textToGuess></lan:guessLanguage></soapenv:Body></soapenv:Envelope>";
-
-                       URI uri = new 
URI("http://linguagrid.org/LSGrid/ws/language-identifier";);
-                       
-                       String responseXml = doPostRequest(uri.toURL(), 
xmldata);
-                       log.debug(responseXml);
-
-                       // Create SoapMessage
-                       MessageFactory msgFactory = 
MessageFactory.newInstance();
-                       SOAPMessage message = msgFactory.createMessage();
-                       SOAPPart soapPart = message.getSOAPPart();
-
-                       // Load the SOAP text into a stream source
-                       ByteArrayInputStream stream = new 
ByteArrayInputStream(responseXml.getBytes("UTF-8"));
-                       StreamSource source = new StreamSource(stream);
-
-                       // Set contents of message
-                       soapPart.setContent(source);
-
-                       SOAPBody soapBody = message.getSOAPBody();
-                       NodeList nlist = 
soapBody.getElementsByTagNameNS("*","return");
-                       for (int i = 0; i < nlist.getLength(); i++) {
-                               try {
-                                       Element result = (Element) 
nlist.item(i);
-                                       String lang = 
result.getAttribute("language");
-                                       double 
d=Double.parseDouble(result.getAttribute("guessConfidence"));
-                                       
-                                       guesses.add(new GuessedLanguage(lang, 
d));
-                               } catch (Exception e) {
-                                       e.printStackTrace();
-                               }
+               String txt = StringEscapeUtils.escapeXml(text);
+               String xmldata = "<soapenv:Envelope 
xmlns:soapenv=\"http://schemas.xmlsoap.org/soap/envelope/\"; 
xmlns:lan=\"http://research.celi.it/LanguageIdentifierWS\";><soapenv:Header/><soapenv:Body>"
+                               
+"<lan:guessLanguage><textToGuess>"+txt+"</textToGuess></lan:guessLanguage></soapenv:Body></soapenv:Envelope>";
+               //NOTE (rwesten) I think this should be also the #serviceEP
+               //URI uri = new 
URI("http://linguagrid.org/LSGrid/ws/language-identifier";);
+               
+               String responseXml = doPostRequest(serviceEP, xmldata);
+               log.debug(responseXml);
 
+               // Create SoapMessage
+               MessageFactory msgFactory = MessageFactory.newInstance();
+               SOAPMessage message = msgFactory.createMessage();
+               SOAPPart soapPart = message.getSOAPPart();
+
+               // Load the SOAP text into a stream source
+               ByteArrayInputStream stream = new 
ByteArrayInputStream(responseXml.getBytes("UTF-8"));
+               StreamSource source = new StreamSource(stream);
+
+               // Set contents of message
+               soapPart.setContent(source);
+
+               SOAPBody soapBody = message.getSOAPBody();
+               NodeList nlist = soapBody.getElementsByTagNameNS("*","return");
+               for (int i = 0; i < nlist.getLength(); i++) {
+                       try {
+                               Element result = (Element) nlist.item(i);
+                               String lang = result.getAttribute("language");
+                               double 
d=Double.parseDouble(result.getAttribute("guessConfidence"));
+                               
+                               guesses.add(new GuessedLanguage(lang, d));
+                       } catch (Exception e) {
+                               e.printStackTrace();
                        }
-               } catch (Exception e) {
-                       e.printStackTrace();
+
                }
 
                return guesses;

Modified: 
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/CeliLanguageIdentifierEnhancementEngineTest.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/CeliLanguageIdentifierEnhancementEngineTest.java?rev=1338604&r1=1338603&r2=1338604&view=diff
==============================================================================
--- 
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/CeliLanguageIdentifierEnhancementEngineTest.java
 (original)
+++ 
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/CeliLanguageIdentifierEnhancementEngineTest.java
 Tue May 15 08:58:22 2012
@@ -1,30 +1,43 @@
 package org.apache.stanbol.enhancer.engines.celi.langid.impl;
 
+import static junit.framework.Assert.assertEquals;
+import static junit.framework.Assert.assertFalse;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE;
 import static 
org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_ENTITYANNOTATION;
 import static 
org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_TEXTANNOTATION;
+import static 
org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper.validateAllEntityAnnotations;
+import static 
org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper.validateEnhancement;
 import static org.junit.Assert.assertTrue;
 
 import java.io.IOException;
 import java.net.UnknownHostException;
 import java.util.Dictionary;
+import java.util.HashMap;
 import java.util.Hashtable;
 import java.util.Iterator;
 
+import junit.framework.Assert;
+
+import org.apache.clerezza.rdf.core.LiteralFactory;
 import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.NonLiteral;
+import org.apache.clerezza.rdf.core.PlainLiteral;
+import org.apache.clerezza.rdf.core.Resource;
 import org.apache.clerezza.rdf.core.Triple;
 import org.apache.clerezza.rdf.core.UriRef;
 import 
org.apache.stanbol.enhancer.contentitem.inmemory.InMemoryContentItemFactory;
 import 
org.apache.stanbol.enhancer.engines.celi.classification.impl.CeliClassificationEnhancementEngine;
 import 
org.apache.stanbol.enhancer.engines.celi.lemmatizer.impl.CeliLemmatizerEnhancementEngine;
 import 
org.apache.stanbol.enhancer.engines.celi.ner.impl.CeliNamedEntityExtractionEnhancementEngine;
-import 
org.apache.stanbol.enhancer.engines.celi.test_utils.MockComponentContext;
+import org.apache.stanbol.enhancer.engines.celi.testutils.MockComponentContext;
 import org.apache.stanbol.enhancer.servicesapi.ContentItem;
 import org.apache.stanbol.enhancer.servicesapi.ContentItemFactory;
 import org.apache.stanbol.enhancer.servicesapi.EngineException;
 import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
 import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
 import org.apache.stanbol.enhancer.servicesapi.impl.StringSource;
+import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
+import org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.junit.Test;
@@ -67,9 +80,14 @@ public class CeliLanguageIdentifierEnhan
                ContentItem ci = wrapAsContentItem(TEXT);
                try {
                        langIdentifier.computeEnhancements(ci);
-                       int textAnnoNum = 
checkAllTextAnnotations(ci.getMetadata(), TEXT);
-               log.info(textAnnoNum + " TextAnnotations found ...");
-               int entityAnnoNum = checkAllEntityAnnotations(ci.getMetadata());
+               HashMap<UriRef,Resource> expectedValues = new 
HashMap<UriRef,Resource>();
+               expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, 
ci.getUri());
+               expectedValues.put(Properties.DC_CREATOR, 
LiteralFactory.getInstance().createTypedLiteral(
+                   langIdentifier.getClass().getName()));
+               
+               validateLanguageAnnotation(ci.getMetadata(), 
TEXT,expectedValues);
+               int entityAnnoNum = 
validateAllEntityAnnotations(ci.getMetadata(), expectedValues);
+               assertEquals("No EntityAnnotations are expected",0, 
entityAnnoNum);
                log.info(entityAnnoNum + " EntityAnnotations found ...");
                } catch (EngineException e) {
                        if (e.getCause() != null && e.getCause() instanceof 
UnknownHostException) {
@@ -80,29 +98,27 @@ public class CeliLanguageIdentifierEnhan
                }
        }
 
-       private int checkAllEntityAnnotations(MGraph g) {
-               Iterator<Triple> entityAnnotationIterator = g.filter(null, 
RDF_TYPE, ENHANCER_ENTITYANNOTATION);
-               int entityAnnotationCount = 0;
-               while (entityAnnotationIterator.hasNext()) {
-                       UriRef entityAnnotation = (UriRef) 
entityAnnotationIterator.next().getSubject();
-                       entityAnnotationCount++;
-               }
-               return entityAnnotationCount;
-       }
 
-       private int checkAllTextAnnotations(MGraph g, String content) {
+       private void validateLanguageAnnotation(MGraph g, String 
content,HashMap<UriRef,Resource> expectedValues) {
                Iterator<Triple> textAnnotationIterator = g.filter(null, 
RDF_TYPE, ENHANCER_TEXTANNOTATION);
                // test if a textAnnotation is present
-               assertTrue(textAnnotationIterator.hasNext());
-               int textAnnotationCount = 0;
-               while (textAnnotationIterator.hasNext()) {
-                       UriRef textAnnotation = (UriRef) 
textAnnotationIterator.next().getSubject();
-                       textAnnotationCount++;
-               }
-               return textAnnotationCount;
+               assertTrue("The Language Annotation is 
missing!",textAnnotationIterator.hasNext());
+               NonLiteral annotation = 
textAnnotationIterator.next().getSubject();
+               assertTrue("TextAnnotations MUST BE URIs", annotation 
instanceof UriRef);
+               assertFalse("Only a single Language Annotation is expected!", 
textAnnotationIterator.hasNext());
+               //validate enhancement metadata (this also checks the 
confidence)
+               validateEnhancement(g, (UriRef)annotation, expectedValues);
+               //validate the dc:language value
+               Iterator<Triple> languageIterator = g.filter(annotation, 
Properties.DC_LANGUAGE, null);
+        assertTrue("The fise:TextAnnotation for the language MUST HAVE a value 
for dc:language!",languageIterator.hasNext());
+        Resource languageResource = languageIterator.next().getObject();
+        assertFalse("Only a single dc:langauge value MUST BE present!", 
languageIterator.hasNext());
+        assertTrue("The dc:langauge value MUST BE a plain 
literal",languageResource instanceof PlainLiteral);
+        assertTrue("The dc:language value MIST BE at least two chars long",
+            ((PlainLiteral)languageResource).getLexicalForm().length()>=2);
        }
-
-       public static void addEnanchements(ContentItem ci) throws IOException, 
ConfigurationException, EngineException {
+// removed: other tests now add a simple triple with 
<{ciUri},dc:langauge,{lang}>
+/**    public static void addEnanchements(ContentItem ci) throws IOException, 
ConfigurationException, EngineException {
                //Add guessed language
                Dictionary<String, Object> properties = new Hashtable<String, 
Object>();
                properties.put(EnhancementEngine.PROPERTY_NAME, 
"celiLangIdentifier");
@@ -111,5 +127,5 @@ public class CeliLanguageIdentifierEnhan
                CeliLanguageIdentifierEnhancementEngine langIdentifier=new 
CeliLanguageIdentifierEnhancementEngine();
                langIdentifier.activate(context);
                langIdentifier.computeEnhancements(ci);
-       }
+       } **/
 }


Reply via email to