Author: rwesten
Date: Thu May 31 04:45:41 2012
New Revision: 1344563

URL: http://svn.apache.org/viewvc?rev=1344563&view=rev
Log:
STANBOL-583: Applied the patch provided by Alessio Bosca on 2012-05-30

* Had to manually merge the example texts in the NER test because of encoding
  issues.
* Unit tests complete successfully after applying the patch.

Modified:
    
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/ClassificationClientHTTP.java
    
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/LemmatizerClientHTTP.java
    
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/CeliNamedEntityExtractionEnhancementEngine.java
    
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/NERserviceClientHTTP.java
    
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/CeliNamedEntityExtractionEnhancementEngineTest.java

Modified: 
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/ClassificationClientHTTP.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/ClassificationClientHTTP.java?rev=1344563&r1=1344562&r2=1344563&view=diff
==============================================================================
--- 
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/ClassificationClientHTTP.java
 (original)
+++ 
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/ClassificationClientHTTP.java
 Thu May 31 04:45:41 2012
@@ -250,45 +250,45 @@ public class ClassificationClientHTTP {
        //      An even better variant would be to write a UnitTest for that!!
        //      This would be recommended of the called service is still in beta
        //      and may change at any time
-//    public static void main(String[] args) throws Exception {
-//        String lang = "fr";
-//        String text = "Brigitte Bardot, née  le 28 septembre " +
-//                "1934 à Paris, est une actrice de cinéma et chanteuse française.";
-//        
-//        //For request testing
-//        //Writer request = new StringWriter();
-//        
-//        //For response testing
-//        HttpURLConnection con = Utils.createPostRequest(
-//            new URL("http://linguagrid.org/LSGrid/ws/dbpedia-classification"),
-//            Collections.singletonMap("Content-Type", CONTENT_TYPE));
-//        Writer request = new OutputStreamWriter(con.getOutputStream(),UTF8);
-//        
-//        //"stream" the request content directly to the buffered writer
-//        BufferedWriter writer = new BufferedWriter(request);
-//        
-//        writer.write(SOAP_PREFIX);
-//        writer.write("<clas:classify>");
-//        writer.write("<clas:user>wiki</clas:user>");//TODO: should the user be configurable?
-//        writer.write("<clas:model>");
-//        writer.write(lang);
-//        writer.write("</clas:model>");
-//        writer.write("<clas:text>");
-//        StringEscapeUtils.escapeXml(writer, text); //write the escaped text directly to the request
-//        writer.write("</clas:text>");
-//        writer.write("</clas:classify>");
-//        writer.write(SOAP_SUFFIX);
-//        writer.close();
-//        
-//        //log the Request (if request testing)
-//        //log.info("Request \n{}",request.toString());
-//        
-//        //for response testing we need to call the service
-//        //Call the service
-//        long start = System.currentTimeMillis();
-//        InputStream stream = con.getInputStream();
-//        log.info("Request to took {}ms",System.currentTimeMillis()-start);
-//        log.info("Response:\n{}",IOUtils.toString(stream));
-//        stream.close();
-//    }
+    public static void main(String[] args) throws Exception {
+        String lang = "fr";
+        String text = "Brigitte Bardot, née  le 28 septembre " +
+                "1934 à Paris, est une actrice de cinéma et chanteuse française.";
+        
+        //For request testing
+        //Writer request = new StringWriter();
+        
+        //For response testing
+        HttpURLConnection con = Utils.createPostRequest(
+            new URL("http://linguagrid.org/LSGrid/ws/dbpedia-classification"),
+            Collections.singletonMap("Content-Type", CONTENT_TYPE));
+        Writer request = new OutputStreamWriter(con.getOutputStream(),UTF8);
+        
+        //"stream" the request content directly to the buffered writer
+        BufferedWriter writer = new BufferedWriter(request);
+        
+        writer.write(SOAP_PREFIX);
+        writer.write("<clas:classify>");
+        writer.write("<clas:user>wiki</clas:user>");//TODO: should the user be configurable?
+        writer.write("<clas:model>");
+        writer.write(lang);
+        writer.write("</clas:model>");
+        writer.write("<clas:text>");
+        StringEscapeUtils.escapeXml(writer, text); //write the escaped text directly to the request
+        writer.write("</clas:text>");
+        writer.write("</clas:classify>");
+        writer.write(SOAP_SUFFIX);
+        writer.close();
+        
+        //log the Request (if request testing)
+        //log.info("Request \n{}",request.toString());
+        
+        //for response testing we need to call the service
+        //Call the service
+        long start = System.currentTimeMillis();
+        InputStream stream = con.getInputStream();
+        log.info("Request to took {}ms",System.currentTimeMillis()-start);
+        log.info("Response:\n{}",IOUtils.toString(stream));
+        stream.close();
+    }
 }

Modified: 
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/LemmatizerClientHTTP.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/LemmatizerClientHTTP.java?rev=1344563&r1=1344562&r2=1344563&view=diff
==============================================================================
--- 
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/LemmatizerClientHTTP.java
 (original)
+++ 
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/LemmatizerClientHTTP.java
 Thu May 31 04:45:41 2012
@@ -72,7 +72,7 @@ public class LemmatizerClientHTTP {
         BufferedWriter writer = new BufferedWriter(new 
OutputStreamWriter(con.getOutputStream(),UTF8));
         //write the SOAP envelope, header and start the body
         writer.write(SOAP_REQUEST_PREFIX);
-        //wrtie the data (language and text)
+        //write the data (language and text)
         writer.write("<mor:inputText lang=\"");
         writer.write(lang);
         writer.write("\" text=\"");

Modified: 
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/CeliNamedEntityExtractionEnhancementEngine.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/CeliNamedEntityExtractionEnhancementEngine.java?rev=1344563&r1=1344562&r2=1344563&view=diff
==============================================================================
--- 
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/CeliNamedEntityExtractionEnhancementEngine.java
 (original)
+++ 
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/CeliNamedEntityExtractionEnhancementEngine.java
 Thu May 31 04:45:41 2012
@@ -75,12 +75,12 @@ public class CeliNamedEntityExtractionEn
        private static Map<String, UriRef> entityTypes = new HashMap<String, 
UriRef>();
        static {
                entityTypes.put("pers", OntologicalClasses.DBPEDIA_PERSON);
+               entityTypes.put("PER", OntologicalClasses.DBPEDIA_PERSON);
                entityTypes.put("loc", OntologicalClasses.DBPEDIA_PLACE);
+               entityTypes.put("GPE", OntologicalClasses.DBPEDIA_PLACE);
                entityTypes.put("org", OntologicalClasses.DBPEDIA_ORGANISATION);
 
                entityTypes.put("time", OntologicalClasses.SKOS_CONCEPT);
-               entityTypes.put("prod", OntologicalClasses.SKOS_CONCEPT);
-               entityTypes.put("amount", OntologicalClasses.SKOS_CONCEPT);
        }
        /**
         * The supported languages (configured via the {@link 
#SUPPORTED_LANGUAGES}
@@ -114,7 +114,7 @@ public class CeliNamedEntityExtractionEn
        @Property(value = "http://linguagrid.org/LSGrid/ws/com.celi-france.linguagrid.namedentityrecognition.v0u0.demo")
        public static final String SERVICE_URL = 
"org.apache.stanbol.enhancer.engines.celi.ner.url";
 
-    @Property(value = "fr",cardinality=1000)
+    @Property(value = {"fr","it"},cardinality=1000)
     public static final String SUPPORTED_LANGUAGES = 
"org.apache.stanbol.enhancer.engines.celi.ner.languages";
                
        private String licenseKey;
@@ -128,7 +128,8 @@ public class CeliNamedEntityExtractionEn
                super.activate(ctx);
                @SuppressWarnings("unchecked")
         Dictionary<String, Object> properties = ctx.getProperties();
-
+        log.info("Activate CELI NER engine:");
+        log.info(" > name: {}",getName());
                this.licenseKey = (String) properties.get(LICENSE_KEY);
                if (licenseKey == null || licenseKey.isEmpty()) {
                        log.warn("no CELI license key configured for this Engine, a guest account will be used (max 100 requests per day). Go on http://linguagrid.org for getting a proper license key.");
@@ -140,6 +141,7 @@ public class CeliNamedEntityExtractionEn
                this.serviceURL = new URL(url);
                
                this.client = new NERserviceClientHTTP(this.serviceURL, 
this.licenseKey);
+        log.info(" > CELI service: {}",serviceURL);
                
                //init the supported languages (now configurable)
                Object languagObject = properties.get(SUPPORTED_LANGUAGES);
@@ -173,6 +175,7 @@ public class CeliNamedEntityExtractionEn
                                    languagObject));
                }
                this.supportedLangs = Collections.unmodifiableSet(languages);
+        log.info(" > supported languages: {}",supportedLangs);
        }
 
        @Override
@@ -229,7 +232,7 @@ public class CeliNamedEntityExtractionEn
         }
         Language lang = new Language(language); //used for the palin literals 
in TextAnnotations
                try {
-                       List<NamedEntity> lista = 
this.client.extractEntities(text);
+                       List<NamedEntity> lista = 
this.client.extractEntities(text, language);
                        LiteralFactory literalFactory = 
LiteralFactory.getInstance();
 
                        MGraph g = ci.getMetadata();
@@ -269,7 +272,7 @@ public class CeliNamedEntityExtractionEn
 
        private Resource getEntityRefForType(String type) {
                if (!entityTypes.containsKey(type))
-                       return null;
+                       return OntologicalClasses.SKOS_CONCEPT;
                else
                        return entityTypes.get(type);
        }

Modified: 
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/NERserviceClientHTTP.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/NERserviceClientHTTP.java?rev=1344563&r1=1344562&r2=1344563&view=diff
==============================================================================
--- 
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/NERserviceClientHTTP.java
 (original)
+++ 
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/NERserviceClientHTTP.java
 Thu May 31 04:45:41 2012
@@ -41,14 +41,14 @@ public class NERserviceClientHTTP {
         * The XML version, encoding; SOAP envelope, heder and starting element 
of the body;
         * processTextRequest and text starting element.
         */
-    private static final String REQUEST_PREFIX = "<?xml version=\"1.0\" encoding=\""+UTF8.name()+"\"?>" +
+    private static final String SOAP_PREFIX = "<?xml version=\"1.0\" encoding=\""+UTF8.name()+"\"?>" +
                "<soapenv:Envelope xmlns:soapenv=\"http://schemas.xmlsoap.org/soap/envelope/\" " +
                "xmlns:v0u0=\"http://linguagrid.org/ns/namedentityrecognition/v0u0\"><soapenv:Header/>" +
-               "<soapenv:Body><v0u0:processTextRequest><v0u0:text>";
+               "<soapenv:Body>";
     /**
      * closes the text, processTextRequest, SOAP body and envelope
      */
-    private static final String REQUEST_SUFFIX = 
"</v0u0:text></v0u0:processTextRequest></soapenv:Body></soapenv:Envelope>";
+    private static final String SOAP_SUFFIX = 
"</soapenv:Body></soapenv:Envelope>";
        
        private final URL serviceEP;
        private final String licenseKey;
@@ -70,7 +70,7 @@ public class NERserviceClientHTTP {
        }
 
 
-       public List<NamedEntity> extractEntities(String text) throws 
SOAPException, IOException {
+       public List<NamedEntity> extractEntities(String text, String lang) 
throws SOAPException, IOException {
            if(text == null || text.isEmpty()){
                //no text -> no extractions
                return Collections.emptyList();
@@ -80,9 +80,11 @@ public class NERserviceClientHTTP {
                HttpURLConnection con = Utils.createPostRequest(serviceEP, 
requestHeaders);
                //write content
                BufferedWriter writer = new BufferedWriter(new 
OutputStreamWriter(con.getOutputStream(),UTF8));
-               writer.write(REQUEST_PREFIX);
+               writer.write(SOAP_PREFIX);
+               writer.write("<v0u0:processTextRequest><v0u0:text>");
                StringEscapeUtils.escapeXml(writer, text);
-               writer.write(REQUEST_SUFFIX);
+               writer.write("</v0u0:text><v0u0:language>"+lang+"</v0u0:language></v0u0:processTextRequest>");
+               writer.write(SOAP_SUFFIX);
                writer.close();
                //Call the service
                long start = System.currentTimeMillis();

Modified: 
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/CeliNamedEntityExtractionEnhancementEngineTest.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/CeliNamedEntityExtractionEnhancementEngineTest.java?rev=1344563&r1=1344562&r2=1344563&view=diff
==============================================================================
--- 
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/CeliNamedEntityExtractionEnhancementEngineTest.java
 (original)
+++ 
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/CeliNamedEntityExtractionEnhancementEngineTest.java
 Thu May 31 04:45:41 2012
@@ -40,14 +40,19 @@ public class CeliNamedEntityExtractionEn
        
     private static final ContentItemFactory ciFactory = 
InMemoryContentItemFactory.getInstance();
 
-    private static final String TEXT = "Brigitte Bardot, née  le 28 septembre 1934 à Paris, est une actrice de cinéma et chanteuse française.";
-
+    private static final String TEXT_it = "Wolfgang Amadeus Mozart, nome di " +
+               "battesimo Joannes Chrysostomus Wolfgangus Theophilus Mozart " +
+               "(Salisburgo, 27 gennaio 1756 – Vienna, 5 dicembre 1791), è stato " +
+               "un compositore, pianista, organista e violinista.";
+    private static final String TEXT_fr = "Brigitte Bardot, née  le 28 septembre " +
+               "1934 à Paris, est une actrice de cinéma et chanteuse française.";
+    
        @BeforeClass
        public static void setUpServices() throws IOException, 
ConfigurationException {
                Dictionary<String, Object> properties = new Hashtable<String, 
Object>();
                properties.put(EnhancementEngine.PROPERTY_NAME, "celiNer");
            
properties.put(CeliNamedEntityExtractionEnhancementEngine.SERVICE_URL, "http://linguagrid.org/LSGrid/ws/com.celi-france.linguagrid.namedentityrecognition.v0u0.demo");
-           
properties.put(CeliNamedEntityExtractionEnhancementEngine.SUPPORTED_LANGUAGES, 
"fr");
+           
properties.put(CeliNamedEntityExtractionEnhancementEngine.SUPPORTED_LANGUAGES, 
"fr;it");
            MockComponentContext context = new MockComponentContext(properties);
                nerEngine.activate(context);
        }
@@ -60,17 +65,12 @@ public class CeliNamedEntityExtractionEn
     public static ContentItem wrapAsContentItem(final String text) throws 
IOException {
         return ciFactory.createContentItem(new StringSource(text));
     }
-
-       @Test
-       public void tesetEngine() throws Exception {
-               ContentItem ci = wrapAsContentItem(TEXT);
+    
+    private void testInput(String txt,String lang) throws EngineException, 
IOException{
+       ContentItem ci = wrapAsContentItem(txt);
                try {
-                   //add a simple triple to statically define the language of 
the test
-                   //content
-                   ci.getMetadata().add(new TripleImpl(ci.getUri(), 
DC_LANGUAGE, new PlainLiteralImpl("fr")));
-                   //unit test should not depend on each other (if possible)
-                       
//CeliLanguageIdentifierEnhancementEngineTest.addEnanchements(ci);
-                       
+                   //add a simple triple to statically define the language of 
the test content
+                   ci.getMetadata().add(new TripleImpl(ci.getUri(), 
DC_LANGUAGE, new PlainLiteralImpl(lang)));
                        nerEngine.computeEnhancements(ci);
 
                        TestUtils.logEnhancements(ci);
@@ -79,7 +79,7 @@ public class CeliNamedEntityExtractionEn
                        expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, 
ci.getUri());
                        expectedValues.put(Properties.DC_CREATOR, 
LiteralFactory.getInstance().createTypedLiteral(
                            nerEngine.getClass().getName()));
-                       int textAnnoNum = 
validateAllTextAnnotations(ci.getMetadata(), TEXT, expectedValues);
+                       int textAnnoNum = 
validateAllTextAnnotations(ci.getMetadata(), txt, expectedValues);
                log.info(textAnnoNum + " TextAnnotations found ...");
                int entityAnnoNum = 
EnhancementStructureHelper.validateAllEntityAnnotations(ci.getMetadata(),expectedValues);
                log.info(entityAnnoNum + " EntityAnnotations found ...");
@@ -90,6 +90,12 @@ public class CeliNamedEntityExtractionEn
                        }
                        throw e;
                }
+    }
+    
+       @Test
+       public void tesetEngine() throws Exception {
+               
this.testInput(CeliNamedEntityExtractionEnhancementEngineTest.TEXT_it, "it");
+               
this.testInput(CeliNamedEntityExtractionEnhancementEngineTest.TEXT_fr, "fr");
        }
 
 //     private int checkAllEntityAnnotations(MGraph g) {


Reply via email to