Added: 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/pom.xml
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/pom.xml?rev=1374984&view=auto
==============================================================================
--- 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/pom.xml
 (added)
+++ 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/pom.xml
 Mon Aug 20 12:11:01 2012
@@ -0,0 +1,121 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <artifactId>org.apache.stanbol.enhancer.parent</artifactId>
+    <groupId>org.apache.stanbol</groupId>
+    <version>0.9.0-incubating</version>
+    <relativePath>../../parent</relativePath>
+  </parent>
+
+  <groupId>org.apache.stanbol</groupId>
+  <artifactId>org.apache.stanbol.enhancer.engines.dbpspotlightspot</artifactId>
+  <packaging>bundle</packaging>
+
+  <name>Apache Stanbol Enhancer Enhancement Engine : DBPedia Spotlight 
Spot</name>
+  <description>an enhancement engine for spotting</description>
+
+  <inceptionYear>2010</inceptionYear>
+
+  <!--scm>
+    <connection>
+      
scm:svn:http://svn.apache.org/repos/asf/incubator/stanbol/tags/0.9.0-incubating/enhancer/engines/langid/
+    </connection>
+    <developerConnection>
+      
scm:svn:https://svn.apache.org/repos/asf/incubator/stanbol/tags/0.9.0-incubating/enhancer/engines/langid/
+    </developerConnection>
+    <url>http://incubator.apache.org/stanbol/</url>
+  </scm-->
+
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.felix</groupId>
+        <artifactId>maven-bundle-plugin</artifactId>
+        <extensions>true</extensions>
+        <configuration>
+          <instructions>
+            <Export-Package>
+              
org.apache.stanbol.enhancer.engines.dbpspotlightspot;version=${project.version}
+            </Export-Package>
+            <Embed-Dependency>
+            </Embed-Dependency>
+          </instructions>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.felix</groupId>
+        <artifactId>maven-scr-plugin</artifactId>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.rat</groupId>
+        <artifactId>apache-rat-plugin</artifactId>
+        <configuration>
+          <excludes>
+            <!-- AL20 licensed files: See src/test/resources/README -->
+            <exclude>src/test/resources/en.txt</exclude>
+          </excludes>
+        </configuration>
+      </plugin>
+    </plugins>
+  </build>
+
+  <properties>
+    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+  </properties>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.stanbol</groupId>
+      <artifactId>org.apache.stanbol.enhancer.servicesapi</artifactId>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.tika</groupId>
+      <artifactId>tika-core</artifactId>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.felix</groupId>
+      <artifactId>org.apache.felix.scr.annotations</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.clerezza</groupId>
+      <artifactId>rdf.core</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>commons-io</groupId>
+      <artifactId>commons-io</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-api</artifactId>
+    </dependency>
+
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+
+</project>

Added: 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/license/THIRD-PARTY.properties
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/license/THIRD-PARTY.properties?rev=1374984&view=auto
==============================================================================
--- 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/license/THIRD-PARTY.properties
 (added)
+++ 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/license/THIRD-PARTY.properties
 Mon Aug 20 12:11:01 2012
@@ -0,0 +1,17 @@
+# Generated by org.codehaus.mojo.license.AddThirdPartyMojo
+#-------------------------------------------------------------------------------
+# Already used licenses in project :
+# - Apache License
+# - Common Development and Distribution License (CDDL) v1.0
+# - Common Public License Version 1.0
+# - ICU License
+# - MIT License
+# - The Apache Software License, Version 2.0
+#-------------------------------------------------------------------------------
+# Please fill the missing licenses for dependencies :
+#
+#
+#Wed Feb 15 19:06:13 CET 2012
+javax.servlet--servlet-api--2.4=Common Development And Distribution License 
(CDDL), Version 1.0
+org.osgi--org.osgi.compendium--4.1.0=The Apache Software License, Version 2.0
+org.osgi--org.osgi.core--4.1.0=The Apache Software License, Version 2.0

Added: 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/DBPSLSurfaceForm.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/DBPSLSurfaceForm.java?rev=1374984&view=auto
==============================================================================
--- 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/DBPSLSurfaceForm.java
 (added)
+++ 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/DBPSLSurfaceForm.java
 Mon Aug 20 12:11:01 2012
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.dbpspotlightspot;
+
+//import org.apache.clerezza.rdf.core.Resource;
+
+/**
+ * Stores a single surface form given by DBPedia Spotlight Spot.
+ * <p>
+ * This is a plain mutable value holder: all fields are public and are assigned
+ * directly by the code that reads the Spotlight response.
+ *
+ * @author <a href="mailto:[email protected]">Iavor Jelev</a>
+ */
+public class DBPSLSurfaceForm {
+
+    /** The text of the surface form. */
+    public String name;
+    /** The type reported for the surface form. */
+    public String type;
+    /** The offset reported for the surface form. */
+    public Integer offset;
+
+    /**
+     * Returns a human readable representation listing all three fields.
+     * Fields that are not set are rendered as <code>null</code>.
+     *
+     * @return a string of the form <code>[name=..., offset=..., type=...]</code>
+     */
+    @Override
+    public String toString() {
+        // %d formats the Integer offset. The previously used %i is not a valid
+        // java.util.Formatter conversion and made every call throw
+        // UnknownFormatConversionException.
+        return String.format("[name=%s, offset=%d, type=%s]", name, offset, type);
+    }
+}

Added: 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/DBPSpotlightSpotEnhancementEngine.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/DBPSpotlightSpotEnhancementEngine.java?rev=1374984&view=auto
==============================================================================
--- 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/DBPSpotlightSpotEnhancementEngine.java
 (added)
+++ 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/DBPSpotlightSpotEnhancementEngine.java
 Mon Aug 20 12:11:01 2012
@@ -0,0 +1,393 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.dbpspotlightspot;
+
+import static 
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_LANGUAGE;
+import static 
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_RELATION;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE;
+import static 
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_END;
+import static 
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTED_TEXT;
+import static 
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_START;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.UnsupportedEncodingException;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.net.URLEncoder;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Dictionary;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+
+import org.apache.clerezza.rdf.core.Language;
+import org.apache.clerezza.rdf.core.Literal;
+import org.apache.clerezza.rdf.core.LiteralFactory;
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.NonLiteral;
+import org.apache.clerezza.rdf.core.Resource;
+import org.apache.clerezza.rdf.core.Triple;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.clerezza.rdf.core.serializedform.Serializer;
+import org.apache.felix.scr.annotations.Component;
+import org.apache.felix.scr.annotations.Properties;
+import org.apache.felix.scr.annotations.Property;
+import org.apache.felix.scr.annotations.Service;
+import org.apache.stanbol.enhancer.servicesapi.Blob;
+import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.EngineException;
+import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.InvalidContentException;
+import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
+import 
org.apache.stanbol.enhancer.servicesapi.helper.AbstractEnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
+import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.osgi.service.cm.ConfigurationException;
+import org.osgi.service.component.ComponentContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.NodeList;
+
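+/**
+ * {@link DBPSpotlightSpotEnhancementEngine} sends the plain text of a content
+ * item to a DBpedia Spotlight "spot" endpoint and adds a TextAnnotation to the
+ * content item's metadata for each surface form returned by the service.
+ *
+ * @author Iavor Jelev, Babelmonkeys (GzEvD)
+ */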
+@Component(
+    metatype = true, 
+    immediate = true,
+    label = "%stanbol.DBPSpotlightSpotEnhancementEngine.name", 
+    description = "%stanbol.DBPSpotlightSpotEnhancementEngine.description")
+@Service
+@Properties(value={
+    @Property(name=EnhancementEngine.PROPERTY_NAME,value="dbpspotlightspot")
+})
+public class DBPSpotlightSpotEnhancementEngine 
+        extends AbstractEnhancementEngine<IOException,RuntimeException>
+        implements EnhancementEngine, ServiceProperties {
+
+    /**
+     * Configuration key for the URL of the DBpedia Spotlight "spot" REST endpoint.
+     * The default is the value given in the {@link Property} annotation.
+     */
+    @Property(value = "http://spotlight.dbpedia.org/rest/spot")
+    public static final String SL_URL_KEY = "stanbol.DBPSpotlightSpotEnhancementEngine.url";
+
+    /**
+     * Configuration key for the name of the spotter that is sent to the endpoint
+     * as the <code>spotter</code> request parameter.
+     */
+    @Property(value = "LingPipeSpotter")
+    public static final String SL_SPOTTER = "stanbol.DBPSpotlightSpotEnhancementEngine.spotter";
+
+
+    /**
+     * The default value for the execution order of this engine. Currently set to
+     * {@link ServiceProperties#ORDERING_CONTENT_EXTRACTION}<code> - 29</code>.
+     */
+    public static final Integer defaultOrder = ORDERING_CONTENT_EXTRACTION - 29;
+
+    /**
+     * This contains the only MIME type directly supported by this enhancement engine.
+     */
+    private static final String TEXT_PLAIN_MIMETYPE = "text/plain";
+    /**
+     * Set containing the only supported mime type {@link #TEXT_PLAIN_MIMETYPE}
+     */
+    private static final Set<String> SUPPORTED_MIMTYPES = Collections.singleton(TEXT_PLAIN_MIMETYPE);
+
+    /**
+     * This contains the languages accepted by this engine.
+     * A content item whose metadata carries a
+     * {@link org.apache.stanbol.enhancer.servicesapi.rdf.Properties#DC_LANGUAGE}
+     * value that is not in this set is rejected by {@link #canEnhance(ContentItem)};
+     * a content item without any language value is still processed.
+     */
+    protected static final Set<String> SUPPORTED_LANGUAGES =
+            Collections.unmodifiableSet(new HashSet<String>(
+                    Arrays.asList("en")));
+
+    /** holds the logger. */
+    private static final Logger log = LoggerFactory.getLogger(DBPSpotlightSpotEnhancementEngine.class);
+
+    /** holds the url of the Spotlight REST endpoint */
+    private String spotlightUrl;
+    /** holds the name of the spotter to be used */
+    private String spotlightSpotter;
+
+
+
+    /**
+     * Activates the engine and reads the endpoint URL and the spotter name from
+     * the component configuration.
+     * <p>
+     * If no URL is configured, <code>http://spotlight.dbpedia.org/rest/spot</code>
+     * is used. If no spotter is configured, the spotter stays <code>null</code>.
+     *
+     * @param ce  the {@link ComponentContext}
+     * @throws ConfigurationException if thrown by <code>super.activate(ce)</code>
+     * @throws IOException if thrown by <code>super.activate(ce)</code>
+     */
+    @SuppressWarnings("unchecked")
+    protected void activate(ComponentContext ce) throws ConfigurationException, IOException {
+
+        super.activate(ce);
+
+        Dictionary<String, Object> properties = ce.getProperties();
+
+        // look each value up once instead of once for the null check and once for the cast
+        Object configuredUrl = properties.get(SL_URL_KEY);
+        spotlightUrl = configuredUrl == null
+                ? "http://spotlight.dbpedia.org/rest/spot"
+                : (String) configuredUrl;
+
+        // casting null yields null, so a missing spotter simply stays unset
+        spotlightSpotter = (String) properties.get(SL_SPOTTER);
+    }
+    
+
+    /**
+     * Decides whether this engine can process the given content item.
+     * <p>
+     * The item needs a content part with a supported mime type. If its metadata
+     * names a language, that language has to be one of the supported languages;
+     * an item without a language is accepted.
+     *
+     * @param ci  the {@link ContentItem}
+     * @return <code>ENHANCE_SYNCHRONOUS</code> if the item can be processed,
+     *         <code>CANNOT_ENHANCE</code> otherwise
+     */
+    public int canEnhance(ContentItem ci) throws EngineException {
+        if (ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES) == null) {
+            return CANNOT_ENHANCE;
+        }
+
+        String language = getMetadataLanguage(ci.getMetadata(), null);
+        boolean languageAccepted = language == null || SUPPORTED_LANGUAGES.contains(language);
+        if (languageAccepted) {
+            return ENHANCE_SYNCHRONOUS;
+        }
+
+        log.info("DBpedia Spotlight can not process ContentItem {} because "
+            + "language {} is not supported (supported: {})",
+            new Object[]{ci.getUri(),language,SUPPORTED_LANGUAGES});
+        return CANNOT_ENHANCE;
+    }
+
+
+    /**
+     * Calculate the enhancements by doing a POST request to the DBpedia Spotlight
+     * endpoint and adding the returned surface forms to the content item.
+     * <p>
+     * If the request yields no result (<code>doPostRequest</code> returned
+     * <code>null</code>) the content item is left unchanged.
+     *
+     * @param ci  the {@link ContentItem}
+     * @throws InvalidContentException if the text of the content part cannot be read
+     * @throws EngineException if thrown by {@link #doPostRequest(String)}
+     * @throws IllegalStateException if the content item has no supported content part
+     */
+    public void computeEnhancements( ContentItem ci ) throws EngineException {
+        Entry<UriRef,Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES);
+        if (contentPart == null) {
+            throw new IllegalStateException("No ContentPart with Mimetype '"
+                    + TEXT_PLAIN_MIMETYPE+"' found for ContentItem "+ci.getUri()
+                    + ": This is also checked in the canEnhance method! -> This "
+                    + "indicated an Bug in the implementation of the "
+                    + "EnhancementJobManager!");
+        }
+
+        final String text;
+        try {
+            text = ContentItemHelper.getText(contentPart.getValue());
+        } catch (IOException e) {
+            throw new InvalidContentException(this, ci, e);
+        }
+
+        Collection<DBPSLSurfaceForm> dbpslGraph = doPostRequest( text );
+        if (dbpslGraph == null) {
+            return;
+        }
+
+        //Acquire a write lock on the ContentItem when adding the enhancements
+        ci.getLock().writeLock().lock();
+        try {
+            createEnhancements( dbpslGraph, ci);
+            if (log.isDebugEnabled()) {
+                Serializer serializer = Serializer.getInstance();
+                ByteArrayOutputStream debugStream = new ByteArrayOutputStream();
+                serializer.serialize(debugStream, ci.getMetadata(), "application/rdf+xml");
+                try {
+                    log.debug("DBpedia Spotlight Spot Enhancements:\n{}",debugStream.toString("UTF-8"));
+                } catch (UnsupportedEncodingException e) {
+                    // only the debug output is affected; report it through the
+                    // logger instead of printing the stack trace to stderr
+                    log.warn("Unable to render the enhancements for debug logging", e);
+                }
+            }
+        } finally {
+            ci.getLock().writeLock().unlock();
+        }
+    }
+
+
+    /**
+     * Adds the surface forms returned by DBpedia Spotlight to the metadata of the
+     * content item. One TextAnnotation is created per surface form, carrying the
+     * selected text, start, end and type. Every further annotation with a name
+     * that was already seen is attached to the first annotation of that name
+     * through a <code>DC_RELATION</code> triple.
+     *
+     * @param occs a Collection of entity information
+     * @param ci the content item
+     */
+    public void createEnhancements( Collection<DBPSLSurfaceForm> occs, ContentItem ci ) {
+        LiteralFactory literals = LiteralFactory.getInstance();
+
+        // language used for plain literals representing parts of the content
+        String langString = getMetadataLanguage(ci.getMetadata(), null);
+        final Language language = (langString == null || langString.isEmpty())
+                ? null
+                : new Language(langString);
+
+        // first TextAnnotation created for each surface form name
+        HashMap<String, UriRef> firstAnnotationByName = new HashMap<String, UriRef>();
+
+        for (DBPSLSurfaceForm occ : occs) {
+            UriRef textAnnotation = EnhancementEngineHelper.createTextEnhancement( ci, this );
+            MGraph model = ci.getMetadata();
+
+            PlainLiteralImpl selectedText = new PlainLiteralImpl(occ.name,language);
+            model.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT, selectedText));
+            model.add(new TripleImpl(textAnnotation, ENHANCER_START,
+                    literals.createTypedLiteral(occ.offset)));
+            model.add(new TripleImpl(textAnnotation, ENHANCER_END,
+                    literals.createTypedLiteral(occ.offset + occ.name.length())));
+            model.add(new TripleImpl(textAnnotation, DC_TYPE, new UriRef( occ.type )));
+            // TODO ################## model.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(occ.context,language)));
+
+            if (!firstAnnotationByName.containsKey(occ.name)) {
+                firstAnnotationByName.put(occ.name,textAnnotation);
+            } else {
+                UriRef first = firstAnnotationByName.get(occ.name);
+                model.add(new TripleImpl(first, DC_RELATION, textAnnotation));
+            }
+        }
+    }
+
+
+
+
+    /**
+     * Sends a POST request with the given text to the DBpediaSpotlight url and
+     * reads the surface forms from the XML response.
+     * <p>
+     * The configured spotter is sent as the <code>spotter</code> parameter when it
+     * is set and not empty.
+     *
+     * @param text a <code>String</code> with the text to be analyzed
+     * @return the surface forms found in the response, or <code>null</code> if the
+     *         request could not be made (the failure is logged)
+     * @throws EngineException if the request data cannot be encoded or the
+     *         response XML cannot be parsed
+     */
+    public Collection<DBPSLSurfaceForm> doPostRequest( String text ) throws EngineException {
+        StringBuilder data = new StringBuilder();
+        try {
+            // only the spotter parameter is optional; the text is always sent
+            if ( spotlightSpotter != null && !spotlightSpotter.isEmpty() ) {
+                data.append( URLEncoder.encode( "spotter", "UTF-8" ) ).append( '=' )
+                    .append( URLEncoder.encode( spotlightSpotter, "UTF-8" ) ).append( '&' );
+            }
+            data.append( URLEncoder.encode( "text", "UTF-8" ) ).append( '=' )
+                .append( URLEncoder.encode( text, "UTF-8" ) );
+        } catch (UnsupportedEncodingException e) {
+            throw new EngineException( "Data for the httprequest could not be converted. Error: " + e.getMessage(), e );
+        }
+
+        HttpURLConnection connection = null;
+        // The response is kept as raw bytes so that the XML parser decodes it
+        // according to the XML declaration instead of the platform default charset.
+        ByteArrayOutputStream response = new ByteArrayOutputStream();
+
+        try {
+            //Create connection
+            URL url = new URL( spotlightUrl );
+            connection = ( HttpURLConnection )url.openConnection();
+            connection.setRequestMethod( "POST" );
+            connection.setRequestProperty( "Content-Type", "application/x-www-form-urlencoded" );
+            connection.setRequestProperty( "Accept", "text/xml" );
+
+            connection.setUseCaches( false );
+            connection.setDoInput( true );
+            connection.setDoOutput( true );
+
+            //Send request
+            DataOutputStream wr = new DataOutputStream( connection.getOutputStream() );
+            try {
+                // the form data is URL encoded and therefore plain ASCII
+                wr.writeBytes( data.toString() );
+                wr.flush();
+            } finally {
+                wr.close();
+            }
+
+            //Get Response
+            InputStream is = connection.getInputStream();
+            try {
+                byte[] buffer = new byte[4096];
+                int read;
+                while ((read = is.read( buffer )) != -1) {
+                    response.write( buffer, 0, read );
+                }
+            } finally {
+                is.close();
+            }
+
+        } catch (Exception e) {
+            // best effort: a failed request is logged and reported as "no result"
+            log.error( "[request] Request could not be made. Error: " + e.getMessage(), e );
+            return null;
+
+        } finally {
+            if (connection != null) {
+                connection.disconnect();
+            }
+        }
+
+        XMLParser xmlParser = new XMLParser();
+        try {
+            Document xmlDoc = xmlParser.loadXMLFromInputStream(
+                    new java.io.ByteArrayInputStream( response.toByteArray() ) );
+            NodeList nlist = xmlParser.getElementsByTagName( xmlDoc, "surfaceForm" );
+
+            return this.getAnnotations( nlist );
+        } catch ( Exception e) {
+            log.error( "[response] Response XML could not be parsed. Error: " + e.getMessage(), e );
+            throw new EngineException( "Response XML could not be parsed. Error: " + e.getMessage(), e );
+        }
+    }
+    
+    
+    /**
+     * Converts the elements of the given node list into surface forms, which the
+     * method <code>createEnhancement</code> adds to the meta data of the content
+     * item as TextAnnotations. The <code>name</code>, <code>offset</code> and
+     * <code>type</code> attributes of every element are copied.
+     *
+     * @param nList NodeList of the elements taken from the XML response from DBpedia Spotlight
+     * @return a Collection<DBPSLSurfaceForm> with all annotations
+     */
+    private Collection<DBPSLSurfaceForm> getAnnotations( NodeList nList ) {
+        Collection<DBPSLSurfaceForm> surfaceForms = new HashSet<DBPSLSurfaceForm>();
+
+        for (int index = 0; index < nList.getLength(); index++) {
+            Element element = (Element) nList.item( index );
+
+            DBPSLSurfaceForm surfaceForm = new DBPSLSurfaceForm();
+            surfaceForm.name = element.getAttribute( "name" );
+            surfaceForm.offset = Integer.valueOf( element.getAttribute( "offset" ) );
+            surfaceForm.type = element.getAttribute( "type" );
+
+            surfaceForms.add( surfaceForm );
+        }
+
+        return surfaceForms;
+    }
+
+
+    /**
+     * Returns the service properties of this engine: an unmodifiable map holding
+     * the single entry <code>ENHANCEMENT_ENGINE_ORDERING</code> with the value of
+     * {@link #defaultOrder}.
+     */
+    public Map<String, Object> getServiceProperties() {
+        Map<String, Object> ordering =
+                Collections.singletonMap(ENHANCEMENT_ENGINE_ORDERING, (Object) defaultOrder);
+        return Collections.unmodifiableMap(ordering);
+    }
+
+
+    /**
+     * Looks up the <code>DC_LANGUAGE</code> value of the given subject.
+     *
+     * @param model the graph to search
+     * @param subj the subject to filter by; passed unchanged to <code>model.filter</code>
+     * @return the lexical form of the first matching object, or <code>null</code>
+     *         if there is no matching triple
+     */
+    public String getMetadataLanguage(MGraph model, NonLiteral subj) {
+        Iterator<Triple> matches = model.filter(subj, DC_LANGUAGE, null);
+        if (!matches.hasNext()) {
+            return null;
+        }
+        return getLexicalForm(matches.next().getObject());
+    }
+
+    /**
+     * Returns the textual form of a resource.
+     *
+     * @param res the resource, may be <code>null</code>
+     * @return the lexical form if the resource is a {@link Literal}, its
+     *         <code>toString()</code> value for any other resource, or
+     *         <code>null</code> if the resource is <code>null</code>
+     */
+    public String getLexicalForm(Resource res) {
+        if (res instanceof Literal) {
+            return ((Literal) res).getLexicalForm();
+        }
+        return res == null ? null : res.toString();
+    }
+
+    
+    /**
+     * Replaces the url of the Spotlight endpoint that requests are sent to.
+     * This method is used by the test class.
+     *
+     * @param url String the url of the Spotlight endpoint
+     */
+    public void setEndpointUrl( String url ) {
+        this.spotlightUrl = url;
+    }
+
+}

Added: 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/XMLParser.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/XMLParser.java?rev=1374984&view=auto
==============================================================================
--- 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/XMLParser.java
 (added)
+++ 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/XMLParser.java
 Mon Aug 20 12:11:01 2012
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.dbpspotlightspot;
+
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+
+import org.w3c.dom.Document;
+import org.w3c.dom.NodeList;
+import org.xml.sax.SAXException;
+
+
+/**
+ * Parses the XML results given by DBPedia Spotlight.
+ * <p>
+ * NOTE(review): the parser factories are used with their default settings. If
+ * responses can come from untrusted endpoints, consider disabling DTDs and
+ * external entities.
+ *
+ * @author <a href="mailto:[email protected]">Iavor Jelev</a>
+ */
+
+public class XMLParser {
+
+    /**
+     * Returns all elements of the document that have the given tag name.
+     *
+     * @param doc the document to search
+     * @param tagName the tag name to look for
+     * @return the matching elements
+     */
+    public NodeList getElementsByTagName( Document doc, String tagName ) {
+        return doc.getElementsByTagName( tagName );
+    }
+
+
+    /**
+     * Parses an XML document held in a string.
+     *
+     * @param xml the XML text
+     * @return the parsed and normalized document
+     * @throws SAXException if the text is not well-formed XML or no parser could be created
+     * @throws IOException if reading the text fails
+     */
+    public Document loadXMLFromString( String xml ) throws SAXException, IOException {
+        // Parse from characters: going through getBytes() would use the platform
+        // default charset, which may not match the encoding the document declares.
+        org.xml.sax.InputSource source = new org.xml.sax.InputSource( new java.io.StringReader( xml ) );
+        Document doc = newNamespaceAwareBuilder().parse( source );
+        doc.getDocumentElement().normalize();
+
+        return doc;
+    }
+
+
+    /**
+     * Parses an XML document from a stream. The stream is closed afterwards,
+     * whether parsing succeeded or not.
+     *
+     * @param is the stream to read the document from
+     * @return the parsed and normalized document
+     * @throws SAXException if the content is not well-formed XML or no parser could be created
+     * @throws IOException if reading or closing the stream fails
+     */
+    public Document loadXMLFromInputStream( InputStream is ) throws SAXException, IOException {
+        try {
+            Document doc = newNamespaceAwareBuilder().parse( is );
+            doc.getDocumentElement().normalize();
+
+            return doc;
+        } finally {
+            if ( is != null ) {
+                is.close();
+            }
+        }
+    }
+
+
+    /**
+     * Parses an XML document from a file.
+     *
+     * @param filePath the path of the file to parse
+     * @return the parsed and normalized document
+     * @throws ParserConfigurationException if no parser could be created
+     * @throws SAXException if the file is not well-formed XML
+     * @throws IOException if reading the file fails
+     */
+    public Document loadXMLFromFile( String filePath ) throws ParserConfigurationException, SAXException, IOException {
+        File fXmlFile = new File( filePath );
+        DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
+        DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
+        Document doc = dBuilder.parse( fXmlFile );
+        doc.getDocumentElement().normalize();
+
+        return doc;
+    }
+
+
+    /**
+     * Creates a namespace aware builder. A configuration problem is reported as a
+     * SAXException so that it is no longer swallowed and followed by a
+     * NullPointerException.
+     */
+    private DocumentBuilder newNamespaceAwareBuilder() throws SAXException {
+        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
+        factory.setNamespaceAware( true );
+        try {
+            return factory.newDocumentBuilder();
+        } catch ( ParserConfigurationException ex ) {
+            throw new SAXException( "Unable to create an XML DocumentBuilder", ex );
+        }
+    }
+}
\ No newline at end of file

Added: 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/resources/OSGI-INF/metatype/metatype.properties
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/resources/OSGI-INF/metatype/metatype.properties?rev=1374984&view=auto
==============================================================================
--- 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/resources/OSGI-INF/metatype/metatype.properties
 (added)
+++ 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/resources/OSGI-INF/metatype/metatype.properties
 Mon Aug 20 12:11:01 2012
@@ -0,0 +1,32 @@
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing,
+#  software distributed under the License is distributed on an
+#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+#  KIND, either express or implied.  See the License for the
+#  specific language governing permissions and limitations
+#  under the License.
+
+
+
+# This file contains localization strings for configuration labels and
+# descriptions as used in the metatype.xml descriptor generated by
+# the Maven SCR plugin
+
+stanbol.DBPSpotlightSpotEnhancementEngine.name = DBpedia Spotlight Spotter: Named Entity Recognition
+stanbol.DBPSpotlightSpotEnhancementEngine.description = This engine performs just Named Entity Recognition, \
+    so it is suited for EnhancementChain scenario, in which another Engine links the recognized TextAnnotations \
+    to Ontology Types
+stanbol.DBPSpotlightSpotEnhancementEngine.url.name = Spotlight URL
+stanbol.DBPSpotlightSpotEnhancementEngine.url.description = The URL which will be used for the request
+stanbol.DBPSpotlightSpotEnhancementEngine.spotter.name = Spotter
+stanbol.DBPSpotlightSpotEnhancementEngine.spotter.description = The algorithm which will be used for Spotting \
+    (aka Term Recognition). Currently available: NER, LingPipeSpotter, OpenNLPChunkerSpotter, Kea

Added: 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/core/DBPSpotlightSpotEnhancementTest.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/core/DBPSpotlightSpotEnhancementTest.java?rev=1374984&view=auto
==============================================================================
--- 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/core/DBPSpotlightSpotEnhancementTest.java
 (added)
+++ 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/core/DBPSpotlightSpotEnhancementTest.java
 Mon Aug 20 12:11:01 2012
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.dbpspotlightspot.core;
+
+import java.util.Collection;
+
+import org.apache.stanbol.enhancer.engines.dbpspotlightspot.DBPSLSurfaceForm;
+import 
org.apache.stanbol.enhancer.engines.dbpspotlightspot.DBPSpotlightSpotEnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.EngineException;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.osgi.service.cm.ConfigurationException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * This class provides a JUnit test for the DBpedia Spotlight Spot
+ * EnhancementEngine.
+ * <p>
+ * The endpoint is taken from the system property named by
+ * {@link DBPSpotlightSpotEnhancementEngine#SL_URL_KEY}; when that property
+ * is not set, {@link #DEFAULT_SPL_URL} is used instead.
+ *
+ * @author Iavor Jelev, babelmonkeys / GzEvD
+ */
+public class DBPSpotlightSpotEnhancementTest {
+
+    /**
+     * This contains the logger.
+     */
+    private static final Logger LOG =
+            LoggerFactory.getLogger(DBPSpotlightSpotEnhancementTest.class);
+
+    /** Endpoint used when no system property overrides it. */
+    private static final String DEFAULT_SPL_URL =
+            "http://spotlight.dbpedia.org/rest/spot";
+
+    /** Endpoint the engine under test is configured with. */
+    private static final String SPL_URL = System.getProperty(
+            DBPSpotlightSpotEnhancementEngine.SL_URL_KEY, DEFAULT_SPL_URL);
+
+    /** Sample sentence handed to the engine. */
+    private static final String TEST_TEXT =
+            "President Obama is meeting Angela Merkel in Berlin on Monday";
+
+    private static DBPSpotlightSpotEnhancementEngine dbpslight;
+
+    /**
+     * Creates the engine once for all tests and points it at {@link #SPL_URL}.
+     *
+     * @throws ConfigurationException declared by the original setup; kept so
+     *         the signature stays unchanged
+     */
+    @BeforeClass
+    public static void oneTimeSetup() throws ConfigurationException {
+        dbpslight = new DBPSpotlightSpotEnhancementEngine();
+        dbpslight.setEndpointUrl(SPL_URL);
+    }
+
+    /**
+     * Posts {@link #TEST_TEXT} to the engine and expects at least one
+     * surface form in the result.
+     *
+     * @throws EngineException if the request fails; propagated so that JUnit
+     *         reports the exception together with its stack trace instead of
+     *         a message-only assertion failure
+     */
+    @Test
+    public void testEntityExtraction() throws EngineException {
+        Collection<DBPSLSurfaceForm> entities = dbpslight.doPostRequest(TEST_TEXT);
+        Assert.assertNotNull("The engine returned no result!", entities);
+        LOG.info("Found entities: {}", entities.size());
+        LOG.debug("Entities:\n{}", entities);
+        Assert.assertFalse("No entities were found!", entities.isEmpty());
+    }
+
+}

Added: 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/test/resources/README
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/test/resources/README?rev=1374984&view=auto
==============================================================================
--- 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/test/resources/README
 (added)
+++ 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/test/resources/README
 Mon Aug 20 12:11:01 2012
@@ -0,0 +1,15 @@
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements.  See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+


Reply via email to