Added: 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-disambiguate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/DBPSpotlightDisambiguateEnhancementEngine.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-disambiguate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/DBPSpotlightDisambiguateEnhancementEngine.java?rev=1375110&view=auto
==============================================================================
--- 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-disambiguate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/DBPSpotlightDisambiguateEnhancementEngine.java
 (added)
+++ 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-disambiguate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/DBPSpotlightDisambiguateEnhancementEngine.java
 Mon Aug 20 17:14:56 2012
@@ -0,0 +1,497 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.dbpspotlight.disambiguate;
+
+import static 
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_LANGUAGE;
+import static 
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_RELATION;
+import static 
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_LABEL;
+import static 
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_REFERENCE;
+import static 
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_TYPE;
+import static 
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTED_TEXT;
+import static 
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_START;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.UnsupportedEncodingException;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.net.URLEncoder;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Dictionary;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Hashtable;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+
+import org.apache.clerezza.rdf.core.Language;
+import org.apache.clerezza.rdf.core.Literal;
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.NonLiteral;
+import org.apache.clerezza.rdf.core.Resource;
+import org.apache.clerezza.rdf.core.Triple;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.clerezza.rdf.core.serializedform.Serializer;
+import org.apache.felix.scr.annotations.Component;
+import org.apache.felix.scr.annotations.Properties;
+import org.apache.felix.scr.annotations.Property;
+import org.apache.felix.scr.annotations.Service;
+import org.apache.stanbol.enhancer.servicesapi.Blob;
+import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.EngineException;
+import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.InvalidContentException;
+import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
+import 
org.apache.stanbol.enhancer.servicesapi.helper.AbstractEnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
+import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses;
+import org.osgi.service.cm.ConfigurationException;
+import org.osgi.service.component.ComponentContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.NodeList;
+
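+/**
+ * {@link DBPSpotlightDisambiguateEnhancementEngine} disambiguates the
+ * TextAnnotations already present in the metadata of a content item: it sends
+ * them to a DBpedia Spotlight REST endpoint and adds an EntityAnnotation,
+ * linked to the matching TextAnnotation, for each DBpedia resource returned.
+ * 
+ * @author Iavor Jelev, Babelmonkeys (GzEvD)
+ */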
+@Component(metatype = true, immediate = true, label = 
"%stanbol.DBPSpotlightDisambiguateEnhancementEngine.name", description = 
"%stanbol.DBPSpotlightDisambiguateEnhancementEngine.description")
+@Service
+@Properties(value = { @Property(name = EnhancementEngine.PROPERTY_NAME, value 
= "dbpspotlightdisambiguate") })
+public class DBPSpotlightDisambiguateEnhancementEngine extends
+               AbstractEnhancementEngine<IOException, RuntimeException> 
implements
+               EnhancementEngine, ServiceProperties {
+
+       // All parameters which can be used to configure the EnhancementEngine.
+
+       /** Configuration key for the URL of the DBpedia Spotlight REST endpoint. */
+       @Property(value = "http://spotlight.dbpedia.org/rest/annotate")
+       public static final String SL_URL_KEY =
+                       "stanbol.DBPSpotlightDisambiguateEnhancementEngine.url";
+
+       /**
+        * Configuration key for the disambiguator; a non-empty value is sent as
+        * the <code>disambiguator</code> request parameter.
+        */
+       @Property(value = "Document")
+       public static final String SL_DISAMBIGUATOR =
+                       "stanbol.DBPSpotlightDisambiguateEnhancementEngine.disambiguator";
+
+       /**
+        * Configuration key for the type restriction; a non-empty value is sent
+        * as the <code>types</code> request parameter.
+        */
+       @Property()
+       public static final String SL_RESTRICTION =
+                       "stanbol.DBPSpotlightDisambiguateEnhancementEngine.types";
+
+       /**
+        * Configuration key for the SPARQL restriction, sent as the
+        * <code>sparql</code> request parameter.
+        */
+       @Property()
+       public static final String SL_SPARQL =
+                       "stanbol.DBPSpotlightDisambiguateEnhancementEngine.sparql";
+
+       /**
+        * Configuration key for the minimal support, sent as the
+        * <code>support</code> request parameter.
+        */
+       @Property()
+       public static final String SL_SUPPORT =
+                       "stanbol.DBPSpotlightDisambiguateEnhancementEngine.support";
+
+       /**
+        * Configuration key for the minimal confidence, sent as the
+        * <code>confidence</code> request parameter.
+        */
+       @Property()
+       public static final String SL_CONFIDENCE =
+                       "stanbol.DBPSpotlightDisambiguateEnhancementEngine.confidence";
+
+       /**
+        * The default ordering value for the execution of this engine, defined as
+        * <code>ORDERING_CONTENT_EXTRACTION - 31</code>.
+        */
+       public static final Integer defaultOrder = ORDERING_CONTENT_EXTRACTION 
- 31;
+
+       /**
+        * This contains the only MIME type directly supported by this enhancement
+        * engine.
+        */
+       private static final String TEXT_PLAIN_MIMETYPE = "text/plain";
+       /**
+        * Set containing the only supported MIME type
+        * {@link #TEXT_PLAIN_MIMETYPE}.
+        */
+       private static final Set<String> SUPPORTED_MIMTYPES = Collections
+                       .singleton(TEXT_PLAIN_MIMETYPE);
+       /** This contains the logger. */
+       private static final Logger log = LoggerFactory
+                       
.getLogger(DBPSpotlightDisambiguateEnhancementEngine.class);
+       /** holds the url of the Spotlight REST endpoint */
+       private String spotlightUrl;
+       /** holds the name of the chosen disambiguator to be used */
+       private String spotlightDisambiguator;
+       /**
+        * holds the type restriction for the results, if the user wishes one
+        */
+       private String spotlightTypesRestriction;
+       /** holds the chosen minimal support value */
+       private String spotlightSupport;
+       /** holds the chosen minimal confidence value */
+       private String spotlightConfidence;
+       /**
+        * holds the sparql restriction for the results, if the user wishes one
+        */
+       private String spotlightSparql;
+       /**
+        * holds the existing TextAnnotations, keyed by their selected text
+        * (surface form), which are used as input for DBpedia Spotlight, and later
+        * for linking of the results. The map is replaced on every call of
+        * getSpottedXml and read by createEnhancements.
+        * NOTE(review): this is per-request data kept in an instance field; if
+        * computeEnhancements can run concurrently on one engine instance, the
+        * requests would overwrite each other's map - confirm.
+        */
+       private Hashtable<String, UriRef> textAnnotationsMap;
+
+       /**
+        * Initializes all parameters from the component configuration. Properties
+        * that are not set fall back to defaults: the public DBpedia Spotlight
+        * annotate endpoint for the URL, <code>"-1"</code> for support and
+        * confidence, and <code>null</code> for the disambiguator, the type
+        * restriction and the SPARQL restriction.
+        * 
+        * @param ce
+        *            the {@link ComponentContext}
+        * @throws ConfigurationException
+        *             declared for the call to <code>super.activate(ce)</code>
+        * @throws IOException
+        *             declared for the call to <code>super.activate(ce)</code>
+        * @throws ClassCastException
+        *             if one of the configured values is not a String
+        */
+       @SuppressWarnings("unchecked")
+       protected void activate(ComponentContext ce)
+                       throws ConfigurationException, IOException {
+
+               super.activate(ce);
+
+               Dictionary<String, Object> properties = ce.getProperties();
+               spotlightUrl = getStringProperty(properties, SL_URL_KEY,
+                               "http://spotlight.dbpedia.org/rest/annotate");
+               spotlightDisambiguator = getStringProperty(properties,
+                               SL_DISAMBIGUATOR, null);
+               spotlightTypesRestriction = getStringProperty(properties,
+                               SL_RESTRICTION, null);
+               spotlightSparql = getStringProperty(properties, SL_SPARQL, null);
+               spotlightSupport = getStringProperty(properties, SL_SUPPORT, "-1");
+               spotlightConfidence = getStringProperty(properties, SL_CONFIDENCE,
+                               "-1");
+       }
+
+       /**
+        * Returns the String configured under the given key, or the default value
+        * if the key is not set.
+        */
+       private static String getStringProperty(
+                       Dictionary<String, Object> properties, String key,
+                       String defaultValue) {
+               Object value = properties.get(key);
+               return value == null ? defaultValue : (String) value;
+       }
+
+       /**
+        * Reports whether this engine can process the given content item, which
+        * is the case when the item has a content part with a supported MIME
+        * type.
+        * 
+        * @param ci
+        *            the {@link ContentItem}
+        * @return <code>ENHANCE_SYNCHRONOUS</code> if a supported content part is
+        *         found, otherwise <code>CANNOT_ENHANCE</code>
+        */
+       public int canEnhance(ContentItem ci) throws EngineException {
+               boolean hasSupportedPart =
+                               ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES) != null;
+               return hasSupportedPart ? ENHANCE_SYNCHRONOUS : CANNOT_ENHANCE;
+       }
+
+       /**
+        * Calculates the enhancements by doing a POST request to the DBpedia
+        * Spotlight endpoint and processing the results. The existing
+        * TextAnnotations are read under the read lock of the content item, the
+        * request is sent without holding any lock, and the resulting
+        * enhancements are written under the write lock.
+        * 
+        * @param ci
+        *            the {@link ContentItem}
+        * @throws EngineException
+        *             if the text cannot be read from the content item or the
+        *             request to DBpedia Spotlight fails
+        * @throws IllegalStateException
+        *             if the content item has no content part with a supported
+        *             MIME type
+        */
+       public void computeEnhancements(ContentItem ci) throws EngineException {
+               Entry<UriRef, Blob> contentPart = ContentItemHelper.getBlob(ci,
+                               SUPPORTED_MIMTYPES);
+               if (contentPart == null) {
+                       throw new IllegalStateException(
+                                       "No ContentPart with Mimetype '"
+                                                       + TEXT_PLAIN_MIMETYPE
+                                                       + "' found for ContentItem "
+                                                       + ci.getUri()
+                                                       + ": This is also checked in the canEnhance method! -> This "
+                                                       + "indicated an Bug in the implementation of the "
+                                                       + "EnhancementJobManager!");
+               }
+               String text;
+               try {
+                       text = ContentItemHelper.getText(contentPart.getValue());
+               } catch (IOException e) {
+                       throw new InvalidContentException(this, ci, e);
+               }
+
+               // Retrieve the existing text annotations; iterating over the
+               // metadata graph requires the read lock
+               String xmlTextAnnotations;
+               ci.getLock().readLock().lock();
+               try {
+                       xmlTextAnnotations = this.getSpottedXml(text, ci.getMetadata());
+               } finally {
+                       ci.getLock().readLock().unlock();
+               }
+
+               Collection<Annotation> dbpslGraph = doPostRequest(text,
+                               xmlTextAnnotations);
+               if (dbpslGraph == null) {
+                       return;
+               }
+
+               // Acquire a write lock on the ContentItem when adding the
+               // enhancements
+               ci.getLock().writeLock().lock();
+               try {
+                       createEnhancements(dbpslGraph, ci);
+                       if (log.isDebugEnabled()) {
+                               Serializer serializer = Serializer.getInstance();
+                               ByteArrayOutputStream debugStream = new ByteArrayOutputStream();
+                               serializer.serialize(debugStream, ci.getMetadata(),
+                                               "application/rdf+xml");
+                               try {
+                                       log.debug("DBpedia Enhancements:\n{}",
+                                                       debugStream.toString("UTF-8"));
+                               } catch (UnsupportedEncodingException e) {
+                                       // report through the logger instead of stderr
+                                       log.warn("Unable to log the DBpedia enhancements", e);
+                               }
+                       }
+               } finally {
+                       ci.getLock().writeLock().unlock();
+               }
+       }
+
+       /**
+        * Adds the returned DBpedia Spotlight annotations to the metadata of the
+        * content item. For each annotation whose surface form matches the
+        * selected text of a known TextAnnotation, an EntityAnnotation is created
+        * and linked to that TextAnnotation; annotations without a matching
+        * TextAnnotation are ignored. The language recorded in the metadata, if
+        * any, is used for the entity label literal.
+        * 
+        * @param occs
+        *            a Collection of entity information
+        * @param ci
+        *            the content item
+        */
+       public void createEnhancements(Collection<Annotation> occs,
+                       ContentItem ci) {
+               // used for plain literals representing parts of the content
+               final Language language;
+               String langString = getMetadataLanguage(ci.getMetadata(), null);
+               if (langString != null && !langString.isEmpty()) {
+                       language = new Language(langString);
+               } else {
+                       language = null;
+               }
+
+               MGraph model = ci.getMetadata();
+               for (Annotation occ : occs) {
+                       UriRef textAnnotation = textAnnotationsMap.get(occ.surfaceForm);
+                       if (textAnnotation == null) {
+                               // no TextAnnotation with this surface form was sent
+                               continue;
+                       }
+
+                       UriRef entityAnnotation = EnhancementEngineHelper
+                                       .createEntityEnhancement(ci, this);
+                       Literal label = new PlainLiteralImpl(occ.surfaceForm, language);
+                       model.add(new TripleImpl(entityAnnotation, DC_RELATION,
+                                       textAnnotation));
+                       model.add(new TripleImpl(entityAnnotation,
+                                       ENHANCER_ENTITY_LABEL, label));
+
+                       Set<String> typeNames = occ.getTypeNames();
+                       if (typeNames != null) {
+                               for (String typeName : typeNames) {
+                                       model.add(new TripleImpl(entityAnnotation,
+                                                       ENHANCER_ENTITY_TYPE, new UriRef(typeName)));
+                               }
+                       }
+                       model.add(new TripleImpl(entityAnnotation,
+                                       ENHANCER_ENTITY_REFERENCE, occ.uri));
+               }
+       }
+
+       /**
+        * Sends a POST request to the DBpedia Spotlight URL and parses the XML
+        * response.
+        * 
+        * @param text
+        *            the plain text of the content item (not used by this method;
+        *            only <code>xmlTextAnnotations</code> is sent)
+        * @param xmlTextAnnotations
+        *            the XML with the spotted surface forms, sent as the
+        *            <code>text</code> request parameter
+        * @return the annotations built from the <code>Resource</code> elements
+        *         of the response
+        * @throws EngineException
+        *             if the request data cannot be encoded, the request cannot
+        *             be sent, or the response cannot be read or parsed
+        */
+       public Collection<Annotation> doPostRequest(String text,
+                       String xmlTextAnnotations) throws EngineException {
+               String data = buildRequestData(xmlTextAnnotations);
+
+               HttpURLConnection connection = null;
+               try {
+                       // Create connection
+                       URL url = new URL(spotlightUrl);
+                       connection = (HttpURLConnection) url.openConnection();
+                       connection.setRequestMethod("POST");
+                       connection.setRequestProperty("Content-Type",
+                                       "application/x-www-form-urlencoded");
+                       connection.setRequestProperty("Accept", "text/xml");
+
+                       connection.setUseCaches(false);
+                       connection.setDoInput(true);
+                       connection.setDoOutput(true);
+
+                       // Send request
+                       DataOutputStream wr = new DataOutputStream(
+                                       connection.getOutputStream());
+                       try {
+                               wr.write(data.getBytes("UTF-8"));
+                               wr.flush();
+                       } finally {
+                               wr.close();
+                       }
+
+                       // Get response: parse directly from the stream so that the
+                       // XML parser determines the encoding of the response itself
+                       XMLParser xmlParser = new XMLParser();
+                       InputStream is = connection.getInputStream();
+                       Document xmlDoc;
+                       try {
+                               xmlDoc = xmlParser.loadXMLFromInputStream(is);
+                       } finally {
+                               is.close();
+                       }
+                       NodeList nlist = xmlParser.getElementsByTagName(xmlDoc,
+                                       "Resource");
+                       return this.getAnnotations(nlist);
+               } catch (IOException e) {
+                       throw new EngineException("Request to DBpedia Spotlight at '"
+                                       + spotlightUrl + "' failed. Error: " + e.getMessage(), e);
+               } catch (Exception e) {
+                       // malformed XML or non-numeric attribute values
+                       throw new EngineException(
+                                       "Response XML could not be parsed. Error: "
+                                                       + e.getMessage(), e);
+               } finally {
+                       if (connection != null) {
+                               connection.disconnect();
+                       }
+               }
+       }
+
+       /**
+        * Builds the form-encoded request body from the configured parameters and
+        * the given annotations XML.
+        */
+       private String buildRequestData(String xmlTextAnnotations)
+                       throws EngineException {
+               StringBuilder data = new StringBuilder();
+               try {
+                       // name and value are encoded separately; encoding the whole
+                       // pair would also escape the '=' between them
+                       appendParameter(data, "spotter", "SpotXmlParser");
+                       if (hasValue(spotlightDisambiguator)) {
+                               appendParameter(data, "disambiguator",
+                                               spotlightDisambiguator);
+                       }
+                       boolean restrictTypes = hasValue(spotlightTypesRestriction);
+                       if (restrictTypes) {
+                               appendParameter(data, "types", spotlightTypesRestriction);
+                       }
+                       if (hasValue(spotlightSupport)) {
+                               appendParameter(data, "support", spotlightSupport);
+                       }
+                       if (hasValue(spotlightConfidence)) {
+                               appendParameter(data, "confidence", spotlightConfidence);
+                       }
+                       // the sparql restriction is only used when no type
+                       // restriction is configured (null or empty)
+                       if (!restrictTypes && hasValue(spotlightSparql)) {
+                               appendParameter(data, "sparql", spotlightSparql);
+                       }
+                       appendParameter(data, "text", xmlTextAnnotations);
+               } catch (UnsupportedEncodingException e) {
+                       throw new EngineException(
+                                       "Data for the httprequest could not be converted. Error: "
+                                                       + e.getMessage(), e);
+               }
+               return data.toString();
+       }
+
+       /** Appends one URL-encoded <code>name=value</code> pair to the data. */
+       private static void appendParameter(StringBuilder data, String name,
+                       String value) throws UnsupportedEncodingException {
+               if (data.length() > 0) {
+                       data.append('&');
+               }
+               data.append(URLEncoder.encode(name, "UTF-8")).append('=')
+                               .append(URLEncoder.encode(value, "UTF-8"));
+       }
+
+       /** Returns whether the given String is neither null nor empty. */
+       private static boolean hasValue(String value) {
+               return value != null && !value.isEmpty();
+       }
+
+       /**
+        * Builds the XML sent to DBpedia Spotlight: an <code>annotation</code>
+        * element carrying the text, with one <code>surfaceForm</code> child for
+        * each existing TextAnnotation that has a selected text and a start
+        * offset. As a side effect the map of known TextAnnotations is replaced
+        * by a new one holding exactly the annotations written to the XML.
+        * 
+        * @param text
+        *            the plain text of the content item
+        * @param graph
+        *            the metadata of the content item
+        * @return the XML document as a String
+        */
+       private String getSpottedXml(String text, MGraph graph) {
+               StringBuilder xml = new StringBuilder();
+               textAnnotationsMap = new Hashtable<String, UriRef>();
+
+               xml.append("<annotation text=\"").append(escapeXmlAttribute(text))
+                               .append("\">");
+               try {
+                       Iterator<Triple> it = graph.filter(null, RDF_TYPE,
+                                       TechnicalClasses.ENHANCER_TEXTANNOTATION);
+                       while (it.hasNext()) {
+                               NonLiteral subject = it.next().getSubject();
+                               if (!(subject instanceof UriRef)) {
+                                       // skip this one instead of aborting the whole loop
+                                       continue;
+                               }
+                               UriRef uri = (UriRef) subject;
+                               String surfaceForm = EnhancementEngineHelper.getString(graph,
+                                               uri, ENHANCER_SELECTED_TEXT);
+                               if (surfaceForm == null) {
+                                       continue;
+                               }
+                               String offset = EnhancementEngineHelper.getString(graph,
+                                               uri, ENHANCER_START);
+                               if (offset == null) {
+                                       // do not send the literal "null" as offset
+                                       continue;
+                               }
+                               textAnnotationsMap.put(surfaceForm, uri);
+                               xml.append("<surfaceForm name=\"")
+                                               .append(escapeXmlAttribute(surfaceForm))
+                                               .append("\" offset=\"")
+                                               .append(escapeXmlAttribute(offset))
+                                               .append("\"/>");
+                       }
+               } catch (Exception e) {
+                       // best effort: send the annotations collected so far
+                       log.error("Unable to collect the existing TextAnnotations", e);
+               }
+
+               return xml.append("</annotation>").toString();
+       }
+
+       /**
+        * Escapes a value for use inside a double-quoted XML attribute. Markup
+        * characters are replaced by entities; tab, line feed and carriage return
+        * are written as character references so that they are not normalized to
+        * spaces when the attribute is parsed.
+        */
+       private static String escapeXmlAttribute(String value) {
+               if (value == null) {
+                       return "";
+               }
+               StringBuilder escaped = new StringBuilder(value.length() + 16);
+               for (int i = 0; i < value.length(); i++) {
+                       char c = value.charAt(i);
+                       switch (c) {
+                       case '&':
+                               escaped.append("&amp;");
+                               break;
+                       case '<':
+                               escaped.append("&lt;");
+                               break;
+                       case '>':
+                               escaped.append("&gt;");
+                               break;
+                       case '"':
+                               escaped.append("&quot;");
+                               break;
+                       case '\'':
+                               escaped.append("&apos;");
+                               break;
+                       case '\t':
+                               escaped.append("&#9;");
+                               break;
+                       case '\n':
+                               escaped.append("&#10;");
+                               break;
+                       case '\r':
+                               escaped.append("&#13;");
+                               break;
+                       default:
+                               escaped.append(c);
+                               break;
+                       }
+               }
+               return escaped.toString();
+       }
+
+       /**
+        * Builds one {@link Annotation} for each element of the given node list,
+        * reading the attributes <code>URI</code>, <code>support</code>,
+        * <code>types</code>, <code>surfaceForm</code>, <code>offset</code>,
+        * <code>similarityScore</code> and <code>percentageOfSecondRank</code>.
+        * The result is what <code>createEnhancements</code> adds to the metadata
+        * of the content item.
+        * 
+        * @param nList
+        *            NodeList of all Resources contained in the XML response from
+        *            DBpedia Spotlight
+        * @return a Collection with all annotations
+        */
+       private Collection<Annotation> getAnnotations(NodeList nList) {
+               Collection<Annotation> annotations = new HashSet<Annotation>();
+
+               for (int index = 0; index < nList.getLength(); index++) {
+                       Annotation annotation = new Annotation();
+                       Element resource = (Element) nList.item(index);
+
+                       annotation.uri = new UriRef(resource.getAttribute("URI"));
+                       annotation.support = Integer.parseInt(resource
+                                       .getAttribute("support"));
+                       annotation.types = resource.getAttribute("types");
+                       annotation.surfaceForm = resource.getAttribute("surfaceForm");
+                       annotation.offset = Integer.parseInt(resource
+                                       .getAttribute("offset"));
+                       annotation.similarityScore = Double.parseDouble(resource
+                                       .getAttribute("similarityScore"));
+                       annotation.percentageOfSecondRank = Double.parseDouble(resource
+                                       .getAttribute("percentageOfSecondRank"));
+
+                       annotations.add(annotation);
+               }
+
+               return annotations;
+       }
+
+       /**
+        * Returns the service properties of this engine: an unmodifiable map with
+        * the single entry <code>ENHANCEMENT_ENGINE_ORDERING</code>, whose value
+        * is {@link #defaultOrder}.
+        */
+       public Map<String, Object> getServiceProperties() {
+               Map<String, Object> ordering = Collections.singletonMap(
+                               ENHANCEMENT_ENGINE_ORDERING, (Object) defaultOrder);
+               return Collections.unmodifiableMap(ordering);
+       }
+
+       /**
+        * Returns the lexical form of the first <code>DC_LANGUAGE</code> value
+        * found in the model for the given subject.
+        * 
+        * @param model
+        *            the graph to search
+        * @param subj
+        *            the subject passed to the filter; may be <code>null</code>
+        * @return the language, or <code>null</code> if no matching triple exists
+        */
+       public String getMetadataLanguage(MGraph model, NonLiteral subj) {
+               Iterator<Triple> languages = model.filter(subj, DC_LANGUAGE, null);
+               if (!languages.hasNext()) {
+                       return null;
+               }
+               return getLexicalForm(languages.next().getObject());
+       }
+
+       /**
+        * Returns the String form of a resource: the lexical form for a
+        * {@link Literal}, the result of <code>toString()</code> for any other
+        * resource, and <code>null</code> for <code>null</code>.
+        * 
+        * @param res
+        *            the resource, may be <code>null</code>
+        * @return the String form of the resource
+        */
+       public String getLexicalForm(Resource res) {
+               if (res instanceof Literal) {
+                       return ((Literal) res).getLexicalForm();
+               }
+               return res == null ? null : res.toString();
+       }
+
+       /**
+        * Overrides the URL of the Spotlight endpoint. This method is used by the
+        * test class.
+        * 
+        * @param url
+        *            the url of the Spotlight endpoint
+        */
+       public void setEndpointUrl(String url) {
+               this.spotlightUrl = url;
+       }
+
+}

Added: 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-disambiguate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/XMLParser.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-disambiguate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/XMLParser.java?rev=1375110&view=auto
==============================================================================
--- 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-disambiguate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/XMLParser.java
 (added)
+++ 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-disambiguate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/XMLParser.java
 Mon Aug 20 17:14:56 2012
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.dbpspotlight.disambiguate;
+
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+
+import org.w3c.dom.Document;
+import org.w3c.dom.NodeList;
+import org.xml.sax.SAXException;
+
+/**
+ * Parses the XML results given by DBpedia Spotlight.
+ * 
+ * @author Iavor Jelev
+ */
+
+public class XMLParser {
+
+       /**
+        * Returns the elements of the document that have the given tag name.
+        * 
+        * @param doc
+        *            the document to search
+        * @param tagName
+        *            the tag name to look for
+        * @return the matching elements
+        */
+       public NodeList getElementsByTagName(Document doc, String tagName) {
+
+               return doc.getElementsByTagName(tagName);
+       }
+
+       /**
+        * Parses an XML document held in a String. The String is parsed as a
+        * character stream, so its content is not re-encoded with the platform
+        * charset before parsing.
+        * 
+        * @param xml
+        *            the XML document
+        * @return the parsed and normalized document
+        * @throws SAXException
+        *             if the XML is not well-formed
+        * @throws IOException
+        *             if the XML cannot be read
+        */
+       public Document loadXMLFromString(String xml) throws SAXException,
+                       IOException {
+               org.xml.sax.InputSource source = new org.xml.sax.InputSource(
+                               new java.io.StringReader(xml));
+               Document doc = newResponseBuilder().parse(source);
+               doc.getDocumentElement().normalize();
+
+               return doc;
+       }
+
+       /**
+        * Parses an XML document from a stream. The stream is closed in any case,
+        * also when parsing fails.
+        * 
+        * @param is
+        *            the stream to read the XML document from
+        * @return the parsed and normalized document
+        * @throws SAXException
+        *             if the XML is not well-formed
+        * @throws IOException
+        *             if the stream cannot be read
+        */
+       public Document loadXMLFromInputStream(InputStream is) throws SAXException,
+                       IOException {
+               if (is == null) {
+                       throw new IllegalArgumentException(
+                                       "The InputStream must not be null");
+               }
+               try {
+                       Document doc = newResponseBuilder().parse(is);
+                       doc.getDocumentElement().normalize();
+
+                       return doc;
+               } finally {
+                       is.close();
+               }
+       }
+
+       /**
+        * Parses an XML document from a file. Unlike the other loaders this one
+        * uses a parser that is not namespace aware.
+        * 
+        * @param filePath
+        *            the path of the XML file
+        * @return the parsed and normalized document
+        * @throws ParserConfigurationException
+        *             if no parser can be created
+        * @throws SAXException
+        *             if the XML is not well-formed
+        * @throws IOException
+        *             if the file cannot be read
+        */
+       public Document loadXMLFromFile(String filePath)
+                       throws ParserConfigurationException, SAXException, IOException {
+               File fXmlFile = new File(filePath);
+               DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
+               DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
+               Document doc = dBuilder.parse(fXmlFile);
+               doc.getDocumentElement().normalize();
+
+               return doc;
+       }
+
+       /**
+        * Creates the namespace aware parser used for Strings and streams, with
+        * the JAXP secure processing feature switched on. A configuration problem
+        * is reported as an IllegalStateException that carries the cause, instead
+        * of being swallowed and leaving no parser to work with.
+        */
+       private static DocumentBuilder newResponseBuilder() {
+               DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
+               factory.setNamespaceAware(true);
+               try {
+                       factory.setFeature(
+                                       javax.xml.XMLConstants.FEATURE_SECURE_PROCESSING, true);
+                       return factory.newDocumentBuilder();
+               } catch (ParserConfigurationException ex) {
+                       throw new IllegalStateException(
+                                       "Unable to create an XML DocumentBuilder", ex);
+               }
+       }
+}
\ No newline at end of file

Added: 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-disambiguate/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/core/DBPSpotlightDisambiguateEnhancementTest.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-disambiguate/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/core/DBPSpotlightDisambiguateEnhancementTest.java?rev=1375110&view=auto
==============================================================================
--- 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-disambiguate/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/core/DBPSpotlightDisambiguateEnhancementTest.java
 (added)
+++ 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-disambiguate/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/core/DBPSpotlightDisambiguateEnhancementTest.java
 Mon Aug 20 17:14:56 2012
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.dbpspotlight.disambiguate.core;
+
+import java.util.Collection;
+
+import org.apache.commons.io.IOUtils;
+import 
org.apache.stanbol.enhancer.engines.dbpspotlight.disambiguate.Annotation;
+import 
org.apache.stanbol.enhancer.engines.dbpspotlight.disambiguate.DBPSpotlightDisambiguateEnhancementEngine;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.osgi.service.cm.ConfigurationException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * This class provides a JUnit test for the DBpedia Spotlight Disambiguate
+ * EnhancementEngine.
+ * <p>
+ * The test posts {@link #TEST_TEXT} together with the spots read from the
+ * classpath resource named by {@link #TEST_FILE} to a DBpedia Spotlight
+ * endpoint, so it needs network access. The endpoint can be overridden with
+ * the system property named by
+ * {@link DBPSpotlightDisambiguateEnhancementEngine#SL_URL_KEY}.
+ * 
+ * @author Iavor Jelev, babelmonkeys / GzEvD
+ */
+public class DBPSpotlightDisambiguateEnhancementTest {
+
+       /**
+        * This contains the logger.
+        */
+       private static final Logger LOG = LoggerFactory
+                       .getLogger(DBPSpotlightDisambiguateEnhancementTest.class);
+       /**
+        * Endpoint used when the system property is not set.
+        * NOTE(review): this is the "annotate" service although the engine under
+        * test is the disambiguate engine - confirm this is intended.
+        */
+       private static final String DEFAULT_SPL_URL = "http://spotlight.dbpedia.org/rest/annotate";
+       /** Endpoint the engine under test is pointed at. */
+       private static final String SPL_URL = System.getProperty(
+                       DBPSpotlightDisambiguateEnhancementEngine.SL_URL_KEY,
+                       DEFAULT_SPL_URL);
+       /** Text sent to the endpoint. */
+       private static final String TEST_TEXT = "President Obama is meeting Angela Merkel in Berlin on Monday.";
+       /** Classpath resource holding the spots XML sent along with the text. */
+       private static final String TEST_FILE = "spots.xml";
+       private static DBPSpotlightDisambiguateEnhancementEngine dbpslight;
+
+       /**
+        * Creates the engine once and points it at {@link #SPL_URL}.
+        */
+       @BeforeClass
+       public static void oneTimeSetup() throws ConfigurationException {
+               dbpslight = new DBPSpotlightDisambiguateEnhancementEngine();
+               dbpslight.setEndpointUrl(SPL_URL);
+       }
+
+       /**
+        * Sends the test text and spots to the endpoint and expects at least one
+        * annotation back. Exceptions are left to JUnit so the report shows the
+        * real exception type and stack trace.
+        */
+       @Test
+       public void testEntityExtraction() throws Exception {
+               java.io.InputStream spots = getClass().getClassLoader()
+                               .getResourceAsStream(TEST_FILE);
+               // fail with a clear message instead of a NullPointerException
+               Assert.assertNotNull("Test resource '" + TEST_FILE
+                               + "' was not found on the classpath!", spots);
+               String spotsXml;
+               try {
+                       spotsXml = IOUtils.toString(spots, "UTF-8");
+               } finally {
+                       IOUtils.closeQuietly(spots);
+               }
+
+               LOG.info("Using DBpedia Spotlight endpoint: {}", SPL_URL);
+               Collection<Annotation> entities = dbpslight.doPostRequest(TEST_TEXT,
+                               spotsXml);
+               Assert.assertNotNull("The request to " + SPL_URL
+                               + " returned no result!", entities);
+               LOG.info("Found entities: {}", entities.size());
+               LOG.debug("Entities:\n{}", entities);
+               Assert.assertFalse("No entities were found!", entities.isEmpty());
+       }
+}

Modified: 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/pom.xml
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/pom.xml?rev=1375110&r1=1375107&r2=1375110&view=diff
==============================================================================
--- 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/pom.xml
 (original)
+++ 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/pom.xml
 Mon Aug 20 17:14:56 2012
@@ -22,7 +22,7 @@
        </parent>
 
        <groupId>org.apache.stanbol</groupId>
-       
<artifactId>org.apache.stanbol.enhancer.engines.dbpspotlightspot</artifactId>
+       
<artifactId>org.apache.stanbol.enhancer.engines.dbpspotlight.spot</artifactId>
        <packaging>bundle</packaging>
 
        <name>Apache Stanbol Enhancer Enhancement Engine : DBPedia Spotlight 
Spot</name>
@@ -43,7 +43,7 @@
                                <configuration>
                                        <instructions>
                                                <Export-Package>
-                                                       
org.apache.stanbol.enhancer.engines.dbpspotlightspot;version=${project.version}
+                                                       
org.apache.stanbol.enhancer.engines.dbpspotlight.spot;version=${project.version}
                                                </Export-Package>
                                                <Embed-Dependency>
                                                </Embed-Dependency>

Added: 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/DBPSpotlightSpotEnhancementEngine.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/DBPSpotlightSpotEnhancementEngine.java?rev=1375110&view=auto
==============================================================================
--- 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/DBPSpotlightSpotEnhancementEngine.java
 (added)
+++ 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/DBPSpotlightSpotEnhancementEngine.java
 Mon Aug 20 17:14:56 2012
@@ -0,0 +1,429 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.dbpspotlight.spot;
+
+import static 
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_LANGUAGE;
+import static 
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_RELATION;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE;
+import static 
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_END;
+import static 
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTED_TEXT;
+import static 
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_START;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.UnsupportedEncodingException;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.net.URLEncoder;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Dictionary;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+
+import org.apache.clerezza.rdf.core.Language;
+import org.apache.clerezza.rdf.core.Literal;
+import org.apache.clerezza.rdf.core.LiteralFactory;
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.NonLiteral;
+import org.apache.clerezza.rdf.core.Resource;
+import org.apache.clerezza.rdf.core.Triple;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.clerezza.rdf.core.serializedform.Serializer;
+import org.apache.felix.scr.annotations.Component;
+import org.apache.felix.scr.annotations.Properties;
+import org.apache.felix.scr.annotations.Property;
+import org.apache.felix.scr.annotations.Service;
+import org.apache.stanbol.enhancer.servicesapi.Blob;
+import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.EngineException;
+import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.InvalidContentException;
+import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
+import 
org.apache.stanbol.enhancer.servicesapi.helper.AbstractEnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
+import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.osgi.service.cm.ConfigurationException;
+import org.osgi.service.component.ComponentContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.NodeList;
+
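+/**
+ * {@link DBPSpotlightSpotEnhancementEngine} sends the plain text of a content
+ * item to the DBpedia Spotlight <code>spot</code> REST service and adds a
+ * TextAnnotation to the content item's metadata for every surface form the
+ * service returns.
+ * 
+ * @author Iavor Jelev, Babelmonkeys (GzEvD)
+ */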
+@Component(metatype = true, immediate = true, label = 
"%stanbol.DBPSpotlightSpotEnhancementEngine.name", description = 
"%stanbol.DBPSpotlightSpotEnhancementEngine.description")
+@Service
+@Properties(value = { @Property(name = EnhancementEngine.PROPERTY_NAME, value 
= "dbpspotlightspot") })
+public class DBPSpotlightSpotEnhancementEngine extends
+               AbstractEnhancementEngine<IOException, RuntimeException> 
implements
+               EnhancementEngine, ServiceProperties {
+
+       /**
+        * Configuration key for the URL of the DBpedia Spotlight spot endpoint.
+        */
+       @Property(value = "http://spotlight.dbpedia.org/rest/spot")
+       public static final String SL_URL_KEY = "stanbol.DBPSpotlightSpotEnhancementEngine.url";
+
+       /**
+        * Configuration key for the name of the spotter. When set to a non-empty
+        * value it is sent to the endpoint as the <code>spotter</code> request
+        * parameter.
+        */
+       @Property(value = "LingPipeSpotter")
+       public static final String SL_SPOTTER = "stanbol.DBPSpotlightSpotEnhancementEngine.spotter";
+
+       /**
+        * The default value for the Execution of this Engine. Currently set to
+        * {@link ServiceProperties#ORDERING_CONTENT_EXTRACTION} - 29.
+        */
+       public static final Integer defaultOrder = ORDERING_CONTENT_EXTRACTION - 29;
+
+       /**
+        * This contains the only MIME type directly supported by this enhancement
+        * engine.
+        */
+       private static final String TEXT_PLAIN_MIMETYPE = "text/plain";
+       /**
+        * Set containing the only supported mime type {@link #TEXT_PLAIN_MIMETYPE}
+        */
+       private static final Set<String> SUPPORTED_MIMTYPES = Collections
+                       .singleton(TEXT_PLAIN_MIMETYPE);
+
+       /**
+        * This contains the languages supported by DBpedia Spotlight. A content
+        * item whose metadata declare a language that is not in this set is
+        * rejected by {@link #canEnhance(ContentItem)}; a content item without any
+        * language information is still processed.
+        */
+       protected static final Set<String> SUPPORTED_LANGUAGES = Collections
+                       .unmodifiableSet(new HashSet<String>(Arrays.asList("en")));
+
+       /** holds the logger. */
+       private static final Logger log = LoggerFactory
+                       .getLogger(DBPSpotlightSpotEnhancementEngine.class);
+
+       /** holds the url of the Spotlight REST endpoint */
+       private String spotlightUrl;
+       /** holds the name of the spotter to be used; may be <code>null</code> */
+       private String spotlightSpotter;
+
+       /**
+        * Initialize all parameters from the configuration panel, or with their
+        * default values. The endpoint URL falls back to the public spot service
+        * when {@link #SL_URL_KEY} is not configured; the spotter stays
+        * <code>null</code> when {@link #SL_SPOTTER} is not configured.
+        * 
+        * @param ce
+        *            the {@link ComponentContext}
+        * @throws ConfigurationException
+        *             declared for the call to <code>super.activate</code>
+        * @throws IOException
+        *             declared for the call to <code>super.activate</code>
+        */
+       @SuppressWarnings("unchecked")
+       protected void activate(ComponentContext ce) throws ConfigurationException,
+                       IOException {
+
+               super.activate(ce);
+
+               Dictionary<String, Object> properties = ce.getProperties();
+               // look every key up once instead of once for the test and once for
+               // the value
+               Object configuredUrl = properties.get(SL_URL_KEY);
+               spotlightUrl = configuredUrl == null ? "http://spotlight.dbpedia.org/rest/spot"
+                               : (String) configuredUrl;
+               // casting null yields null, so no explicit null test is needed
+               spotlightSpotter = (String) properties.get(SL_SPOTTER);
+       }
+
+       /**
+        * Decides whether this engine can process the content item: it needs a
+        * content part with a supported mime type and, if the metadata declare a
+        * language, that language has to be one of the supported ones.
+        * 
+        * @param ci
+        *            the {@link ContentItem}
+        * @return <code>ENHANCE_SYNCHRONOUS</code> if the item can be processed,
+        *         <code>CANNOT_ENHANCE</code> otherwise
+        */
+       public int canEnhance(ContentItem ci) throws EngineException {
+               // no plain text part -> nothing to send to the service
+               if (ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES) == null) {
+                       return CANNOT_ENHANCE;
+               }
+
+               String declared = getMetadataLanguage(ci.getMetadata(), null);
+               boolean acceptable = declared == null
+                               || SUPPORTED_LANGUAGES.contains(declared);
+               if (acceptable) {
+                       return ENHANCE_SYNCHRONOUS;
+               }
+
+               Object[] logArgs = { ci.getUri(), declared, SUPPORTED_LANGUAGES };
+               log.info("DBpedia Spotlight can not process ContentItem {} because "
+                               + "language {} is not supported (supported: {})", logArgs);
+               return CANNOT_ENHANCE;
+       }
+
+       /**
+        * Calculate the enhancements by doing a POST request to the DBpedia
+        * Spotlight endpoint and processing the results. Nothing is added to the
+        * content item when the request returns <code>null</code>.
+        * 
+        * @param ci
+        *            the {@link ContentItem}
+        * @throws EngineException
+        *             if the text of the content item cannot be read or
+        *             {@link #doPostRequest(String)} fails
+        * @throws IllegalStateException
+        *             if the content item has no content part with a supported
+        *             mime type
+        */
+       public void computeEnhancements(ContentItem ci) throws EngineException {
+               Entry<UriRef, Blob> contentPart = ContentItemHelper.getBlob(ci,
+                               SUPPORTED_MIMTYPES);
+               if (contentPart == null) {
+                       throw new IllegalStateException(
+                                       "No ContentPart with Mimetype '"
+                                                       + TEXT_PLAIN_MIMETYPE
+                                                       + "' found for ContentItem "
+                                                       + ci.getUri()
+                                                       + ": This is also checked in the canEnhance method! -> This "
+                                                       + "indicated an Bug in the implementation of the "
+                                                       + "EnhancementJobManager!");
+               }
+               String text;
+               try {
+                       text = ContentItemHelper.getText(contentPart.getValue());
+               } catch (IOException e) {
+                       throw new InvalidContentException(this, ci, e);
+               }
+
+               Collection<SurfaceForm> dbpslGraph = doPostRequest(text);
+               if (dbpslGraph == null) {
+                       // the request failed; doPostRequest has already logged the error
+                       return;
+               }
+
+               // Acquire a write lock on the ContentItem when adding the
+               // enhancements
+               ci.getLock().writeLock().lock();
+               try {
+                       createEnhancements(dbpslGraph, ci);
+                       if (log.isDebugEnabled()) {
+                               Serializer serializer = Serializer.getInstance();
+                               ByteArrayOutputStream debugStream = new ByteArrayOutputStream();
+                               serializer.serialize(debugStream, ci.getMetadata(),
+                                               "application/rdf+xml");
+                               try {
+                                       log.debug("DBpedia Spotlight Spot Enhancements:\n{}",
+                                                       debugStream.toString("UTF-8"));
+                               } catch (UnsupportedEncodingException e) {
+                                       // report through the logger instead of stderr
+                                       log.warn("Unable to decode the serialized enhancements for debug output",
+                                                       e);
+                               }
+                       }
+               } finally {
+                       ci.getLock().writeLock().unlock();
+               }
+       }
+
+       /**
+        * Writes one TextAnnotation per surface form into the metadata of the
+        * content item. Every annotation carries the selected text, its start and
+        * end position and its type. The first annotation created for a surface
+        * form name is remembered; for each later occurrence of the same name a
+        * DC_RELATION triple from that first annotation to the new one is added.
+        * 
+        * @param occs
+        *            the surface forms returned by DBpedia Spotlight
+        * @param ci
+        *            the content item
+        */
+       public void createEnhancements(Collection<SurfaceForm> occs,
+                       ContentItem ci) {
+               LiteralFactory typedLiterals = LiteralFactory.getInstance();
+
+               // language of plain literals representing parts of the content;
+               // null when the metadata declare none
+               String langString = getMetadataLanguage(ci.getMetadata(), null);
+               final Language language = (langString == null || langString.isEmpty()) ? null
+                               : new Language(langString);
+
+               // surface form name -> first TextAnnotation created for that name
+               HashMap<String, UriRef> firstAnnotationByName = new HashMap<String, UriRef>();
+
+               for (SurfaceForm occ : occs) {
+                       UriRef textAnnotation = EnhancementEngineHelper
+                                       .createTextEnhancement(ci, this);
+                       MGraph model = ci.getMetadata();
+
+                       PlainLiteralImpl selectedText = new PlainLiteralImpl(occ.name,
+                                       language);
+                       model.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT,
+                                       selectedText));
+                       model.add(new TripleImpl(textAnnotation, ENHANCER_START,
+                                       typedLiterals.createTypedLiteral(occ.offset)));
+                       // the end position is the start plus the length of the name
+                       model.add(new TripleImpl(textAnnotation, ENHANCER_END,
+                                       typedLiterals.createTypedLiteral(occ.offset
+                                                       + occ.name.length())));
+                       UriRef typeRef = new UriRef(occ.type);
+                       model.add(new TripleImpl(textAnnotation, DC_TYPE, typeRef));
+                       // TODO ################## model.add(new TripleImpl(textAnnotation,
+                       // ENHANCER_SELECTION_CONTEXT, new
+                       // PlainLiteralImpl(occ.context,language)));
+
+                       UriRef firstAnnotation = firstAnnotationByName.get(occ.name);
+                       if (firstAnnotation == null) {
+                               firstAnnotationByName.put(occ.name, textAnnotation);
+                       } else {
+                               model.add(new TripleImpl(firstAnnotation, DC_RELATION,
+                                               textAnnotation));
+                       }
+               }
+       }
+
+       /**
+        * Sends a POST request with the text (and, if configured, the spotter) to
+        * the DBpedia Spotlight url and converts the <code>surfaceForm</code>
+        * elements of the XML response into {@link SurfaceForm}s.
+        * <p>
+        * The response is buffered as raw bytes and handed to the XML parser
+        * unchanged, so the parser determines the character encoding from the
+        * document itself.
+        * 
+        * @param text
+        *            a <code>String</code> with the text to be analyzed
+        * @return the surface forms found in the response, or <code>null</code>
+        *         if the request could not be made (the error is logged)
+        * @throws EngineException
+        *             if the request data cannot be encoded or the response
+        *             cannot be parsed
+        */
+       public Collection<SurfaceForm> doPostRequest(String text)
+                       throws EngineException {
+               StringBuilder data = new StringBuilder();
+               try {
+                       if (spotlightSpotter != null && !spotlightSpotter.isEmpty()) {
+                               data.append(URLEncoder.encode("spotter", "UTF-8")).append('=')
+                                               .append(URLEncoder.encode(spotlightSpotter, "UTF-8"))
+                                               .append('&');
+                       }
+                       data.append(URLEncoder.encode("text", "UTF-8")).append('=')
+                                       .append(URLEncoder.encode(text, "UTF-8"));
+               } catch (UnsupportedEncodingException e) {
+                       throw new EngineException(
+                                       "Data for the httprequest could not be converted. Error: "
+                                                       + e.getMessage(), e);
+               }
+
+               HttpURLConnection connection = null;
+               byte[] responseBytes;
+
+               try {
+                       // Create connection
+                       URL url = new URL(spotlightUrl);
+                       connection = (HttpURLConnection) url.openConnection();
+                       connection.setRequestMethod("POST");
+                       connection.setRequestProperty("Content-Type",
+                                       "application/x-www-form-urlencoded");
+                       connection.setRequestProperty("Accept", "text/xml");
+
+                       connection.setUseCaches(false);
+                       connection.setDoInput(true);
+                       connection.setDoOutput(true);
+
+                       // Send request; the URL-encoded data is plain ASCII
+                       DataOutputStream wr = new DataOutputStream(
+                                       connection.getOutputStream());
+                       try {
+                               wr.writeBytes(data.toString());
+                               wr.flush();
+                       } finally {
+                               wr.close();
+                       }
+
+                       // Get Response
+                       InputStream is = connection.getInputStream();
+                       try {
+                               ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+                               byte[] chunk = new byte[4096];
+                               int read;
+                               while ((read = is.read(chunk)) != -1) {
+                                       buffer.write(chunk, 0, read);
+                               }
+                               responseBytes = buffer.toByteArray();
+                       } finally {
+                               is.close();
+                       }
+
+               } catch (Exception e) {
+
+                       // callers treat null as "no result", so the failure is only logged
+                       log.error("[request] Request could not be made. Error: "
+                                       + e.getMessage(), e);
+                       return null;
+
+               } finally {
+
+                       if (connection != null) {
+                               connection.disconnect();
+                       }
+               }
+
+               XMLParser xmlParser = new XMLParser();
+               try {
+                       Document xmlDoc = xmlParser
+                                       .loadXMLFromInputStream(new java.io.ByteArrayInputStream(
+                                                       responseBytes));
+                       NodeList nlist = xmlParser.getElementsByTagName(xmlDoc,
+                                       "surfaceForm");
+
+                       return this.getAnnotations(nlist);
+               } catch (Exception e) {
+                       log.error("[response] Response XML could not be parsed. Error: "
+                                       + e.getMessage(), e);
+                       throw new EngineException(
+                                       "Response XML could not be parsed. Error: "
+                                                       + e.getMessage(), e);
+               }
+       }
+
+       /**
+        * Converts the elements of the node list into {@link SurfaceForm}s by
+        * reading their <code>name</code>, <code>offset</code> and
+        * <code>type</code> attributes. The result is what
+        * <code>createEnhancements</code> adds to the meta data of the content
+        * item as TextAnnotations.
+        * 
+        * @param nList
+        *            the <code>surfaceForm</code> elements of the XML response
+        *            from DBpedia Spotlight
+        * @return a Collection<SurfaceForm> with one entry per element
+        */
+       private Collection<SurfaceForm> getAnnotations(NodeList nList) {
+               Collection<SurfaceForm> surfaceForms = new HashSet<SurfaceForm>();
+
+               final int count = nList.getLength();
+               for (int index = 0; index < count; index++) {
+                       Element element = (Element) nList.item(index);
+
+                       SurfaceForm form = new SurfaceForm();
+                       form.name = element.getAttribute("name");
+                       form.offset = Integer.valueOf(element.getAttribute("offset"));
+                       form.type = element.getAttribute("type");
+                       surfaceForms.add(form);
+               }
+
+               return surfaceForms;
+       }
+
+       /**
+        * Reports the execution order of this engine.
+        * 
+        * @return an unmodifiable map with the single entry
+        *         <code>ENHANCEMENT_ENGINE_ORDERING</code> -> {@link #defaultOrder}
+        */
+       public Map<String, Object> getServiceProperties() {
+               Map<String, Object> ordering = Collections.singletonMap(
+                               ENHANCEMENT_ENGINE_ORDERING, (Object) defaultOrder);
+               return Collections.unmodifiableMap(ordering);
+       }
+
+       /**
+        * Looks up the first DC_LANGUAGE value stored in the graph for the given
+        * subject.
+        * 
+        * @param model
+        *            the graph to search
+        * @param subj
+        *            the subject passed to the filter; callers in this class pass
+        *            <code>null</code>
+        * @return the lexical form of the first matching object, or
+        *         <code>null</code> if there is no such triple
+        */
+       public String getMetadataLanguage(MGraph model, NonLiteral subj) {
+               Iterator<Triple> languageTriples = model.filter(subj, DC_LANGUAGE,
+                               null);
+               if (!languageTriples.hasNext()) {
+                       return null;
+               }
+               return getLexicalForm(languageTriples.next().getObject());
+       }
+
+       /**
+        * Returns the textual value of a resource.
+        * 
+        * @param res
+        *            the resource, may be <code>null</code>
+        * @return the lexical form for a {@link Literal}, the result of
+        *         <code>toString()</code> for any other resource, or
+        *         <code>null</code> if the resource is <code>null</code>
+        */
+       public String getLexicalForm(Resource res) {
+               if (res instanceof Literal) {
+                       return ((Literal) res).getLexicalForm();
+               }
+               return res == null ? null : res.toString();
+       }
+
+       /**
+        * Overrides the url of the Spotlight endpoint; used by the test class,
+        * which does not go through the component activation.
+        * 
+        * @param url
+        *            the url of the Spotlight endpoint to send requests to
+        */
+       public void setEndpointUrl(String url) {
+               this.spotlightUrl = url;
+       }
+
+}

Added: 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/SurfaceForm.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/SurfaceForm.java?rev=1375110&view=auto
==============================================================================
--- 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/SurfaceForm.java
 (added)
+++ 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/SurfaceForm.java
 Mon Aug 20 17:14:56 2012
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.dbpspotlight.spot;
+
+//import org.apache.clerezza.rdf.core.Resource;
+
+/**
+ * Stores the surface forms given by DBPedia Spotlight Spot.
+ * 
+ * @author <a href="mailto:[email protected]">Iavor Jelev</a>
+ */
+public class SurfaceForm {
+
+       /** value of the <code>name</code> attribute of the surface form */
+       public String name;
+       /** value of the <code>type</code> attribute of the surface form */
+       public String type;
+       /** value of the <code>offset</code> attribute of the surface form */
+       public Integer offset;
+
+       /**
+        * @return a readable representation of the form
+        *         <code>[name=..., offset=..., type=...]</code>
+        */
+       @Override
+       public String toString() {
+               // %d is the decimal integer conversion; "%i" is not a valid
+               // conversion and made every call throw
+               // UnknownFormatConversionException
+               return String.format("[name=%s, offset=%d, type=%s]", name, offset,
+                               type);
+       }
+}

Added: 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/XMLParser.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/XMLParser.java?rev=1375110&view=auto
==============================================================================
--- 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/XMLParser.java
 (added)
+++ 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/XMLParser.java
 Mon Aug 20 17:14:56 2012
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.dbpspotlight.spot;
+
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+
+import org.w3c.dom.Document;
+import org.w3c.dom.NodeList;
+import org.xml.sax.SAXException;
+
+/**
+ * Parses the XML results given by DBPedia Spotlight.
+ * 
+ * @author <a href="mailto:[email protected]">Iavor Jelev</a>
+ */
+public class XMLParser {
+
+       /**
+        * Returns all elements of the document with the given tag name.
+        * 
+        * @param doc
+        *            the document to search
+        * @param tagName
+        *            the tag name to look for
+        * @return the matching elements as returned by
+        *         {@link Document#getElementsByTagName(String)}
+        */
+       public NodeList getElementsByTagName(Document doc, String tagName) {
+
+               return doc.getElementsByTagName(tagName);
+       }
+
+       /**
+        * Parses XML held in a string. The characters are handed to the parser
+        * directly, so the result depends neither on the platform charset nor on
+        * an encoding named in the XML declaration.
+        * 
+        * @param xml
+        *            the XML text
+        * @return the parsed, normalized document
+        * @throws SAXException
+        *             if the text is not well-formed XML
+        * @throws IOException
+        *             if the text cannot be read or no parser can be created
+        */
+       public Document loadXMLFromString(String xml) throws SAXException,
+                       IOException {
+               Document doc = newNamespaceAwareBuilder().parse(
+                               new org.xml.sax.InputSource(new java.io.StringReader(xml)));
+               doc.getDocumentElement().normalize();
+
+               return doc;
+       }
+
+       /**
+        * Parses XML from a stream with a namespace aware parser. The stream is
+        * closed before this method returns, also when parsing fails.
+        * 
+        * @param is
+        *            the stream to read the XML from
+        * @return the parsed, normalized document
+        * @throws SAXException
+        *             if the content is not well-formed XML
+        * @throws IOException
+        *             if the stream cannot be read or no parser can be created
+        */
+       public Document loadXMLFromInputStream(InputStream is) throws SAXException,
+                       IOException {
+               try {
+                       Document doc = newNamespaceAwareBuilder().parse(is);
+                       doc.getDocumentElement().normalize();
+
+                       return doc;
+               } finally {
+                       // a null stream is rejected by parse(); do not mask that error
+                       if (is != null) {
+                               is.close();
+                       }
+               }
+       }
+
+       /**
+        * Parses the XML file at the given path with a default-configured parser.
+        * 
+        * @param filePath
+        *            path of the XML file
+        * @return the parsed, normalized document
+        * @throws ParserConfigurationException
+        *             if no parser can be created
+        * @throws SAXException
+        *             if the file is not well-formed XML
+        * @throws IOException
+        *             if the file cannot be read
+        */
+       public Document loadXMLFromFile(String filePath)
+                       throws ParserConfigurationException, SAXException, IOException {
+               File fXmlFile = new File(filePath);
+               DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
+               DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
+               Document doc = dBuilder.parse(fXmlFile);
+               doc.getDocumentElement().normalize();
+
+               return doc;
+       }
+
+       /**
+        * Creates a namespace aware document builder. A configuration problem is
+        * reported as an IOException (with the original exception as cause)
+        * because the public methods using this helper do not declare
+        * ParserConfigurationException.
+        */
+       private DocumentBuilder newNamespaceAwareBuilder() throws IOException {
+               DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
+               factory.setNamespaceAware(true);
+               try {
+                       return factory.newDocumentBuilder();
+               } catch (ParserConfigurationException ex) {
+                       throw new IOException("Unable to create an XML parser: "
+                                       + ex.getMessage(), ex);
+               }
+       }
+}
\ No newline at end of file

Added: 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/core/DBPSpotlightSpotEnhancementTest.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/core/DBPSpotlightSpotEnhancementTest.java?rev=1375110&view=auto
==============================================================================
--- 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/core/DBPSpotlightSpotEnhancementTest.java
 (added)
+++ 
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/core/DBPSpotlightSpotEnhancementTest.java
 Mon Aug 20 17:14:56 2012
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.dbpspotlight.spot.core;
+
+import java.util.Collection;
+
+import 
org.apache.stanbol.enhancer.engines.dbpspotlight.spot.DBPSpotlightSpotEnhancementEngine;
+import org.apache.stanbol.enhancer.engines.dbpspotlight.spot.SurfaceForm;
+import org.apache.stanbol.enhancer.servicesapi.EngineException;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.osgi.service.cm.ConfigurationException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * This class provides a JUnit test for DBpedia Spotlight Spot
+ * EnhancementEngine.
+ * <p>
+ * The endpoint URL is read from the system property named by
+ * {@link DBPSpotlightSpotEnhancementEngine#SL_URL_KEY}; when that property
+ * is not set, {@link #DEFAULT_SPL_URL} is used instead.
+ * 
+ * @author Iavor Jelev, babelmonkeys / GzEvD
+ */
+public class DBPSpotlightSpotEnhancementTest {
+
+       /**
+        * This contains the logger.
+        */
+       private static final Logger LOG = LoggerFactory
+                       .getLogger(DBPSpotlightSpotEnhancementTest.class);
+
+       /**
+        * Endpoint URL used when the system property is not set.
+        */
+       private static final String DEFAULT_SPL_URL =
+                       "http://spotlight.dbpedia.org/rest/spot";
+
+       /**
+        * Endpoint URL handed to the engine: the system property value if
+        * present, otherwise the default. A single two-argument lookup
+        * replaces the former ternary, which did not compile.
+        */
+       private static final String SPL_URL = System.getProperty(
+                       DBPSpotlightSpotEnhancementEngine.SL_URL_KEY, DEFAULT_SPL_URL);
+
+       /**
+        * Sample text sent to the engine by the test.
+        */
+       private static final String TEST_TEXT =
+                       "President Obama is meeting Angela Merkel in Berlin on Monday";
+
+       /**
+        * Engine under test, created once for the whole class.
+        */
+       private static DBPSpotlightSpotEnhancementEngine dbpslight;
+
+       /**
+        * Creates the engine and points it at {@link #SPL_URL}.
+        * 
+        * @throws ConfigurationException
+        *             declared for the engine setup calls
+        */
+       @BeforeClass
+       public static void oneTimeSetup() throws ConfigurationException {
+               dbpslight = new DBPSpotlightSpotEnhancementEngine();
+               dbpslight.setEndpointUrl(SPL_URL);
+       }
+
+       /**
+        * Sends {@link #TEST_TEXT} to the engine and expects at least one
+        * surface form in the result. An {@link EngineException} is reported
+        * as a test failure carrying the exception message.
+        */
+       @Test
+       public void testEntityExtraction() {
+               try {
+                       Collection<SurfaceForm> entities = dbpslight
+                                       .doPostRequest(TEST_TEXT);
+                       LOG.info("Found entities: {}", entities.size());
+                       LOG.debug("Entities:\n{}", entities);
+                       Assert.assertFalse("No entities were found!",
+                                       entities.isEmpty());
+               } catch (EngineException e) {
+                       // Explicit failure instead of assertFalse(message, true).
+                       Assert.fail("An EngineException occurred! The message was: "
+                                       + e.getMessage());
+               }
+       }
+
+}

Modified: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/pom.xml
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/pom.xml?rev=1375110&r1=1375109&r2=1375110&view=diff
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/pom.xml 
(original)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/pom.xml Mon 
Aug 20 17:14:56 2012
@@ -57,9 +57,9 @@
     <module>opencalais</module> <!-- http://opencalais.com/ -->
     <module>zemanta</module> <!-- http://zemanta.com -->
     <!-- DBpedia.org Spotlight Enhancement Engines (STANBOL-706) -->
-    <module>dbpspotlightannotate</module>
-    <module>dbpspotlightcandidates</module>
-    <module>dbpspotlightdisambiguate</module>
-    <module>dbpspotlightspot</module>
+    <module>dbpedia-spotlight-annotate</module>
+    <module>dbpedia-spotlight-candidates</module>
+    <module>dbpedia-spotlight-disambiguate</module>
+    <module>dbpedia-spotlight-spot</module>
   </modules>
 </project>


Reply via email to