Author: rwesten
Date: Sat Mar  3 21:17:23 2012
New Revision: 1296704

URL: http://svn.apache.org/viewvc?rev=1296704&view=rev
Log:
STANBOL-512: Added support for Tika metadata -> Ontology mappings


Added:
    
incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/
    
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/test.email.txt 
  (with props)
    
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testASF.asf   
(with props)
    
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testGIF.gif   
(with props)
    
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testJPEG_EXIF.jpg
   (with props)
    
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testJPEG_GEO.jpg
   (with props)
    
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testMID.mid   
(with props)
    
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testMP3id3v24.mp3
   (with props)
    
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testMP3lyrics.mp3
   (with props)
    
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testTIFF.tif   
(with props)
Modified:
    incubator/stanbol/trunk/enhancer/engines/tika/pom.xml
    
incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/TikaEngine.java
    
incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/handler/PlainTextHandler.java
    
incubator/stanbol/trunk/enhancer/engines/tika/src/main/resources/OSGI-INF/metatype/metatype.properties
    
incubator/stanbol/trunk/enhancer/engines/tika/src/test/java/org/apache/stanbol/enhancer/engines/tika/TikaEngineTest.java
    incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/README
    
incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/rdf/NamespaceEnum.java
    incubator/stanbol/trunk/enhancer/ldpath/pom.xml
    
incubator/stanbol/trunk/entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/defaults/NamespaceEnum.java
    incubator/stanbol/trunk/parent/pom.xml

Modified: incubator/stanbol/trunk/enhancer/engines/tika/pom.xml
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/pom.xml?rev=1296704&r1=1296703&r2=1296704&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/tika/pom.xml (original)
+++ incubator/stanbol/trunk/enhancer/engines/tika/pom.xml Sat Mar  3 21:17:23 
2012
@@ -75,14 +75,23 @@
             <exclude>src/license/THIRD-PARTY.properties</exclude>
             <!-- AL20 License for test resources (see 
src/test/resources/README) -->
             <exclude>src/test/resources/test.doc</exclude>
+            <exclude>src/test/resources/test.email.txt</exclude>
             <exclude>src/test/resources/test.html</exclude>
-            <exclude>src/test/resources/test.xhtml</exclude>
             <exclude>src/test/resources/test.odt</exclude>
             <exclude>src/test/resources/test.pages</exclude>
             <exclude>src/test/resources/test.pdf</exclude>
             <exclude>src/test/resources/test.rtf</exclude>
+            <exclude>src/test/resources/test.xhtml</exclude>
             <exclude>src/test/resources/test2.html</exclude>
             <exclude>src/test/resources/test2.pdf</exclude>
+            <exclude>src/test/resources/testASF.asf</exclude>
+            <exclude>src/test/resources/testGIF.gif</exclude>
+            <exclude>src/test/resources/testJPEG_EXIF.jpg</exclude>
+            <exclude>src/test/resources/testJPEG_GEO.jpg</exclude>
+            <exclude>src/test/resources/testMID.mid</exclude>
+            <exclude>src/test/resources/testMP3id3v24.mp3</exclude>
+            <exclude>src/test/resources/testMP3lyrics.mp3</exclude>
+            <exclude>src/test/resources/testTIFF.tif</exclude>
           </excludes>
         </configuration>
       </plugin>
@@ -114,6 +123,10 @@
       <groupId>org.apache.clerezza</groupId>
       <artifactId>rdf.core</artifactId>
     </dependency>
+    <dependency> <!-- for metadata mappings -->
+      <groupId>org.apache.clerezza</groupId>
+      <artifactId>rdf.ontologies</artifactId>
+    </dependency>
     <dependency>
       <groupId>commons-io</groupId>
       <artifactId>commons-io</artifactId>

Modified: 
incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/TikaEngine.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/TikaEngine.java?rev=1296704&r1=1296703&r2=1296704&view=diff
==============================================================================
--- 
incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/TikaEngine.java
 (original)
+++ 
incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/TikaEngine.java
 Sat Mar  3 21:17:23 2012
@@ -16,6 +16,13 @@
 */
 package org.apache.stanbol.enhancer.engines.tika;
 
+import static 
org.apache.stanbol.enhancer.engines.tika.metadata.OntologyMappings.addDcMappings;
+import static 
org.apache.stanbol.enhancer.engines.tika.metadata.OntologyMappings.addGeoMappings;
+import static 
org.apache.stanbol.enhancer.engines.tika.metadata.OntologyMappings.addMediaResourceOntologyMappings;
+import static 
org.apache.stanbol.enhancer.engines.tika.metadata.OntologyMappings.addNepomukExifMappings;
+import static 
org.apache.stanbol.enhancer.engines.tika.metadata.OntologyMappings.addNepomukMessageMappings;
+import static 
org.apache.stanbol.enhancer.engines.tika.metadata.OntologyMappings.addRdfsMappings;
+import static 
org.apache.stanbol.enhancer.engines.tika.metadata.OntologyMappings.addSkosMappings;
 import static 
org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper.randomUUID;
 import static org.apache.tika.mime.MediaType.TEXT_PLAIN;
 
@@ -23,9 +30,12 @@ import java.io.BufferedInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.StringWriter;
+import java.util.Arrays;
 import java.util.Collections;
+import java.util.Dictionary;
 import java.util.Map;
 
+import org.apache.clerezza.rdf.core.LiteralFactory;
 import org.apache.clerezza.rdf.core.UriRef;
 import org.apache.commons.io.IOUtils;
 import org.apache.felix.scr.annotations.Component;
@@ -34,6 +44,7 @@ import org.apache.felix.scr.annotations.
 import org.apache.felix.scr.annotations.Service;
 import org.apache.stanbol.enhancer.engines.tika.handler.MultiHandler;
 import org.apache.stanbol.enhancer.engines.tika.handler.PlainTextHandler;
+import org.apache.stanbol.enhancer.engines.tika.metadata.OntologyMappings;
 import org.apache.stanbol.enhancer.servicesapi.Blob;
 import org.apache.stanbol.enhancer.servicesapi.ContentItem;
 import org.apache.stanbol.enhancer.servicesapi.EngineException;
@@ -67,12 +78,43 @@ import org.xml.sax.ContentHandler;
 @Component(immediate = true, metatype = true, inherit=true)
 @Service
 @Properties(value={
-    @Property(name=EnhancementEngine.PROPERTY_NAME,value="tika")
+    @Property(name=EnhancementEngine.PROPERTY_NAME,value="tika"),
+    @Property(name=TikaEngine.SKIP_LINEBREAKS_WITHIN_CONTENT, 
boolValue=TikaEngine.DEFAULT_SKIP_LINEBREAKS),
+    
@Property(name=TikaEngine.MAPPING_MEDIA_RESOURCE,boolValue=TikaEngine.DEFAULT_MAPPING_MEDIA_RESOURCE_STATE),
+    
@Property(name=TikaEngine.MAPPING_DUBLIN_CORE_TERMS,boolValue=TikaEngine.DEFAULT_MAPPING_DUBLIN_CORE_TERMS_STATE),
+    
@Property(name=TikaEngine.MAPPING_NEPOMUK_MESSAGE,boolValue=TikaEngine.DEFAULT_MAPPING_NEPOMUK_MESSAGE_STATE),
+    
@Property(name=TikaEngine.MAPPING_NEPOMUK_EXIF,boolValue=TikaEngine.DEFAULT_MAPPING_NEPOMUK_EXIF_STATE),
+    
@Property(name=TikaEngine.MAPPING_SKOS,boolValue=TikaEngine.DEFAULT_MAPPING_SKOS_STATE),
+    
@Property(name=TikaEngine.MAPPING_RDFS,boolValue=TikaEngine.DEFAULT_MAPPING_RDFS_STATE),
+    
@Property(name=TikaEngine.MAPPING_GEO,boolValue=TikaEngine.DEFAULT_MAPPING_GEO_STATE)
 })
 public class TikaEngine 
         extends AbstractEnhancementEngine<RuntimeException,RuntimeException> 
         implements EnhancementEngine, ServiceProperties {
     private final Logger log = LoggerFactory.getLogger(TikaEngine.class);
+    
+    private final LiteralFactory lf = LiteralFactory.getInstance();
+    
+    public static final String SKIP_LINEBREAKS_WITHIN_CONTENT = 
"stanbol.engines.tika.skipLinebreaks";
+    //Metadata -> Ontology mapping configuration
+    public static final String MAPPING_MEDIA_RESOURCE = 
"stanbol.engine.tika.mapping.mediaResource";
+    public static final boolean DEFAULT_MAPPING_MEDIA_RESOURCE_STATE = true;
+    public static final String MAPPING_DUBLIN_CORE_TERMS = 
"stanbol.engine.tika.mapping.dcTerms";
+    public static final boolean DEFAULT_MAPPING_DUBLIN_CORE_TERMS_STATE = true;
+    public static final String MAPPING_NEPOMUK_MESSAGE = 
"stanbol.engine.tika.mapping.nepomukMessage";
+    public static final boolean DEFAULT_MAPPING_NEPOMUK_MESSAGE_STATE = true;
+    public static final String MAPPING_NEPOMUK_EXIF = 
"stanbol.engine.tika.mapping.nepomukExif";
+    public static final boolean DEFAULT_MAPPING_NEPOMUK_EXIF_STATE = true;
+    public static final String MAPPING_SKOS = 
"stanbol.engine.tika.mapping.skos";
+    public static final boolean DEFAULT_MAPPING_SKOS_STATE = false;
+    public static final String MAPPING_RDFS = 
"stanbol.engine.tika.mapping.rdfs";
+    public static final boolean DEFAULT_MAPPING_RDFS_STATE = false;
+    public static final String MAPPING_GEO = "stanbol.engine.tika.mapping.geo";
+    public static final boolean DEFAULT_MAPPING_GEO_STATE = true;
+    
+    public static final boolean DEFAULT_SKIP_LINEBREAKS = false;
+    
+    private boolean skipLinebreaks = DEFAULT_SKIP_LINEBREAKS;
     /**
      * The default value for the Execution of this Engine. Currently set to
      * {@link ServiceProperties#ORDERING_PRE_PROCESSING}
@@ -84,7 +126,8 @@ public class TikaEngine 
     private TikaConfig config;
     private Parser parser;
     private Detector detector;
-
+    private OntologyMappings ontologyMappings;
+    
     private static class MediaTypeAndStream {
         MediaType mediaType;
         InputStream in;
@@ -119,7 +162,7 @@ public class TikaEngine 
             metadata.set(Metadata.CONTENT_TYPE, mtas.mediaType.toString());
             final StringWriter writer = new StringWriter();
             final ContentHandler textHandler = new BodyContentHandler( //only 
the Body
-                new PlainTextHandler(writer, true,false)); //skip ignoreable
+                new PlainTextHandler(writer, false,skipLinebreaks)); //skip 
ignoreable
             final ToXMLContentHandler xhtmlHandler;
             final ContentHandler mainHandler;
             if(!plainMediaType.equals(XHTML)){ //do not parse XHTML from XHTML
@@ -137,25 +180,40 @@ public class TikaEngine 
                         "plain text!",e);
             }
             IOUtils.closeQuietly(in);
-//            log.info("Plain Content: \n{} \n",writer.toString());
+            if(log.isDebugEnabled()){
+                log.debug("Plain Content: \n{}",writer.toString());
+            }
             String random = randomUUID().toString();
             UriRef textBlobUri = new UriRef("urn:tika:text:"+random);
             ci.addPart(textBlobUri, 
                 new InMemoryBlob(writer.toString(), 
                     TEXT_PLAIN.toString())); //string -> no encoding
             if(xhtmlHandler != null){
-//                log.info("XML Content: \n{} \n",xhtmlHandler.toString());
+                if(log.isDebugEnabled()){
+                    log.debug("XML Content: \n{}",xhtmlHandler.toString());
+                }
                 UriRef xhtmlBlobUri = new UriRef("urn:tika:xhtml:"+random);
                 ci.addPart(xhtmlBlobUri, 
                     new InMemoryBlob(xhtmlHandler.toString(),
                         "application/xhtml+xml")); //string -> no encoding
             }
-            //TODO:
-            // * add also the Metadata extracted by Apache Tika
+            //add the extracted metadata
+            if(log.isDebugEnabled()){
+                for(String name : metadata.names()){
+                    log.debug("{}: 
{}",name,Arrays.toString(metadata.getValues(name)));
+                }
+            }
+            ci.getLock().writeLock().lock();
+            try {
+                ontologyMappings.apply(ci.getMetadata(), ci.getUri(), 
metadata);
+            }finally{
+                ci.getLock().writeLock().unlock();
+            }
             
         } //else not supported format
 
     }
+
     /**
      * Getter for the contentType. If not set or {@link MediaType#OCTET_STREAM}
      * than the media type is detected.<p>
@@ -204,14 +262,52 @@ public class TikaEngine 
         config = TikaConfig.getDefaultConfig();
         this.detector = config.getDetector();
         this.parser = new AutoDetectParser(config);
+        this.skipLinebreaks = getBoolean(ctx.getProperties(), 
+            SKIP_LINEBREAKS_WITHIN_CONTENT, DEFAULT_SKIP_LINEBREAKS);
+        this.ontologyMappings = new OntologyMappings();
+        if(getBoolean(ctx.getProperties(), 
+            MAPPING_MEDIA_RESOURCE, DEFAULT_MAPPING_MEDIA_RESOURCE_STATE)){
+            addMediaResourceOntologyMappings(ontologyMappings);
+        }
+        if(getBoolean(ctx.getProperties(), 
+            MAPPING_DUBLIN_CORE_TERMS, 
DEFAULT_MAPPING_DUBLIN_CORE_TERMS_STATE)){
+            addDcMappings(ontologyMappings);
+        }
+        if(getBoolean(ctx.getProperties(), 
+            MAPPING_NEPOMUK_MESSAGE, DEFAULT_MAPPING_NEPOMUK_MESSAGE_STATE)){
+            addNepomukMessageMappings(ontologyMappings);
+        }
+        if(getBoolean(ctx.getProperties(), 
+            MAPPING_NEPOMUK_EXIF, DEFAULT_MAPPING_NEPOMUK_EXIF_STATE)){
+            addNepomukExifMappings(ontologyMappings);
+        }
+        if(getBoolean(ctx.getProperties(), 
+            MAPPING_SKOS, DEFAULT_MAPPING_SKOS_STATE)){
+            addSkosMappings(ontologyMappings);
+        }
+        if(getBoolean(ctx.getProperties(), 
+            MAPPING_RDFS, DEFAULT_MAPPING_RDFS_STATE)){
+            addRdfsMappings(ontologyMappings);
+        }
+        if(getBoolean(ctx.getProperties(), 
+            MAPPING_GEO, DEFAULT_MAPPING_GEO_STATE)){
+            addGeoMappings(ontologyMappings);
+        }
     }
     @Override
     protected void deactivate(ComponentContext ctx) throws RuntimeException {
         this.config = null;
         this.parser = null;
         this.detector = null;
+        this.skipLinebreaks = DEFAULT_SKIP_LINEBREAKS;
+        this.ontologyMappings = null;
         super.deactivate(ctx);
     }
+    private static boolean getBoolean(Dictionary<?,?> properties, String key, 
boolean defaultState){
+        Object value = properties.get(key);
+        return value instanceof Boolean ? (Boolean)value :
+            value != null ? Boolean.parseBoolean(value.toString()) : 
defaultState;
+    }
 
     public Map<String, Object> getServiceProperties() {
         return Collections.unmodifiableMap(

Modified: 
incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/handler/PlainTextHandler.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/handler/PlainTextHandler.java?rev=1296704&r1=1296703&r2=1296704&view=diff
==============================================================================
--- 
incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/handler/PlainTextHandler.java
 (original)
+++ 
incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/handler/PlainTextHandler.java
 Sat Mar  3 21:17:23 2012
@@ -33,10 +33,12 @@ import org.xml.sax.SAXException;
  */
 public class PlainTextHandler extends ToTextContentHandler {
 
+    private static char[] SPACE = new char[]{' '};
     
     
     private final boolean skipWhitespaces;
     private final boolean skipLinebreakes;
+    boolean addedText = false;
     public PlainTextHandler(Writer writer, boolean skipIgnoreableWhitespaces, 
boolean skipLinebreaksWithinLiterals) {
         super(writer);
         this.skipWhitespaces = skipIgnoreableWhitespaces;
@@ -45,39 +47,38 @@ public class PlainTextHandler extends To
 
     @Override
     public void ignorableWhitespace(char[] ch, int start, int length) throws 
SAXException {
-        if(!skipWhitespaces){
-            super.ignorableWhitespace(ch, start, length);
+        if(!skipWhitespaces && addedText){
+            super.characters(ch, start, length);
+            addedText = false;
         } //else ignore
     }
     @Override
     public void characters(char[] ch, int start, int length) throws 
SAXException {
-        int in;
         if(skipLinebreakes){
-            //use an in(serte) and an it(erator) index to avoid copying
-            //the data to a new char[].
-            in = start;
-            for(int it = start; it<length;it++){
-                if(ch[it] != '\n'){
-                    ch[in] = ch[it];
-                    in++;
+            int end = start+length; //exclusive end index of the chars to process
+            for(int pos = start; pos<end;pos++){
+                if(ch[pos] == '\n'){
+                    if(pos > start){ //emit the text before the line break + a space
+                        super.characters(ch, start, pos-start);
+                        super.characters(SPACE, 0, 1);
+                    }
+                    start = pos+1; //continue after the line break
+                    length = end-start; //chars remaining up to the fixed end index
                 } //ignore line breaks
             }
-            if(in == start){ //only line breaks 
-                return; // -> nothing to add
-            }
-        } else {
-            in = length;
         }
-        super.characters(ch, start, in);
+        if(length > 0) { //forward the (remaining) text, if any
+            super.characters(ch, start, length);
+        }
+        addedText = true;
     }
     
     @Override
-    public void startElement(String uri, String localName, String qName, 
Attributes attributes) throws SAXException {
-        super.startElement(uri, localName, qName, attributes);
-    }
-    @Override
     public void endElement(String uri, String localName, String qName) throws 
SAXException {
-        // TODO Auto-generated method stub
+//        if(skipLinebreakes & addedText){
+//            characters(LINEBREAK, 0, 1);
+//            addedText = false;
+//        }
         super.endElement(uri, localName, qName);
     }
 }

Modified: 
incubator/stanbol/trunk/enhancer/engines/tika/src/main/resources/OSGI-INF/metatype/metatype.properties
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/main/resources/OSGI-INF/metatype/metatype.properties?rev=1296704&r1=1296703&r2=1296704&view=diff
==============================================================================
--- 
incubator/stanbol/trunk/enhancer/engines/tika/src/main/resources/OSGI-INF/metatype/metatype.properties
 (original)
+++ 
incubator/stanbol/trunk/enhancer/engines/tika/src/main/resources/OSGI-INF/metatype/metatype.properties
 Sat Mar  3 21:17:23 2012
@@ -31,4 +31,40 @@ Enhancer Engine: Apache Tika
 org.apache.stanbol.enhancer.engines.tika.TikaEngine.description=Apache Tika \
 supports the conversion of parsed content to XHTML and plain text. In addition \
 it allows also to extract additional metadata from various content formats. 
For \
-detailed information please see the Apache Tika project homepage.
\ No newline at end of file
+detailed information please see the Apache Tika project homepage.
+
+stanbol.engines.tika.skipLinebreaks.name=Skip Linebreaks
+stanbol.engines.tika.skipLinebreaks.description=Allows to replace line breaks \
+within the content with spaces.
+
+# Metadata -> Ontology mappings
+
+stanbol.engine.tika.mapping.mediaResource.name=Media Resource
+stanbol.engine.tika.mapping.mediaResource.description=Converts Metadata 
extracted \
+by Apache Tika to the W3C Ontology for Media Resources 1.0 \
+(http://www.w3.org/TR/mediaont-10/) 
+
+stanbol.engine.tika.mapping.dcTerms.name=Dublin Core
+stanbol.engine.tika.mapping.dcTerms.description=Converts Metadata extracted \
+by Apache Tika to Dublin Core Terms 
+
+stanbol.engine.tika.mapping.nepomukMessage.name=Message
+stanbol.engine.tika.mapping.nepomukMessage.description=Converts Metadata 
extracted \
+by Apache Tika to the Nepomuk Message Ontology
+
+stanbol.engine.tika.mapping.nepomukExif.name=EXIF
+stanbol.engine.tika.mapping.nepomukExif.description=Converts Metadata 
extracted \
+by Apache Tika to the Nepomuk EXIF Ontology
+
+stanbol.engine.tika.mapping.skos.name=SKOS
+stanbol.engine.tika.mapping.skos.description=Encodes labels and notes 
extracted \
+by Apache Tika as SKOS labels and notes
+
+stanbol.engine.tika.mapping.rdfs.name=RDFS
+stanbol.engine.tika.mapping.rdfs.description=Encodes labels and notes 
extracted \
+by Apache Tika as rdfs:label and rdfs:comment
+
+stanbol.engine.tika.mapping.geo.name=GEO
+stanbol.engine.tika.mapping.geo.description=Encodes latitude, longitude and \
+altitude information extracted by Apache Tika by using the W3C wgs84 Ontology
+

Modified: 
incubator/stanbol/trunk/enhancer/engines/tika/src/test/java/org/apache/stanbol/enhancer/engines/tika/TikaEngineTest.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/test/java/org/apache/stanbol/enhancer/engines/tika/TikaEngineTest.java?rev=1296704&r1=1296703&r2=1296704&view=diff
==============================================================================
--- 
incubator/stanbol/trunk/enhancer/engines/tika/src/test/java/org/apache/stanbol/enhancer/engines/tika/TikaEngineTest.java
 (original)
+++ 
incubator/stanbol/trunk/enhancer/engines/tika/src/test/java/org/apache/stanbol/enhancer/engines/tika/TikaEngineTest.java
 Sat Mar  3 21:17:23 2012
@@ -21,11 +21,13 @@ import static org.apache.commons.io.IOUt
 import static org.apache.commons.io.IOUtils.toByteArray;
 import static org.apache.stanbol.enhancer.engines.tika.TikaEngine.XHTML;
 import static 
org.apache.stanbol.enhancer.servicesapi.EnhancementEngine.CANNOT_ENHANCE;
+import static 
org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper.randomUUID;
 import static org.apache.tika.mime.MediaType.OCTET_STREAM;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
 import java.io.IOException;
@@ -33,17 +35,33 @@ import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.Reader;
 import java.nio.charset.Charset;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Iterator;
 import java.util.Map.Entry;
+import java.util.Set;
 import java.util.regex.Pattern;
 
+import org.apache.clerezza.rdf.core.Literal;
+import org.apache.clerezza.rdf.core.NonLiteral;
+import org.apache.clerezza.rdf.core.PlainLiteral;
+import org.apache.clerezza.rdf.core.Resource;
+import org.apache.clerezza.rdf.core.Triple;
+import org.apache.clerezza.rdf.core.TypedLiteral;
 import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.clerezza.rdf.ontologies.DC;
+import org.apache.clerezza.rdf.ontologies.RDF;
+import org.apache.clerezza.rdf.ontologies.XSD;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.io.LineIterator;
 import org.apache.stanbol.enhancer.servicesapi.Blob;
 import org.apache.stanbol.enhancer.servicesapi.ContentItem;
 import org.apache.stanbol.enhancer.servicesapi.EngineException;
 import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
+import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
 import org.apache.stanbol.enhancer.servicesapi.helper.InMemoryContentItem;
+import org.apache.stanbol.enhancer.servicesapi.rdf.NamespaceEnum;
 import org.junit.After;
 import org.junit.AfterClass;
 import org.junit.Assert;
@@ -241,6 +259,171 @@ public class TikaEngineTest {
             "</body></html>");
     }
     @Test
+    public void testEMail() throws EngineException, IOException {
+        ContentItem ci = createContentItem("test.email.txt", "message/rfc822");
+        assertFalse(engine.canEnhance(ci) == CANNOT_ENHANCE);
+        engine.computeEnhancements(ci);
+        Entry<UriRef,Blob> contentPart = ContentItemHelper.getBlob(ci, 
+            singleton("text/plain"));
+        assertNotNull(contentPart);
+        Blob plainTextBlob = contentPart.getValue();
+        assertNotNull(plainTextBlob);
+        assertContentRegexp(plainTextBlob, 
+            "Julien Nioche commented on TIKA-461:",
+            "I'll have a look at mime4j and try to use it in Tika",
+            "> RFC822 messages not parsed",
+            "Key: TIKA-461",
+            "URL: https://issues.apache.org/jira/browse/TIKA-461");
+        //validate XHTML results
+        contentPart = ContentItemHelper.getBlob(ci, 
+            singleton("application/xhtml+xml"));
+        assertNotNull(contentPart);
+        Blob xhtmlBlob = contentPart.getValue();
+        assertNotNull(xhtmlBlob);
+        assertContentRegexp(xhtmlBlob, 
+            "<html xmlns=\"http://www.w3.org/1999/xhtml\">",
+            "<title></title>",
+            "<body><p>",
+            "Julien Nioche commented on TIKA-461:",
+            "I'll have a look at mime4j and try to use it in Tika",
+            "&gt; RFC822 messages not parsed",
+            "Key: TIKA-461",
+            "URL: https://issues.apache.org/jira/browse/TIKA-461");
+        //now check the extracted metadata!
+        //DC
+        verifyValue(ci, DC.date, XSD.dateTime,"2010-09-06T09:25:34Z");
+        verifyValue(ci, DC.format, null,"message/rfc822");
+        verifyValue(ci, DC.subject, null,"[jira] Commented: (TIKA-461) RFC822 
messages not parsed");
+        verifyValue(ci, DC.creator, null,"Julien Nioche (JIRA) 
<[email protected]>");
+        verifyValue(ci, new UriRef(NamespaceEnum.dc+"created"), 
XSD.dateTime,"2010-09-06T09:25:34Z");
+        
+        //Media Ontology
+        verifyValue(ci, new 
UriRef(NamespaceEnum.media+"creationDate"),XSD.dateTime,"2010-09-06T09:25:34Z");
+        verifyValue(ci, new 
UriRef(NamespaceEnum.media+"hasFormat"),null,"message/rfc822");
+        verifyValue(ci, new 
UriRef(NamespaceEnum.media+"hasCreator"),null,"Julien Nioche (JIRA) 
<[email protected]>");
+        verifyValue(ci, new 
UriRef(NamespaceEnum.media+"hasContributor"),null,"Julien Nioche (JIRA) 
<[email protected]>");
+        verifyValue(ci, new 
UriRef(NamespaceEnum.media+"hasKeyword"),null,"[jira] Commented: (TIKA-461) 
RFC822 messages not parsed");
+
+        
+        //Nepomuk Message
+        String message = 
"http://www.semanticdesktop.org/ontologies/2007/03/22/nmo#";
+        verifyValue(ci, new UriRef(message+"from"),null,"Julien Nioche (JIRA) 
<[email protected]>");
+        verifyValue(ci, new UriRef(message+"to"),null,"[email protected]");
+        
+    }
+    @Test
+    public void testMp3() throws EngineException, IOException {
+        ContentItem ci = createContentItem("testMP3id3v24.mp3", "audio/mpeg");
+        assertFalse(engine.canEnhance(ci) == CANNOT_ENHANCE);
+        engine.computeEnhancements(ci);
+        Entry<UriRef,Blob> contentPart = ContentItemHelper.getBlob(ci, 
+            singleton("text/plain"));
+        assertNotNull(contentPart);
+        Blob plainTextBlob = contentPart.getValue();
+        assertNotNull(plainTextBlob);
+        assertContentRegexp(plainTextBlob, 
+            "Test Title",
+            "Test Artist",
+            "Test Album");
+        //validate XHTML results
+        contentPart = ContentItemHelper.getBlob(ci, 
+            singleton("application/xhtml+xml"));
+        assertNotNull(contentPart);
+        Blob xhtmlBlob = contentPart.getValue();
+        assertNotNull(xhtmlBlob);
+        //Test AudioTrack metadata
+        NonLiteral audioTrack = verifyNonLiteral(ci, new 
UriRef(NamespaceEnum.media+"hasTrack"));
+        //types
+        verifyValues(ci, audioTrack, RDF.type, 
+            new UriRef(NamespaceEnum.media+"MediaFragment"),
+            new UriRef(NamespaceEnum.media+"Track"),
+            new UriRef(NamespaceEnum.media+"AudioTrack"));
+        //properties
+        verifyValue(ci, audioTrack, new 
UriRef(NamespaceEnum.media+"hasFormat"), XSD.string, "Stereo");
+        verifyValue(ci, audioTrack, new 
UriRef(NamespaceEnum.media+"samplingRate"), XSD.int_, "44100");
+        verifyValue(ci, audioTrack, new 
UriRef(NamespaceEnum.media+"hasCompression"), XSD.string, "MP3");
+    }
+    @Test
+    public void testGEOMetadata() throws EngineException, IOException{
+        //first validate Media Resource Ontology
+        UriRef hasLocation = new UriRef(NamespaceEnum.media+"hasLocation");
+        UriRef locationLatitude = new 
UriRef(NamespaceEnum.media+"locationLatitude");
+        UriRef locationLongitude = new 
UriRef(NamespaceEnum.media+"locationLongitude");
+        //UriRef locationAltitude = new 
UriRef(NamespaceEnum.media+"locationAltitude");
+        ContentItem ci = createContentItem("testJPEG_GEO.jpg", 
OCTET_STREAM.toString());//"video/x-ms-asf");
+        assertFalse(engine.canEnhance(ci) == CANNOT_ENHANCE);
+        engine.computeEnhancements(ci);
+        Iterator<Triple> it = ci.getMetadata().filter(ci.getUri(),hasLocation, 
null);
+        assertTrue(it.hasNext());
+        Resource r = it.next().getObject();
+        assertFalse(it.hasNext());
+        assertTrue(r instanceof NonLiteral);
+        NonLiteral location = verifyNonLiteral(ci, hasLocation);
+        //lat
+        verifyValue(ci, location, locationLatitude, XSD.double_, "12.54321");
+        //long
+        verifyValue(ci, location, locationLongitude, XSD.double_, "-54.1234");
+        
+        //second the GEO ont
+        UriRef lat = new UriRef(NamespaceEnum.geo+"lat");
+        UriRef lon = new UriRef(NamespaceEnum.geo+"long");
+        //lat
+        verifyValue(ci, lat, XSD.double_, "12.54321");
+        //long
+        verifyValue(ci, lon, XSD.double_, "-54.1234");
+    }
+    
+
+    
+    public void testMetadata() throws EngineException {
+        ContentItem ci = createContentItem("testMP3id3v24.mp3", "audio/mpeg");
+        assertFalse(engine.canEnhance(ci) == CANNOT_ENHANCE);
+        engine.computeEnhancements(ci);
+        verifyValue(ci,DC.creator,null,"Test Artist");
+        verifyValue(ci, DC.title,null,"Test Album");
+        verifyValue(ci, DC.format,null,"audio/mpeg");
+        verifyValue(ci, new 
UriRef(NamespaceEnum.media+"hasFormat"),null,"audio/mpeg");
+        verifyValue(ci, new 
UriRef(NamespaceEnum.media+"mainOriginalTitle"),null,"Test Album");
+        verifyValue(ci, new 
UriRef(NamespaceEnum.media+"hasContributor"),null,"Test Artist");
+        verifyValue(ci, new 
UriRef(NamespaceEnum.media+"releaseDate"),XSD.string,"2008");
+        verifyValue(ci, new 
UriRef(NamespaceEnum.media+"hasGenre"),null,"Rock");
+        verifyValue(ci, new 
UriRef(NamespaceEnum.media+"hasCreator"),null,"Test Artist");
+    }
+    @Test
+    public void testExifMetadata() throws EngineException {
+        String exif = 
"http://www.semanticdesktop.org/ontologies/2007/05/10/nexif#";
+        ContentItem ci = createContentItem("testJPEG_EXIF.jpg", "image/jpeg");
+        assertFalse(engine.canEnhance(ci) == CANNOT_ENHANCE);
+        engine.computeEnhancements(ci);
+        verifyValue(ci, new UriRef(exif+"make"),null,"Canon");
+        verifyValue(ci, new UriRef(exif+"software"),null,"Adobe Photoshop CS3 
Macintosh");
+        verifyValue(ci, new 
UriRef(exif+"dateTimeOriginal"),XSD.dateTime,"2009-08-11T07:09:45Z");
+        verifyValue(ci, new UriRef(exif+"relatedImageWidth"),XSD.int_,"100");
+        verifyValue(ci, new UriRef(exif+"fNumber"),XSD.double_,"5.6");
+        verifyValue(ci, new UriRef(exif+"model"),null,"Canon EOS 40D");
+        verifyValue(ci, new UriRef(exif+"isoSpeedRatings"),XSD.int_,"400");
+        verifyValue(ci, new UriRef(exif+"xResolution"),XSD.double_,"240.0");
+        verifyValue(ci, new UriRef(exif+"flash"),XSD.boolean_,"false");
+        verifyValue(ci, new UriRef(exif+"exposureTime"),XSD.double_,"6.25E-4");
+        verifyValue(ci, new UriRef(exif+"yResolution"),XSD.double_,"240.0");
+        verifyValue(ci, new UriRef(exif+"resolutionUnit"),XSD.string,"Inch");
+        verifyValue(ci, new UriRef(exif+"focalLength"),XSD.double_,"194.0");
+        verifyValue(ci, new UriRef(exif+"relatedImageLength"),XSD.int_,"68");
+        verifyValue(ci, new UriRef(exif+"bitsPerSample"),XSD.int_,"8");
+        //also Media Ontology mappings for Exif
+        verifyValue(ci, new 
UriRef(NamespaceEnum.media+"frameHeight"),XSD.int_,"68");
+        verifyValue(ci, new 
UriRef(NamespaceEnum.media+"frameWidth"),XSD.int_,"100");
+        verifyValue(ci, new 
UriRef(NamespaceEnum.media+"hasFormat"),null,"image/jpeg");
+        verifyValue(ci, new 
UriRef(NamespaceEnum.media+"creationDate"),XSD.dateTime,"2009-08-11T07:09:45Z");
+        verifyValues(ci, new 
UriRef(NamespaceEnum.media+"hasKeyword"),null,"serbor","moscow-birds","canon-55-250");
+        //and finally the mapped DC properties
+        verifyValue(ci, DC.format,null,"image/jpeg");
+        verifyValue(ci, DC.date,XSD.dateTime,"2009-08-11T07:09:45Z");
+        verifyValue(ci, new 
UriRef(NamespaceEnum.dc+"modified"),XSD.dateTime,"2009-10-02T21:02:49Z");
+        verifyValues(ci, DC.subject, null, 
"serbor","moscow-birds","canon-55-250");
+    }
+    
+    @Test
     public void testContentTypeDetection() throws EngineException, IOException 
{
         ContentItem ci = createContentItem("test.pdf", 
OCTET_STREAM.toString());
         assertFalse(engine.canEnhance(ci) == CANNOT_ENHANCE);
@@ -321,7 +504,7 @@ public class TikaEngineTest {
             throw new IllegalStateException("Unable to read test data!",e);
         }
         closeQuietly(in);
-        UriRef ref = new 
UriRef("urn:contentItem:content-"+ContentItemHelper.toHexString(data));
+        UriRef ref = new UriRef("urn:contentItem:content-"+randomUUID());
         return new InMemoryContentItem(data,contentType);
     }
     /**
@@ -364,4 +547,82 @@ public class TikaEngineTest {
         engine = null;
     }
 
+    /*
+     * Internal helper methods 
+     */
+    private NonLiteral verifyNonLiteral(ContentItem ci, UriRef property){
+        return verifyNonLiteral(ci, ci.getUri(), property);
+    }
+    private static NonLiteral verifyNonLiteral(ContentItem ci, UriRef subject, 
UriRef property){
+        Iterator<Triple> it = ci.getMetadata().filter(subject,property, null);
+        assertTrue(it.hasNext());
+        Resource r = it.next().getObject();
+        assertFalse(it.hasNext());
+        assertTrue(r instanceof NonLiteral);
+        return (NonLiteral)r;
+    }
+    private static UriRef verifyValue(ContentItem ci, UriRef property, UriRef 
value){
+        return verifyValue(ci, ci.getUri(), property, value);
+    }
+    private static UriRef verifyValue(ContentItem ci, NonLiteral subject, 
UriRef property, UriRef value){
+        Iterator<Triple> it = ci.getMetadata().filter(subject,property, null);
+        assertTrue(it.hasNext());
+        Resource r = it.next().getObject();
+        assertFalse(it.hasNext());
+        assertTrue(r instanceof UriRef);
+        assertEquals(value,r);
+        return (UriRef)r;
+   }
+    private static Literal verifyValue(ContentItem ci, UriRef property, UriRef 
dataType, String lexValue){
+        return verifyValue(ci, ci.getUri(), property, dataType, lexValue);
+    }
+    private static Literal verifyValue(ContentItem ci, NonLiteral subject, 
UriRef property, UriRef dataType, String lexValue){
+        Iterator<Triple> it = ci.getMetadata().filter(subject,property, null);
+        assertTrue(it.hasNext());
+        Resource r = it.next().getObject();
+        assertFalse(it.hasNext());
+        if(dataType == null){
+            assertTrue(r instanceof PlainLiteral);
+        } else {
+            assertTrue(r instanceof TypedLiteral);
+            assertEquals(dataType, ((TypedLiteral)r).getDataType());
+        }
+        assertEquals(lexValue,((Literal)r).getLexicalForm());
+        return (Literal)r;
+    }
+    private static Set<Literal> verifyValues(ContentItem ci, UriRef property, 
UriRef dataType, String...lexValues){
+        return verifyValues(ci, ci.getUri(), property, dataType, lexValues);
+    }
+    private static Set<Literal> verifyValues(ContentItem ci, NonLiteral 
subject, UriRef property, UriRef dataType, String...lexValues){
+        Iterator<Triple> it = ci.getMetadata().filter(subject,property, null);
+        assertTrue(it.hasNext());
+        Set<String> expected = new HashSet<String>(Arrays.asList(lexValues));
+        Set<Literal> found = new HashSet<Literal>(expected.size());
+        while(it.hasNext()){
+            Resource r = it.next().getObject();
+            if(dataType == null){
+                assertTrue(r instanceof PlainLiteral);
+            } else {
+                assertTrue(r instanceof TypedLiteral);
+                assertEquals(dataType, ((TypedLiteral)r).getDataType());
+            }
+            assertTrue(expected.remove(((Literal)r).getLexicalForm()));
+            found.add((Literal)r);
+        }
+        return found;
+    }
+    private static Set<NonLiteral> verifyValues(ContentItem ci, NonLiteral 
subject, UriRef property, NonLiteral...references){
+        Iterator<Triple> it = ci.getMetadata().filter(subject,property, null);
+        assertTrue(it.hasNext());
+        Set<NonLiteral> expected = new 
HashSet<NonLiteral>(Arrays.asList(references));
+        Set<NonLiteral> found = new HashSet<NonLiteral>(expected.size());
+        while(it.hasNext()){
+            Resource r = it.next().getObject();
+            assertTrue(r instanceof NonLiteral);
+            assertTrue(expected.remove(r));
+            found.add((NonLiteral)r);
+        }
+        return found;
+    }
+
 }

Modified: 
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/README
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/README?rev=1296704&r1=1296703&r2=1296704&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/README 
(original)
+++ incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/README Sat 
Mar  3 21:17:23 2012
@@ -13,18 +13,29 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY 
 See the License for the specific language governing permissions and
 limitations under the License.
 
-All files within this directory are provided under the 
+All listed files are provided under the 
 
     Apache License, Version 2.0
     
 This includes the following files:
 
     test.doc
+    test.email.txt
     test.html
-    test.xhtml
     test.odt
     test.pages
     test.pdf
     test.rtf
+    test.xhtml
     test2.html
-    test2.pdf
\ No newline at end of file
+    test2.pdf
+    testASF.asf
+    testGIF.gif
+    testJPEG_EXIF.jpg
+    testJPEG_GEO.jpg
+    testMID.mid
+    testMP3id3v24.mp3
+    testMP3lyrics.mp3
+    testTIFF.tif
+
+

Added: 
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/test.email.txt
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/test.email.txt?rev=1296704&view=auto
==============================================================================
--- 
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/test.email.txt 
(added)
+++ 
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/test.email.txt 
Sat Mar  3 21:17:23 2012
@@ -0,0 +1,41 @@
+From: "Julien Nioche (JIRA)" <[email protected]>
+To: [email protected]
+Subject: [jira] Commented: (TIKA-461) RFC822 messages not parsed
+Reply-To: [email protected]
+Delivered-To: mailing list [email protected]
+Date: Mon, 6 Sep 2010 05:25:34 -0400 (EDT)
+In-Reply-To: <6089099.260231278600349994.JavaMail.jira@thor>
+MIME-Version: 1.0
+Content-Type: text/plain; charset=utf-8
+Content-Transfer-Encoding: 7bit
+X-JIRA-FingerPrint: 30527f35849b9dde25b450d4833f0394
+X-Virus-Checked: Checked by ClamAV on apache.org
+
+
+    [ 
https://issues.apache.org/jira/browse/TIKA-461?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12906468#action_12906468
 ] 
+
+Julien Nioche commented on TIKA-461:
+------------------------------------
+
+I'll have a look at mime4j and try to use it in Tika
+
+> RFC822 messages not parsed
+> --------------------------
+>
+>                 Key: TIKA-461
+>                 URL: https://issues.apache.org/jira/browse/TIKA-461
+>             Project: Tika
+>          Issue Type: Bug
+>          Components: parser
+>    Affects Versions: 0.7
+>            Reporter: Joshua Turner
+>            Assignee: Julien Nioche
+>
+> Presented with an RFC822 message exported from Thunderbird, AutodetectParser 
produces an empty body, and a Metadata containing only one key-value pair: 
"Content-Type=message/rfc822". Directly calling MboxParser likewise gives an 
empty body, but with two metadata pairs: "Content-Encoding=us-ascii 
Content-Type=application/mbox".
+> A quick peek at the source of MboxParser shows that the implementation is 
pretty naive. If the wiring can be sorted out, something like Apache James' 
mime4j might be a better bet.
+
+-- 
+This message is automatically generated by JIRA.
+-
+You can reply to this email to add a comment to the issue online.
+

Propchange: 
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/test.email.txt
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: 
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testASF.asf
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testASF.asf?rev=1296704&view=auto
==============================================================================
Binary file - no diff available.

Propchange: 
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testASF.asf
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: 
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testGIF.gif
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testGIF.gif?rev=1296704&view=auto
==============================================================================
Binary file - no diff available.

Propchange: 
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testGIF.gif
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: 
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testJPEG_EXIF.jpg
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testJPEG_EXIF.jpg?rev=1296704&view=auto
==============================================================================
Binary file - no diff available.

Propchange: 
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testJPEG_EXIF.jpg
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: 
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testJPEG_GEO.jpg
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testJPEG_GEO.jpg?rev=1296704&view=auto
==============================================================================
Binary file - no diff available.

Propchange: 
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testJPEG_GEO.jpg
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: 
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testMID.mid
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testMID.mid?rev=1296704&view=auto
==============================================================================
Binary file - no diff available.

Propchange: 
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testMID.mid
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: 
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testMP3id3v24.mp3
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testMP3id3v24.mp3?rev=1296704&view=auto
==============================================================================
Binary file - no diff available.

Propchange: 
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testMP3id3v24.mp3
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: 
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testMP3lyrics.mp3
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testMP3lyrics.mp3?rev=1296704&view=auto
==============================================================================
Binary file - no diff available.

Propchange: 
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testMP3lyrics.mp3
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: 
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testTIFF.tif
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testTIFF.tif?rev=1296704&view=auto
==============================================================================
Binary file - no diff available.

Propchange: 
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testTIFF.tif
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Modified: 
incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/rdf/NamespaceEnum.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/rdf/NamespaceEnum.java?rev=1296704&r1=1296703&r2=1296704&view=diff
==============================================================================
--- 
incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/rdf/NamespaceEnum.java
 (original)
+++ 
incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/rdf/NamespaceEnum.java
 Sat Mar  3 21:17:23 2012
@@ -42,7 +42,12 @@ public enum NamespaceEnum {
      * The FISE namespace (1st version of the Enhancement Structure).
      * Currently the same as {@link NamespaceEnum#enhancer}
      */
-    fise("http://fise.iks-project.eu/ontology/");
+    fise("http://fise.iks-project.eu/ontology/"),
+    /**
+     * The W3C Ontology for Media Resources http://www.w3.org/TR/mediaont-10/
+     */
+    media("http://www.w3.org/ns/ma-ont#")
+    ;
     
     String ns;
     String prefix;

Modified: incubator/stanbol/trunk/enhancer/ldpath/pom.xml
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/ldpath/pom.xml?rev=1296704&r1=1296703&r2=1296704&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/ldpath/pom.xml (original)
+++ incubator/stanbol/trunk/enhancer/ldpath/pom.xml Sat Mar  3 21:17:23 2012
@@ -80,7 +80,7 @@
             <exclude>src/test/resources/example.rdf.zip</exclude>
             <exclude>src/test/resources/example.txt</exclude>
             <exclude>src/test/resources/metadata.rdf.zip</exclude>
-              </excludes>
+          </excludes>
         </configuration>
       </plugin>
     </plugins>

Modified: 
incubator/stanbol/trunk/entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/defaults/NamespaceEnum.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/defaults/NamespaceEnum.java?rev=1296704&r1=1296703&r2=1296704&view=diff
==============================================================================
--- 
incubator/stanbol/trunk/entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/defaults/NamespaceEnum.java
 (original)
+++ 
incubator/stanbol/trunk/entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/defaults/NamespaceEnum.java
 Sat Mar  3 21:17:23 2012
@@ -76,6 +76,10 @@ public enum NamespaceEnum {
     cc("http://creativecommons.org/ns#"),
     //Schema.org (see http://schema.org/docs/schemaorg.owl for the Ontology)
     schema("http://schema.org/",true),
+    /**
+     * The W3C Ontology for Media Resources http://www.w3.org/TR/mediaont-10/
+     */
+    media("http://www.w3.org/ns/ma-ont#")
     ;
     /**
      * The logger

Modified: incubator/stanbol/trunk/parent/pom.xml
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/parent/pom.xml?rev=1296704&r1=1296703&r2=1296704&view=diff
==============================================================================
--- incubator/stanbol/trunk/parent/pom.xml (original)
+++ incubator/stanbol/trunk/parent/pom.xml Sat Mar  3 21:17:23 2012
@@ -1080,11 +1080,6 @@
       </dependency>
       <dependency>
         <groupId>org.apache.clerezza</groupId>
-        <artifactId>rdf.ontologies</artifactId>
-        <version>0.11-incubating</version>
-      </dependency>      
-      <dependency>
-        <groupId>org.apache.clerezza</groupId>
         <artifactId>rdf.core.test</artifactId>
         <version>0.13-incubating</version>
       </dependency>


Reply via email to