Author: rwesten
Date: Sat Mar 3 21:17:23 2012
New Revision: 1296704
URL: http://svn.apache.org/viewvc?rev=1296704&view=rev
Log:
STANBOL-512: Added support for Tika metadata -> Ontology mappings
Added:
incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/metadata/
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/test.email.txt
(with props)
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testASF.asf
(with props)
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testGIF.gif
(with props)
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testJPEG_EXIF.jpg
(with props)
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testJPEG_GEO.jpg
(with props)
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testMID.mid
(with props)
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testMP3id3v24.mp3
(with props)
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testMP3lyrics.mp3
(with props)
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testTIFF.tif
(with props)
Modified:
incubator/stanbol/trunk/enhancer/engines/tika/pom.xml
incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/TikaEngine.java
incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/handler/PlainTextHandler.java
incubator/stanbol/trunk/enhancer/engines/tika/src/main/resources/OSGI-INF/metatype/metatype.properties
incubator/stanbol/trunk/enhancer/engines/tika/src/test/java/org/apache/stanbol/enhancer/engines/tika/TikaEngineTest.java
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/README
incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/rdf/NamespaceEnum.java
incubator/stanbol/trunk/enhancer/ldpath/pom.xml
incubator/stanbol/trunk/entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/defaults/NamespaceEnum.java
incubator/stanbol/trunk/parent/pom.xml
Modified: incubator/stanbol/trunk/enhancer/engines/tika/pom.xml
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/pom.xml?rev=1296704&r1=1296703&r2=1296704&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/tika/pom.xml (original)
+++ incubator/stanbol/trunk/enhancer/engines/tika/pom.xml Sat Mar 3 21:17:23
2012
@@ -75,14 +75,23 @@
<exclude>src/license/THIRD-PARTY.properties</exclude>
<!-- AL20 License for test resources (see
src/test/resources/README) -->
<exclude>src/test/resources/test.doc</exclude>
+ <exclude>src/test/resources/test.email.txt</exclude>
<exclude>src/test/resources/test.html</exclude>
- <exclude>src/test/resources/test.xhtml</exclude>
<exclude>src/test/resources/test.odt</exclude>
<exclude>src/test/resources/test.pages</exclude>
<exclude>src/test/resources/test.pdf</exclude>
<exclude>src/test/resources/test.rtf</exclude>
+ <exclude>src/test/resources/test.xhtml</exclude>
<exclude>src/test/resources/test2.html</exclude>
<exclude>src/test/resources/test2.pdf</exclude>
+ <exclude>src/test/resources/testASF.asf</exclude>
+ <exclude>src/test/resources/testGIF.gif</exclude>
+ <exclude>src/test/resources/testJPEG_EXIF.jpg</exclude>
+ <exclude>src/test/resources/testJPEG_GEO.jpg</exclude>
+ <exclude>src/test/resources/testMID.mid</exclude>
+ <exclude>src/test/resources/testMP3id3v24.mp3</exclude>
+ <exclude>src/test/resources/testMP3lyrics.mp3</exclude>
+ <exclude>src/test/resources/testTIFF.tif</exclude>
</excludes>
</configuration>
</plugin>
@@ -114,6 +123,10 @@
<groupId>org.apache.clerezza</groupId>
<artifactId>rdf.core</artifactId>
</dependency>
+ <dependency> <!-- for metadata mappings -->
+ <groupId>org.apache.clerezza</groupId>
+ <artifactId>rdf.ontologies</artifactId>
+ </dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
Modified:
incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/TikaEngine.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/TikaEngine.java?rev=1296704&r1=1296703&r2=1296704&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/TikaEngine.java
(original)
+++
incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/TikaEngine.java
Sat Mar 3 21:17:23 2012
@@ -16,6 +16,13 @@
*/
package org.apache.stanbol.enhancer.engines.tika;
+import static
org.apache.stanbol.enhancer.engines.tika.metadata.OntologyMappings.addDcMappings;
+import static
org.apache.stanbol.enhancer.engines.tika.metadata.OntologyMappings.addGeoMappings;
+import static
org.apache.stanbol.enhancer.engines.tika.metadata.OntologyMappings.addMediaResourceOntologyMappings;
+import static
org.apache.stanbol.enhancer.engines.tika.metadata.OntologyMappings.addNepomukExifMappings;
+import static
org.apache.stanbol.enhancer.engines.tika.metadata.OntologyMappings.addNepomukMessageMappings;
+import static
org.apache.stanbol.enhancer.engines.tika.metadata.OntologyMappings.addRdfsMappings;
+import static
org.apache.stanbol.enhancer.engines.tika.metadata.OntologyMappings.addSkosMappings;
import static
org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper.randomUUID;
import static org.apache.tika.mime.MediaType.TEXT_PLAIN;
@@ -23,9 +30,12 @@ import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;
+import java.util.Arrays;
import java.util.Collections;
+import java.util.Dictionary;
import java.util.Map;
+import org.apache.clerezza.rdf.core.LiteralFactory;
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.commons.io.IOUtils;
import org.apache.felix.scr.annotations.Component;
@@ -34,6 +44,7 @@ import org.apache.felix.scr.annotations.
import org.apache.felix.scr.annotations.Service;
import org.apache.stanbol.enhancer.engines.tika.handler.MultiHandler;
import org.apache.stanbol.enhancer.engines.tika.handler.PlainTextHandler;
+import org.apache.stanbol.enhancer.engines.tika.metadata.OntologyMappings;
import org.apache.stanbol.enhancer.servicesapi.Blob;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
@@ -67,12 +78,43 @@ import org.xml.sax.ContentHandler;
@Component(immediate = true, metatype = true, inherit=true)
@Service
@Properties(value={
- @Property(name=EnhancementEngine.PROPERTY_NAME,value="tika")
+ @Property(name=EnhancementEngine.PROPERTY_NAME,value="tika"),
+ @Property(name=TikaEngine.SKIP_LINEBREAKS_WITHIN_CONTENT,
boolValue=TikaEngine.DEFAULT_SKIP_LINEBREAKS),
+
@Property(name=TikaEngine.MAPPING_MEDIA_RESOURCE,boolValue=TikaEngine.DEFAULT_MAPPING_MEDIA_RESOURCE_STATE),
+
@Property(name=TikaEngine.MAPPING_DUBLIN_CORE_TERMS,boolValue=TikaEngine.DEFAULT_MAPPING_DUBLIN_CORE_TERMS_STATE),
+
@Property(name=TikaEngine.MAPPING_NEPOMUK_MESSAGE,boolValue=TikaEngine.DEFAULT_MAPPING_NEPOMUK_MESSAGE_STATE),
+
@Property(name=TikaEngine.MAPPING_NEPOMUK_EXIF,boolValue=TikaEngine.DEFAULT_MAPPING_NEPOMUK_EXIF_STATE),
+
@Property(name=TikaEngine.MAPPING_SKOS,boolValue=TikaEngine.DEFAULT_MAPPING_SKOS_STATE),
+
@Property(name=TikaEngine.MAPPING_RDFS,boolValue=TikaEngine.DEFAULT_MAPPING_RDFS_STATE),
+
@Property(name=TikaEngine.MAPPING_GEO,boolValue=TikaEngine.DEFAULT_MAPPING_GEO_STATE)
})
public class TikaEngine
extends AbstractEnhancementEngine<RuntimeException,RuntimeException>
implements EnhancementEngine, ServiceProperties {
private final Logger log = LoggerFactory.getLogger(TikaEngine.class);
+
+ private final LiteralFactory lf = LiteralFactory.getInstance();
+
+ public static final String SKIP_LINEBREAKS_WITHIN_CONTENT =
"stanbol.engines.tika.skipLinebreaks";
+ //Metadata -> Ontology mapping configuration
+ public static final String MAPPING_MEDIA_RESOURCE =
"stanbol.engine.tika.mapping.mediaResource";
+ public static final boolean DEFAULT_MAPPING_MEDIA_RESOURCE_STATE = true;
+ public static final String MAPPING_DUBLIN_CORE_TERMS =
"stanbol.engine.tika.mapping.dcTerms";
+ public static final boolean DEFAULT_MAPPING_DUBLIN_CORE_TERMS_STATE = true;
+ public static final String MAPPING_NEPOMUK_MESSAGE =
"stanbol.engine.tika.mapping.nepomukMessage";
+ public static final boolean DEFAULT_MAPPING_NEPOMUK_MESSAGE_STATE = true;
+ public static final String MAPPING_NEPOMUK_EXIF =
"stanbol.engine.tika.mapping.nepomukExif";
+ public static final boolean DEFAULT_MAPPING_NEPOMUK_EXIF_STATE = true;
+ public static final String MAPPING_SKOS =
"stanbol.engine.tika.mapping.skos";
+ public static final boolean DEFAULT_MAPPING_SKOS_STATE = false;
+ public static final String MAPPING_RDFS =
"stanbol.engine.tika.mapping.rdfs";
+ public static final boolean DEFAULT_MAPPING_RDFS_STATE = false;
+ public static final String MAPPING_GEO = "stanbol.engine.tika.mapping.geo";
+ public static final boolean DEFAULT_MAPPING_GEO_STATE = true;
+
+ public static final boolean DEFAULT_SKIP_LINEBREAKS = false;
+
+ private boolean skipLinebreaks = DEFAULT_SKIP_LINEBREAKS;
/**
* The default value for the Execution of this Engine. Currently set to
* {@link ServiceProperties#ORDERING_PRE_PROCESSING}
@@ -84,7 +126,8 @@ public class TikaEngine
private TikaConfig config;
private Parser parser;
private Detector detector;
-
+ private OntologyMappings ontologyMappings;
+
private static class MediaTypeAndStream {
MediaType mediaType;
InputStream in;
@@ -119,7 +162,7 @@ public class TikaEngine
metadata.set(Metadata.CONTENT_TYPE, mtas.mediaType.toString());
final StringWriter writer = new StringWriter();
final ContentHandler textHandler = new BodyContentHandler( //only
the Body
- new PlainTextHandler(writer, true,false)); //skip ignoreable
+ new PlainTextHandler(writer, false,skipLinebreaks)); //skip
ignoreable
final ToXMLContentHandler xhtmlHandler;
final ContentHandler mainHandler;
if(!plainMediaType.equals(XHTML)){ //do not parse XHTML from XHTML
@@ -137,25 +180,40 @@ public class TikaEngine
"plain text!",e);
}
IOUtils.closeQuietly(in);
-// log.info("Plain Content: \n{} \n",writer.toString());
+ if(log.isDebugEnabled()){
+ log.debug("Plain Content: \n{}",writer.toString());
+ }
String random = randomUUID().toString();
UriRef textBlobUri = new UriRef("urn:tika:text:"+random);
ci.addPart(textBlobUri,
new InMemoryBlob(writer.toString(),
TEXT_PLAIN.toString())); //string -> no encoding
if(xhtmlHandler != null){
-// log.info("XML Content: \n{} \n",xhtmlHandler.toString());
+ if(log.isDebugEnabled()){
+ log.debug("XML Content: \n{}",xhtmlHandler.toString());
+ }
UriRef xhtmlBlobUri = new UriRef("urn:tika:xhtml:"+random);
ci.addPart(xhtmlBlobUri,
new InMemoryBlob(xhtmlHandler.toString(),
"application/xhtml+xml")); //string -> no encoding
}
- //TODO:
- // * add also the Metadata extracted by Apache Tika
+ //add the extracted metadata
+ if(log.isDebugEnabled()){
+ for(String name : metadata.names()){
+ log.debug("{}:
{}",name,Arrays.toString(metadata.getValues(name)));
+ }
+ }
+ ci.getLock().writeLock().lock();
+ try {
+ ontologyMappings.apply(ci.getMetadata(), ci.getUri(),
metadata);
+ }finally{
+ ci.getLock().writeLock().unlock();
+ }
} //else not supported format
}
+
/**
* Getter for the contentType. If not set or {@link MediaType#OCTET_STREAM}
* then the media type is detected.<p>
@@ -204,14 +262,52 @@ public class TikaEngine
config = TikaConfig.getDefaultConfig();
this.detector = config.getDetector();
this.parser = new AutoDetectParser(config);
+ this.skipLinebreaks = getBoolean(ctx.getProperties(),
+ SKIP_LINEBREAKS_WITHIN_CONTENT, DEFAULT_SKIP_LINEBREAKS);
+ this.ontologyMappings = new OntologyMappings();
+ if(getBoolean(ctx.getProperties(),
+ MAPPING_MEDIA_RESOURCE, DEFAULT_MAPPING_MEDIA_RESOURCE_STATE)){
+ addMediaResourceOntologyMappings(ontologyMappings);
+ }
+ if(getBoolean(ctx.getProperties(),
+ MAPPING_DUBLIN_CORE_TERMS,
DEFAULT_MAPPING_DUBLIN_CORE_TERMS_STATE)){
+ addDcMappings(ontologyMappings);
+ }
+ if(getBoolean(ctx.getProperties(),
+ MAPPING_NEPOMUK_MESSAGE, DEFAULT_MAPPING_NEPOMUK_MESSAGE_STATE)){
+ addNepomukMessageMappings(ontologyMappings);
+ }
+ if(getBoolean(ctx.getProperties(),
+ MAPPING_NEPOMUK_EXIF, DEFAULT_MAPPING_NEPOMUK_EXIF_STATE)){
+ addNepomukExifMappings(ontologyMappings);
+ }
+ if(getBoolean(ctx.getProperties(),
+ MAPPING_SKOS, DEFAULT_MAPPING_SKOS_STATE)){
+ addSkosMappings(ontologyMappings);
+ }
+ if(getBoolean(ctx.getProperties(),
+ MAPPING_RDFS, DEFAULT_MAPPING_RDFS_STATE)){
+ addRdfsMappings(ontologyMappings);
+ }
+ if(getBoolean(ctx.getProperties(),
+ MAPPING_GEO, DEFAULT_MAPPING_GEO_STATE)){
+ addGeoMappings(ontologyMappings);
+ }
}
@Override
protected void deactivate(ComponentContext ctx) throws RuntimeException {
this.config = null;
this.parser = null;
this.detector = null;
+ this.skipLinebreaks = DEFAULT_SKIP_LINEBREAKS;
+ this.ontologyMappings = null;
super.deactivate(ctx);
}
+ private static boolean getBoolean(Dictionary<?,?> properties, String key,
boolean defaultState){
+ Object value = properties.get(key);
+ return value instanceof Boolean ? (Boolean)value :
+ value != null ? Boolean.parseBoolean(value.toString()) :
defaultState;
+ }
public Map<String, Object> getServiceProperties() {
return Collections.unmodifiableMap(
Modified:
incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/handler/PlainTextHandler.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/handler/PlainTextHandler.java?rev=1296704&r1=1296703&r2=1296704&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/handler/PlainTextHandler.java
(original)
+++
incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/handler/PlainTextHandler.java
Sat Mar 3 21:17:23 2012
@@ -33,10 +33,12 @@ import org.xml.sax.SAXException;
*/
public class PlainTextHandler extends ToTextContentHandler {
+ private static char[] SPACE = new char[]{' '};
private final boolean skipWhitespaces;
private final boolean skipLinebreakes;
+ boolean addedText = false;
public PlainTextHandler(Writer writer, boolean skipIgnoreableWhitespaces,
boolean skipLinebreaksWithinLiterals) {
super(writer);
this.skipWhitespaces = skipIgnoreableWhitespaces;
@@ -45,39 +47,38 @@ public class PlainTextHandler extends To
@Override
public void ignorableWhitespace(char[] ch, int start, int length) throws
SAXException {
- if(!skipWhitespaces){
- super.ignorableWhitespace(ch, start, length);
+ if(!skipWhitespaces && addedText){
+ super.characters(ch, start, length);
+ addedText = false;
} //else ignore
}
@Override
public void characters(char[] ch, int start, int length) throws
SAXException {
- int in;
if(skipLinebreakes){
- //use an in(serte) and an it(erator) index to avoid copying
- //the data to a new char[].
- in = start;
- for(int it = start; it<length;it++){
- if(ch[it] != '\n'){
- ch[in] = ch[it];
- in++;
+ int end = start+length;
+ for(int pos = start; pos<end;pos++){
+ if(ch[pos] == '\n'){
+ if(pos > start){
+ super.characters(ch, start, pos-start);
+ super.characters(SPACE, 0, 1);
+ }
+ start = pos+1;
+ length = length-start;
} //ignore line breaks
}
- if(in == start){ //only line breaks
- return; // -> nothing to add
- }
- } else {
- in = length;
}
- super.characters(ch, start, in);
+ if(length > 0) {
+ super.characters(ch, start, length);
+ }
+ addedText = true;
}
@Override
- public void startElement(String uri, String localName, String qName,
Attributes attributes) throws SAXException {
- super.startElement(uri, localName, qName, attributes);
- }
- @Override
public void endElement(String uri, String localName, String qName) throws
SAXException {
- // TODO Auto-generated method stub
+// if(skipLinebreakes & addedText){
+// characters(LINEBREAK, 0, 1);
+// addedText = false;
+// }
super.endElement(uri, localName, qName);
}
}
Modified:
incubator/stanbol/trunk/enhancer/engines/tika/src/main/resources/OSGI-INF/metatype/metatype.properties
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/main/resources/OSGI-INF/metatype/metatype.properties?rev=1296704&r1=1296703&r2=1296704&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/tika/src/main/resources/OSGI-INF/metatype/metatype.properties
(original)
+++
incubator/stanbol/trunk/enhancer/engines/tika/src/main/resources/OSGI-INF/metatype/metatype.properties
Sat Mar 3 21:17:23 2012
@@ -31,4 +31,40 @@ Enhancer Engine: Apache Tika
org.apache.stanbol.enhancer.engines.tika.TikaEngine.description=Apache Tika \
supports the conversion of parsed content to XHTML and plain text. In addition \
it also allows extracting additional metadata from various content formats.
For \
-detailed information please see the Apache Tika project homepage.
\ No newline at end of file
+detailed information please see the Apache Tika project homepage.
+
+stanbol.engines.tika.skipLinebreaks.name=Skip Linebreaks
+stanbol.engines.tika.skipLinebreaks.description=If enabled, line breaks \
+within the content are replaced with spaces.
+
+# Metadata -> Ontology mappings
+
+stanbol.engine.tika.mapping.mediaResource.name=Media Resource
+stanbol.engine.tika.mapping.mediaResource.description=Converts Metadata
extracted \
+by Apache Tika to the W3C Ontology for Media Resources 1.0 \
+(http://www.w3.org/TR/mediaont-10/)
+
+stanbol.engine.tika.mapping.dcTerms.name=Dublin Core
+stanbol.engine.tika.mapping.dcTerms.description=Converts Metadata extracted \
+by Apache Tika to Dublin Core Terms
+
+stanbol.engine.tika.mapping.nepomukMessage.name=Message
+stanbol.engine.tika.mapping.nepomukMessage.description=Converts Metadata
extracted \
+by Apache Tika to the Nepomuk Message Ontology
+
+stanbol.engine.tika.mapping.nepomukExif.name=EXIF
+stanbol.engine.tika.mapping.nepomukExif.description=Converts Metadata
extracted \
+by Apache Tika to the Nepomuk EXIF Ontology
+
+stanbol.engine.tika.mapping.skos.name=SKOS
+stanbol.engine.tika.mapping.skos.description=Encodes labels and notes
extracted \
+by Apache Tika as SKOS labels and notes
+
+stanbol.engine.tika.mapping.rdfs.name=RDFS
+stanbol.engine.tika.mapping.rdfs.description=Encodes labels and notes
extracted \
+by Apache Tika as rdfs:label and rdfs:comment
+
+stanbol.engine.tika.mapping.geo.name=GEO
+stanbol.engine.tika.mapping.geo.description=Encodes latitude, longitude and \
+altitude information extracted by Apache Tika by using the W3C wgs84 Ontology
+
Modified:
incubator/stanbol/trunk/enhancer/engines/tika/src/test/java/org/apache/stanbol/enhancer/engines/tika/TikaEngineTest.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/test/java/org/apache/stanbol/enhancer/engines/tika/TikaEngineTest.java?rev=1296704&r1=1296703&r2=1296704&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/tika/src/test/java/org/apache/stanbol/enhancer/engines/tika/TikaEngineTest.java
(original)
+++
incubator/stanbol/trunk/enhancer/engines/tika/src/test/java/org/apache/stanbol/enhancer/engines/tika/TikaEngineTest.java
Sat Mar 3 21:17:23 2012
@@ -21,11 +21,13 @@ import static org.apache.commons.io.IOUt
import static org.apache.commons.io.IOUtils.toByteArray;
import static org.apache.stanbol.enhancer.engines.tika.TikaEngine.XHTML;
import static
org.apache.stanbol.enhancer.servicesapi.EnhancementEngine.CANNOT_ENHANCE;
+import static
org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper.randomUUID;
import static org.apache.tika.mime.MediaType.OCTET_STREAM;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.IOException;
@@ -33,17 +35,33 @@ import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.Charset;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Iterator;
import java.util.Map.Entry;
+import java.util.Set;
import java.util.regex.Pattern;
+import org.apache.clerezza.rdf.core.Literal;
+import org.apache.clerezza.rdf.core.NonLiteral;
+import org.apache.clerezza.rdf.core.PlainLiteral;
+import org.apache.clerezza.rdf.core.Resource;
+import org.apache.clerezza.rdf.core.Triple;
+import org.apache.clerezza.rdf.core.TypedLiteral;
import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.clerezza.rdf.ontologies.DC;
+import org.apache.clerezza.rdf.ontologies.RDF;
+import org.apache.clerezza.rdf.ontologies.XSD;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.LineIterator;
import org.apache.stanbol.enhancer.servicesapi.Blob;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
+import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
import org.apache.stanbol.enhancer.servicesapi.helper.InMemoryContentItem;
+import org.apache.stanbol.enhancer.servicesapi.rdf.NamespaceEnum;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Assert;
@@ -241,6 +259,171 @@ public class TikaEngineTest {
"</body></html>");
}
@Test
+ public void testEMail() throws EngineException, IOException {
+ ContentItem ci = createContentItem("test.email.txt", "message/rfc822");
+ assertFalse(engine.canEnhance(ci) == CANNOT_ENHANCE);
+ engine.computeEnhancements(ci);
+ Entry<UriRef,Blob> contentPart = ContentItemHelper.getBlob(ci,
+ singleton("text/plain"));
+ assertNotNull(contentPart);
+ Blob plainTextBlob = contentPart.getValue();
+ assertNotNull(plainTextBlob);
+ assertContentRegexp(plainTextBlob,
+ "Julien Nioche commented on TIKA-461:",
+ "I'll have a look at mime4j and try to use it in Tika",
+ "> RFC822 messages not parsed",
+ "Key: TIKA-461",
+ "URL: https://issues.apache.org/jira/browse/TIKA-461");
+ //validate XHTML results
+ contentPart = ContentItemHelper.getBlob(ci,
+ singleton("application/xhtml+xml"));
+ assertNotNull(contentPart);
+ Blob xhtmlBlob = contentPart.getValue();
+ assertNotNull(xhtmlBlob);
+ assertContentRegexp(xhtmlBlob,
+ "<html xmlns=\"http://www.w3.org/1999/xhtml\">",
+ "<title></title>",
+ "<body><p>",
+ "Julien Nioche commented on TIKA-461:",
+ "I'll have a look at mime4j and try to use it in Tika",
+ "> RFC822 messages not parsed",
+ "Key: TIKA-461",
+ "URL: https://issues.apache.org/jira/browse/TIKA-461");
+ //now check the extracted metadata!
+ //DC
+ verifyValue(ci, DC.date, XSD.dateTime,"2010-09-06T09:25:34Z");
+ verifyValue(ci, DC.format, null,"message/rfc822");
+ verifyValue(ci, DC.subject, null,"[jira] Commented: (TIKA-461) RFC822
messages not parsed");
+ verifyValue(ci, DC.creator, null,"Julien Nioche (JIRA)
<[email protected]>");
+ verifyValue(ci, new UriRef(NamespaceEnum.dc+"created"),
XSD.dateTime,"2010-09-06T09:25:34Z");
+
+ //Media Ontology
+ verifyValue(ci, new
UriRef(NamespaceEnum.media+"creationDate"),XSD.dateTime,"2010-09-06T09:25:34Z");
+ verifyValue(ci, new
UriRef(NamespaceEnum.media+"hasFormat"),null,"message/rfc822");
+ verifyValue(ci, new
UriRef(NamespaceEnum.media+"hasCreator"),null,"Julien Nioche (JIRA)
<[email protected]>");
+ verifyValue(ci, new
UriRef(NamespaceEnum.media+"hasContributor"),null,"Julien Nioche (JIRA)
<[email protected]>");
+ verifyValue(ci, new
UriRef(NamespaceEnum.media+"hasKeyword"),null,"[jira] Commented: (TIKA-461)
RFC822 messages not parsed");
+
+
+ //Nepomuk Message
+ String message =
"http://www.semanticdesktop.org/ontologies/2007/03/22/nmo#";
+ verifyValue(ci, new UriRef(message+"from"),null,"Julien Nioche (JIRA)
<[email protected]>");
+ verifyValue(ci, new UriRef(message+"to"),null,"[email protected]");
+
+ }
+ @Test
+ public void testMp3() throws EngineException, IOException {
+ ContentItem ci = createContentItem("testMP3id3v24.mp3", "audio/mpeg");
+ assertFalse(engine.canEnhance(ci) == CANNOT_ENHANCE);
+ engine.computeEnhancements(ci);
+ Entry<UriRef,Blob> contentPart = ContentItemHelper.getBlob(ci,
+ singleton("text/plain"));
+ assertNotNull(contentPart);
+ Blob plainTextBlob = contentPart.getValue();
+ assertNotNull(plainTextBlob);
+ assertContentRegexp(plainTextBlob,
+ "Test Title",
+ "Test Artist",
+ "Test Album");
+ //validate XHTML results
+ contentPart = ContentItemHelper.getBlob(ci,
+ singleton("application/xhtml+xml"));
+ assertNotNull(contentPart);
+ Blob xhtmlBlob = contentPart.getValue();
+ assertNotNull(xhtmlBlob);
+ //Test AudioTrack metadata
+ NonLiteral audioTrack = verifyNonLiteral(ci, new
UriRef(NamespaceEnum.media+"hasTrack"));
+ //types
+ verifyValues(ci, audioTrack, RDF.type,
+ new UriRef(NamespaceEnum.media+"MediaFragment"),
+ new UriRef(NamespaceEnum.media+"Track"),
+ new UriRef(NamespaceEnum.media+"AudioTrack"));
+ //properties
+ verifyValue(ci, audioTrack, new
UriRef(NamespaceEnum.media+"hasFormat"), XSD.string, "Stereo");
+ verifyValue(ci, audioTrack, new
UriRef(NamespaceEnum.media+"samplingRate"), XSD.int_, "44100");
+ verifyValue(ci, audioTrack, new
UriRef(NamespaceEnum.media+"hasCompression"), XSD.string, "MP3");
+ }
+ @Test
+ public void testGEOMetadata() throws EngineException, IOException{
+ //first validate Media Resource Ontology
+ UriRef hasLocation = new UriRef(NamespaceEnum.media+"hasLocation");
+ UriRef locationLatitude = new
UriRef(NamespaceEnum.media+"locationLatitude");
+ UriRef locationLongitude = new
UriRef(NamespaceEnum.media+"locationLongitude");
+ //UriRef locationAltitude = new
UriRef(NamespaceEnum.media+"locationAltitude");
+ ContentItem ci = createContentItem("testJPEG_GEO.jpg",
OCTET_STREAM.toString());//"video/x-ms-asf");
+ assertFalse(engine.canEnhance(ci) == CANNOT_ENHANCE);
+ engine.computeEnhancements(ci);
+ Iterator<Triple> it = ci.getMetadata().filter(ci.getUri(),hasLocation,
null);
+ assertTrue(it.hasNext());
+ Resource r = it.next().getObject();
+ assertFalse(it.hasNext());
+ assertTrue(r instanceof NonLiteral);
+ NonLiteral location = verifyNonLiteral(ci, hasLocation);
+ //lat
+ verifyValue(ci, location, locationLatitude, XSD.double_, "12.54321");
+ //long
+ verifyValue(ci, location, locationLongitude, XSD.double_, "-54.1234");
+
+ //second the GEO ont
+ UriRef lat = new UriRef(NamespaceEnum.geo+"lat");
+ UriRef lon = new UriRef(NamespaceEnum.geo+"long");
+ //lat
+ verifyValue(ci, lat, XSD.double_, "12.54321");
+ //long
+ verifyValue(ci, lon, XSD.double_, "-54.1234");
+ }
+
+
+
+ public void testMetadata() throws EngineException {
+ ContentItem ci = createContentItem("testMP3id3v24.mp3", "audio/mpeg");
+ assertFalse(engine.canEnhance(ci) == CANNOT_ENHANCE);
+ engine.computeEnhancements(ci);
+ verifyValue(ci,DC.creator,null,"Test Artist");
+ verifyValue(ci, DC.title,null,"Test Album");
+ verifyValue(ci, DC.format,null,"audio/mpeg");
+ verifyValue(ci, new
UriRef(NamespaceEnum.media+"hasFormat"),null,"audio/mpeg");
+ verifyValue(ci, new
UriRef(NamespaceEnum.media+"mainOriginalTitle"),null,"Test Album");
+ verifyValue(ci, new
UriRef(NamespaceEnum.media+"hasContributor"),null,"Test Artist");
+ verifyValue(ci, new
UriRef(NamespaceEnum.media+"releaseDate"),XSD.string,"2008");
+ verifyValue(ci, new
UriRef(NamespaceEnum.media+"hasGenre"),null,"Rock");
+ verifyValue(ci, new
UriRef(NamespaceEnum.media+"hasCreator"),null,"Test Artist");
+ }
+ @Test
+ public void testExifMetadata() throws EngineException {
+ String exif =
"http://www.semanticdesktop.org/ontologies/2007/05/10/nexif#";
+ ContentItem ci = createContentItem("testJPEG_EXIF.jpg", "image/jpeg");
+ assertFalse(engine.canEnhance(ci) == CANNOT_ENHANCE);
+ engine.computeEnhancements(ci);
+ verifyValue(ci, new UriRef(exif+"make"),null,"Canon");
+ verifyValue(ci, new UriRef(exif+"software"),null,"Adobe Photoshop CS3
Macintosh");
+ verifyValue(ci, new
UriRef(exif+"dateTimeOriginal"),XSD.dateTime,"2009-08-11T07:09:45Z");
+ verifyValue(ci, new UriRef(exif+"relatedImageWidth"),XSD.int_,"100");
+ verifyValue(ci, new UriRef(exif+"fNumber"),XSD.double_,"5.6");
+ verifyValue(ci, new UriRef(exif+"model"),null,"Canon EOS 40D");
+ verifyValue(ci, new UriRef(exif+"isoSpeedRatings"),XSD.int_,"400");
+ verifyValue(ci, new UriRef(exif+"xResolution"),XSD.double_,"240.0");
+ verifyValue(ci, new UriRef(exif+"flash"),XSD.boolean_,"false");
+ verifyValue(ci, new UriRef(exif+"exposureTime"),XSD.double_,"6.25E-4");
+ verifyValue(ci, new UriRef(exif+"yResolution"),XSD.double_,"240.0");
+ verifyValue(ci, new UriRef(exif+"resolutionUnit"),XSD.string,"Inch");
+ verifyValue(ci, new UriRef(exif+"focalLength"),XSD.double_,"194.0");
+ verifyValue(ci, new UriRef(exif+"relatedImageLength"),XSD.int_,"68");
+ verifyValue(ci, new UriRef(exif+"bitsPerSample"),XSD.int_,"8");
+ //also Media Ontology mappings for Exif
+ verifyValue(ci, new
UriRef(NamespaceEnum.media+"frameHeight"),XSD.int_,"68");
+ verifyValue(ci, new
UriRef(NamespaceEnum.media+"frameWidth"),XSD.int_,"100");
+ verifyValue(ci, new
UriRef(NamespaceEnum.media+"hasFormat"),null,"image/jpeg");
+ verifyValue(ci, new
UriRef(NamespaceEnum.media+"creationDate"),XSD.dateTime,"2009-08-11T07:09:45Z");
+ verifyValues(ci, new
UriRef(NamespaceEnum.media+"hasKeyword"),null,"serbor","moscow-birds","canon-55-250");
+ //and finally the mapped DC properties
+ verifyValue(ci, DC.format,null,"image/jpeg");
+ verifyValue(ci, DC.date,XSD.dateTime,"2009-08-11T07:09:45Z");
+ verifyValue(ci, new
UriRef(NamespaceEnum.dc+"modified"),XSD.dateTime,"2009-10-02T21:02:49Z");
+ verifyValues(ci, DC.subject, null,
"serbor","moscow-birds","canon-55-250");
+ }
+
+ @Test
public void testContentTypeDetection() throws EngineException, IOException
{
ContentItem ci = createContentItem("test.pdf",
OCTET_STREAM.toString());
assertFalse(engine.canEnhance(ci) == CANNOT_ENHANCE);
@@ -321,7 +504,7 @@ public class TikaEngineTest {
throw new IllegalStateException("Unable to read test data!",e);
}
closeQuietly(in);
- UriRef ref = new
UriRef("urn:contentItem:content-"+ContentItemHelper.toHexString(data));
+ UriRef ref = new UriRef("urn:contentItem:content-"+randomUUID());
return new InMemoryContentItem(data,contentType);
}
/**
@@ -364,4 +547,82 @@ public class TikaEngineTest {
engine = null;
}
+ /*
+ * Internal helper methods
+ */
+ /**
+  * Verifies that the content item itself has exactly one value for the
+  * given property and that this value is a {@link NonLiteral}.
+  * Delegates to the three-argument overload using {@code ci.getUri()}
+  * as subject.
+  * <p>
+  * Declared {@code static} like the other verify helpers of this class,
+  * because no instance state is used.
+  * @param ci the content item whose metadata are checked
+  * @param property the property to look up
+  * @return the single {@link NonLiteral} value of the property
+  */
+ private static NonLiteral verifyNonLiteral(ContentItem ci, UriRef property){
+     return verifyNonLiteral(ci, ci.getUri(), property);
+ }
+ /**
+  * Verifies that {@code subject} has exactly one value for {@code property}
+  * in the metadata of the content item and that this value is a
+  * {@link NonLiteral}.
+  * <p>
+  * The subject is typed as {@link NonLiteral} (the same type the
+  * {@code verifyValue}/{@code verifyValues} helpers accept), so any
+  * non-literal node can be used as subject. Callers passing a
+  * {@link UriRef} are unaffected.
+  * @param ci the content item whose metadata are checked
+  * @param subject the subject of the triple to look up
+  * @param property the property of the triple to look up
+  * @return the single {@link NonLiteral} value of the property
+  */
+ private static NonLiteral verifyNonLiteral(ContentItem ci, NonLiteral subject, UriRef property){
+     Iterator<Triple> it = ci.getMetadata().filter(subject, property, null);
+     // at least one value must be present ...
+     assertTrue(it.hasNext());
+     Resource r = it.next().getObject();
+     // ... and no second one
+     assertFalse(it.hasNext());
+     assertTrue(r instanceof NonLiteral);
+     return (NonLiteral)r;
+ }
+ /**
+  * Checks that the content item itself has exactly one value for the
+  * given property and that this value equals the expected reference.
+  * The subject used for the lookup is {@code ci.getUri()}.
+  * @param ci the content item whose metadata are checked
+  * @param property the property to look up
+  * @param value the expected reference
+  * @return the value found in the metadata
+  */
+ private static UriRef verifyValue(ContentItem ci, UriRef property, UriRef value){
+     final UriRef contentItemUri = ci.getUri();
+     return verifyValue(ci, contentItemUri, property, value);
+ }
+ /**
+  * Checks that {@code subject} has exactly one value for {@code property}
+  * in the metadata of the content item, that this value is a
+  * {@link UriRef} and that it equals the expected reference.
+  * @param ci the content item whose metadata are checked
+  * @param subject the subject of the triple to look up
+  * @param property the property of the triple to look up
+  * @param value the expected reference
+  * @return the value found in the metadata
+  */
+ private static UriRef verifyValue(ContentItem ci, NonLiteral subject, UriRef property, UriRef value){
+     Iterator<Triple> triples = ci.getMetadata().filter(subject, property, null);
+     // exactly one triple is expected: one must exist ...
+     assertTrue(triples.hasNext());
+     Resource object = triples.next().getObject();
+     // ... and there must be no further one
+     assertFalse(triples.hasNext());
+     assertTrue(object instanceof UriRef);
+     assertEquals(value, object);
+     return (UriRef) object;
+ }
+ /**
+  * Checks that the content item itself has exactly one literal value for
+  * the given property, with the expected data type and lexical form.
+  * The subject used for the lookup is {@code ci.getUri()}.
+  * @param ci the content item whose metadata are checked
+  * @param property the property to look up
+  * @param dataType the expected data type, or {@code null} if a
+  * {@link PlainLiteral} is expected
+  * @param lexValue the expected lexical form
+  * @return the literal found in the metadata
+  */
+ private static Literal verifyValue(ContentItem ci, UriRef property, UriRef dataType, String lexValue){
+     final UriRef contentItemUri = ci.getUri();
+     return verifyValue(ci, contentItemUri, property, dataType, lexValue);
+ }
+ /**
+  * Checks that {@code subject} has exactly one value for {@code property}
+  * in the metadata of the content item and that this value is a literal
+  * with the expected data type and lexical form.
+  * @param ci the content item whose metadata are checked
+  * @param subject the subject of the triple to look up
+  * @param property the property of the triple to look up
+  * @param dataType the expected data type, or {@code null} if a
+  * {@link PlainLiteral} is expected
+  * @param lexValue the expected lexical form
+  * @return the literal found in the metadata
+  */
+ private static Literal verifyValue(ContentItem ci, NonLiteral subject, UriRef property, UriRef dataType, String lexValue){
+     Iterator<Triple> triples = ci.getMetadata().filter(subject, property, null);
+     // exactly one triple is expected
+     assertTrue(triples.hasNext());
+     Resource object = triples.next().getObject();
+     assertFalse(triples.hasNext());
+     if(dataType != null){
+         // a data type was requested: typed literal of exactly that type
+         assertTrue(object instanceof TypedLiteral);
+         assertEquals(dataType, ((TypedLiteral) object).getDataType());
+     } else {
+         // no data type requested: a plain literal is required
+         assertTrue(object instanceof PlainLiteral);
+     }
+     Literal literal = (Literal) object;
+     assertEquals(lexValue, literal.getLexicalForm());
+     return literal;
+ }
+ /**
+  * Checks the literal values the content item itself has for the given
+  * property. The subject used for the lookup is {@code ci.getUri()};
+  * the actual checks are done by the overload taking an explicit subject.
+  * @param ci the content item whose metadata are checked
+  * @param property the property to look up
+  * @param dataType the expected data type, or {@code null} if
+  * {@link PlainLiteral}s are expected
+  * @param lexValues the expected lexical forms
+  * @return the literals found in the metadata
+  */
+ private static Set<Literal> verifyValues(ContentItem ci, UriRef property, UriRef dataType, String...lexValues){
+     final UriRef contentItemUri = ci.getUri();
+     return verifyValues(ci, contentItemUri, property, dataType, lexValues);
+ }
+ /**
+  * Verifies that the values {@code subject} has for {@code property} in
+  * the metadata of the content item are literals of the expected data
+  * type and that their lexical forms are exactly the expected ones.
+  * <p>
+  * At least one value must be present, every value found must be one of
+  * the expected lexical forms (each may be matched only once) and, after
+  * all values are processed, no expected lexical form may be left over.
+  * Without the last check a missing value would go unnoticed.
+  * @param ci the content item whose metadata are checked
+  * @param subject the subject of the triples to look up
+  * @param property the property of the triples to look up
+  * @param dataType the expected data type, or {@code null} if
+  * {@link PlainLiteral}s are expected
+  * @param lexValues the expected lexical forms
+  * @return the literals found in the metadata
+  */
+ private static Set<Literal> verifyValues(ContentItem ci, NonLiteral subject, UriRef property, UriRef dataType, String...lexValues){
+     Iterator<Triple> it = ci.getMetadata().filter(subject, property, null);
+     assertTrue(it.hasNext());
+     Set<String> expected = new HashSet<String>(Arrays.asList(lexValues));
+     Set<Literal> found = new HashSet<Literal>(expected.size());
+     while(it.hasNext()){
+         Resource r = it.next().getObject();
+         if(dataType == null){
+             assertTrue(r instanceof PlainLiteral);
+         } else {
+             assertTrue(r instanceof TypedLiteral);
+             assertEquals(dataType, ((TypedLiteral)r).getDataType());
+         }
+         // fails for unexpected values and for values matched twice
+         assertTrue(expected.remove(((Literal)r).getLexicalForm()));
+         found.add((Literal)r);
+     }
+     // everything still contained in 'expected' was not found in the metadata
+     assertTrue("expected values not found: " + expected, expected.isEmpty());
+     return found;
+ }
+ /**
+  * Verifies that the values {@code subject} has for {@code property} in
+  * the metadata of the content item are exactly the expected
+  * {@link NonLiteral} references.
+  * <p>
+  * At least one value must be present, every value found must be one of
+  * the expected references (each may be matched only once) and, after
+  * all values are processed, no expected reference may be left over.
+  * Without the last check a missing reference would go unnoticed.
+  * @param ci the content item whose metadata are checked
+  * @param subject the subject of the triples to look up
+  * @param property the property of the triples to look up
+  * @param references the expected references
+  * @return the references found in the metadata
+  */
+ private static Set<NonLiteral> verifyValues(ContentItem ci, NonLiteral subject, UriRef property, NonLiteral...references){
+     Iterator<Triple> it = ci.getMetadata().filter(subject, property, null);
+     assertTrue(it.hasNext());
+     Set<NonLiteral> expected = new HashSet<NonLiteral>(Arrays.asList(references));
+     Set<NonLiteral> found = new HashSet<NonLiteral>(expected.size());
+     while(it.hasNext()){
+         Resource r = it.next().getObject();
+         assertTrue(r instanceof NonLiteral);
+         // fails for unexpected references and for references matched twice
+         assertTrue(expected.remove(r));
+         found.add((NonLiteral)r);
+     }
+     // everything still contained in 'expected' was not found in the metadata
+     assertTrue("expected references not found: " + expected, expected.isEmpty());
+     return found;
+ }
+
}
Modified:
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/README
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/README?rev=1296704&r1=1296703&r2=1296704&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/README
(original)
+++ incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/README Sat
Mar 3 21:17:23 2012
@@ -13,18 +13,29 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY
See the License for the specific language governing permissions and
limitations under the License.
-All files within this directory are provided under the
+All listed files are provided under the
Apache License, Version 2.0
This includes the following files:
test.doc
+ test.email.txt
test.html
- test.xhtml
test.odt
test.pages
test.pdf
test.rtf
+ test.xhtml
test2.html
- test2.pdf
\ No newline at end of file
+ test2.pdf
+ testASF.asf
+ testGIF.gif
+ testJPEG_EXIF.jpg
+ testJPEG_GEO.jpg
+ testMID.mid
+ testMP3id3v24.mp3
+ testMP3lyrics.mp3
+ testTIFF.tif
+
+
Added:
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/test.email.txt
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/test.email.txt?rev=1296704&view=auto
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/test.email.txt
(added)
+++
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/test.email.txt
Sat Mar 3 21:17:23 2012
@@ -0,0 +1,41 @@
+From: "Julien Nioche (JIRA)" <[email protected]>
+To: [email protected]
+Subject: [jira] Commented: (TIKA-461) RFC822 messages not parsed
+Reply-To: [email protected]
+Delivered-To: mailing list [email protected]
+Date: Mon, 6 Sep 2010 05:25:34 -0400 (EDT)
+In-Reply-To: <6089099.260231278600349994.JavaMail.jira@thor>
+MIME-Version: 1.0
+Content-Type: text/plain; charset=utf-8
+Content-Transfer-Encoding: 7bit
+X-JIRA-FingerPrint: 30527f35849b9dde25b450d4833f0394
+X-Virus-Checked: Checked by ClamAV on apache.org
+
+
+ [
https://issues.apache.org/jira/browse/TIKA-461?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12906468#action_12906468
]
+
+Julien Nioche commented on TIKA-461:
+------------------------------------
+
+I'll have a look at mime4j and try to use it in Tika
+
+> RFC822 messages not parsed
+> --------------------------
+>
+> Key: TIKA-461
+> URL: https://issues.apache.org/jira/browse/TIKA-461
+> Project: Tika
+> Issue Type: Bug
+> Components: parser
+> Affects Versions: 0.7
+> Reporter: Joshua Turner
+> Assignee: Julien Nioche
+>
+> Presented with an RFC822 message exported from Thunderbird, AutodetectParser
produces an empty body, and a Metadata containing only one key-value pair:
"Content-Type=message/rfc822". Directly calling MboxParser likewise gives an
empty body, but with two metadata pairs: "Content-Encoding=us-ascii
Content-Type=application/mbox".
+> A quick peek at the source of MboxParser shows that the implementation is
pretty naive. If the wiring can be sorted out, something like Apache James'
mime4j might be a better bet.
+
+--
+This message is automatically generated by JIRA.
+-
+You can reply to this email to add a comment to the issue online.
+
Propchange:
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/test.email.txt
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added:
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testASF.asf
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testASF.asf?rev=1296704&view=auto
==============================================================================
Binary file - no diff available.
Propchange:
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testASF.asf
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added:
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testGIF.gif
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testGIF.gif?rev=1296704&view=auto
==============================================================================
Binary file - no diff available.
Propchange:
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testGIF.gif
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added:
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testJPEG_EXIF.jpg
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testJPEG_EXIF.jpg?rev=1296704&view=auto
==============================================================================
Binary file - no diff available.
Propchange:
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testJPEG_EXIF.jpg
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added:
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testJPEG_GEO.jpg
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testJPEG_GEO.jpg?rev=1296704&view=auto
==============================================================================
Binary file - no diff available.
Propchange:
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testJPEG_GEO.jpg
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added:
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testMID.mid
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testMID.mid?rev=1296704&view=auto
==============================================================================
Binary file - no diff available.
Propchange:
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testMID.mid
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added:
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testMP3id3v24.mp3
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testMP3id3v24.mp3?rev=1296704&view=auto
==============================================================================
Binary file - no diff available.
Propchange:
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testMP3id3v24.mp3
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added:
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testMP3lyrics.mp3
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testMP3lyrics.mp3?rev=1296704&view=auto
==============================================================================
Binary file - no diff available.
Propchange:
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testMP3lyrics.mp3
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added:
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testTIFF.tif
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testTIFF.tif?rev=1296704&view=auto
==============================================================================
Binary file - no diff available.
Propchange:
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testTIFF.tif
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Modified:
incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/rdf/NamespaceEnum.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/rdf/NamespaceEnum.java?rev=1296704&r1=1296703&r2=1296704&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/rdf/NamespaceEnum.java
(original)
+++
incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/rdf/NamespaceEnum.java
Sat Mar 3 21:17:23 2012
@@ -42,7 +42,12 @@ public enum NamespaceEnum {
* The FISE namespace (1st version of the Enhancement Structure).
* Currently the same as {@link NamespaceEnum#enhancer}
*/
- fise("http://fise.iks-project.eu/ontology/");
+ fise("http://fise.iks-project.eu/ontology/"),
+ /**
+ * The W3C Ontology for Media Resources http://www.w3.org/TR/mediaont-10/
+ */
+ media("http://www.w3.org/ns/ma-ont#")
+ ;
String ns;
String prefix;
Modified: incubator/stanbol/trunk/enhancer/ldpath/pom.xml
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/ldpath/pom.xml?rev=1296704&r1=1296703&r2=1296704&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/ldpath/pom.xml (original)
+++ incubator/stanbol/trunk/enhancer/ldpath/pom.xml Sat Mar 3 21:17:23 2012
@@ -80,7 +80,7 @@
<exclude>src/test/resources/example.rdf.zip</exclude>
<exclude>src/test/resources/example.txt</exclude>
<exclude>src/test/resources/metadata.rdf.zip</exclude>
- </excludes>
+ </excludes>
</configuration>
</plugin>
</plugins>
Modified:
incubator/stanbol/trunk/entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/defaults/NamespaceEnum.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/defaults/NamespaceEnum.java?rev=1296704&r1=1296703&r2=1296704&view=diff
==============================================================================
---
incubator/stanbol/trunk/entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/defaults/NamespaceEnum.java
(original)
+++
incubator/stanbol/trunk/entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/defaults/NamespaceEnum.java
Sat Mar 3 21:17:23 2012
@@ -76,6 +76,10 @@ public enum NamespaceEnum {
cc("http://creativecommons.org/ns#"),
//Schema.org (see http://schema.org/docs/schemaorg.owl for the Ontology)
schema("http://schema.org/",true),
+ /**
+ * The W3C Ontology for Media Resources http://www.w3.org/TR/mediaont-10/
+ */
+ media("http://www.w3.org/ns/ma-ont#")
;
/**
* The logger
Modified: incubator/stanbol/trunk/parent/pom.xml
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/parent/pom.xml?rev=1296704&r1=1296703&r2=1296704&view=diff
==============================================================================
--- incubator/stanbol/trunk/parent/pom.xml (original)
+++ incubator/stanbol/trunk/parent/pom.xml Sat Mar 3 21:17:23 2012
@@ -1080,11 +1080,6 @@
</dependency>
<dependency>
<groupId>org.apache.clerezza</groupId>
- <artifactId>rdf.ontologies</artifactId>
- <version>0.11-incubating</version>
- </dependency>
- <dependency>
- <groupId>org.apache.clerezza</groupId>
<artifactId>rdf.core.test</artifactId>
<version>0.13-incubating</version>
</dependency>