Author: tallison
Date: Fri Dec 19 15:46:57 2014
New Revision: 1646779
URL: http://svn.apache.org/r1646779
Log:
TIKA-1499: fold MetadataEP in tika-server into MetadataResource
Added:
tika/trunk/tika-server/src/main/java/org/apache/tika/server/TextMessageBodyWriter.java
Removed:
tika/trunk/tika-server/src/main/java/org/apache/tika/server/MetadataEP.java
tika/trunk/tika-server/src/test/java/org/apache/tika/server/MetadataEPTest.java
Modified:
tika/trunk/CHANGES.txt
tika/trunk/tika-server/src/main/java/org/apache/tika/server/MetadataResource.java
tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java
tika/trunk/tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java
Modified: tika/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/tika/trunk/CHANGES.txt?rev=1646779&r1=1646778&r2=1646779&view=diff
==============================================================================
--- tika/trunk/CHANGES.txt (original)
+++ tika/trunk/CHANGES.txt Fri Dec 19 15:46:57 2014
@@ -1,4 +1,8 @@
Release 1.7 - Current Development
+
+ * Tika Server support for selecting a single metadata key;
+ wrapped MetadataEP into MetadataResource (TIKA-1499).
+
* Tika Server support for JSON and XMP views of metadata (TIKA-1497).
* Tika Parent uses dependency management to keep duplicate
Modified:
tika/trunk/tika-server/src/main/java/org/apache/tika/server/MetadataResource.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/MetadataResource.java?rev=1646779&r1=1646778&r2=1646779&view=diff
==============================================================================
---
tika/trunk/tika-server/src/main/java/org/apache/tika/server/MetadataResource.java
(original)
+++
tika/trunk/tika-server/src/main/java/org/apache/tika/server/MetadataResource.java
Fri Dec 19 15:46:57 2014
@@ -22,6 +22,7 @@ import java.io.InputStream;
import javax.ws.rs.Consumes;
import javax.ws.rs.PUT;
import javax.ws.rs.Path;
+import javax.ws.rs.PathParam;
import javax.ws.rs.Produces;
import javax.ws.rs.core.Context;
import javax.ws.rs.core.HttpHeaders;
@@ -66,6 +67,60 @@ public class MetadataResource {
parseMetadata(is, httpHeaders.getRequestHeaders(), info)).build();
}
+    /**
+     * Returns a single metadata field of the uploaded document.
+     * <p>
+     * The document is handed to {@code parseMetadata}; the response status is:
+     * <ul>
+     * <li>{@link javax.ws.rs.core.Response.Status#BAD_REQUEST} when parsing throws.
+     * The failure is logged; the client may simply not have sent enough data.</li>
+     * <li>{@link javax.ws.rs.core.Response.Status#NOT_FOUND} when parsing completed
+     * but no value is stored under the requested field.</li>
+     * <li>{@link javax.ws.rs.core.Response.Status#OK} otherwise, with the parsed
+     * metadata reduced to the requested field as entity.</li>
+     * </ul>
+     * Only the other keys are removed from the parsed metadata, so a multi-valued
+     * field keeps all of its values.
+     * <p>
+     * If you want XMP, you must be careful to specify the exact XMP key.
+     * For example, "Author" will return nothing, but "dc:creator" will return the
+     * correct value.
+     *
+     * @param is          the document to parse
+     * @param httpHeaders headers of the current request; passed on to the parser
+     * @param info        URI information of the current request
+     * @param field       the tika metadata field name
+     * @return a response with status OK, NOT_FOUND or BAD_REQUEST as described above
+     * @throws Exception declared by the signature; exceptions thrown while parsing
+     *                   are caught here and reported as BAD_REQUEST
+     */
+    @PUT
+    @Path("{field}")
+    @Produces({"text/csv", "application/json", "application/rdf+xml", "text/plain"})
+    public Response getMetadataField(InputStream is, @Context HttpHeaders httpHeaders,
+                                     @Context UriInfo info, @PathParam("field") String field)
+            throws Exception {
+
+        Metadata parsed;
+        try {
+            parsed = parseMetadata(is, httpHeaders.getRequestHeaders(), info);
+        } catch (Exception e) {
+            // BAD_REQUEST indicates that we may not have had enough data to
+            // process the request
+            logger.info("Failed to process field " + field, e);
+            return Response.status(Response.Status.BAD_REQUEST)
+                    .entity("Failed to get metadata field " + field).build();
+        }
+
+        // the document was parsed, so a field we did not see is reported as NOT_FOUND
+        if (parsed == null || parsed.get(field) == null) {
+            return Response.status(Response.Status.NOT_FOUND)
+                    .entity("Failed to get metadata field " + field).build();
+        }
+
+        // strip every key except the requested one from the response entity
+        String[] keys = parsed.names();
+        for (int i = 0; i < keys.length; i++) {
+            if (field.equals(keys[i])) {
+                continue;
+            }
+            parsed.remove(keys[i]);
+        }
+        return Response.ok(parsed).build();
+    }
+
private Metadata parseMetadata(InputStream is,
MultivaluedMap<String, String> httpHeaders,
UriInfo info) throws Exception {
final Metadata metadata = new Metadata();
Added:
tika/trunk/tika-server/src/main/java/org/apache/tika/server/TextMessageBodyWriter.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/TextMessageBodyWriter.java?rev=1646779&view=auto
==============================================================================
---
tika/trunk/tika-server/src/main/java/org/apache/tika/server/TextMessageBodyWriter.java
(added)
+++
tika/trunk/tika-server/src/main/java/org/apache/tika/server/TextMessageBodyWriter.java
Fri Dec 19 15:46:57 2014
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.server;
+
+import au.com.bytecode.opencsv.CSVWriter;
+import org.apache.tika.metadata.Metadata;
+
+import javax.ws.rs.Produces;
+import javax.ws.rs.WebApplicationException;
+import javax.ws.rs.core.MediaType;
+import javax.ws.rs.core.MultivaluedMap;
+import javax.ws.rs.ext.MessageBodyWriter;
+import javax.ws.rs.ext.Provider;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import java.lang.annotation.Annotation;
+import java.lang.reflect.Type;
+import java.util.ArrayList;
+import java.util.Arrays;
+
+/**
+ * Returns simple text string for a particular metadata value.
+ * This assumes that the metadata object only has one key;
+ * if there is more than one key or no keys, this will throw a webapp
exception.
+ * <p>
+ * This will choose the first value returned for the one key.
+ */
+@Provider
+@Produces(MediaType.TEXT_PLAIN)
+public class TextMessageBodyWriter implements MessageBodyWriter<Metadata> {
+
+ public boolean isWriteable(Class<?> type, Type genericType, Annotation[]
annotations, MediaType mediaType) {
+ return mediaType.equals(MediaType.TEXT_PLAIN_TYPE) &&
Metadata.class.isAssignableFrom(type);
+ }
+
+ public long getSize(Metadata data, Class<?> type, Type genericType,
Annotation[] annotations, MediaType mediaType) {
+ return -1;
+ }
+
+ @Override
+ @SuppressWarnings("resource")
+ public void writeTo(Metadata metadata, Class<?> type, Type genericType,
Annotation[] annotations,
+ MediaType mediaType, MultivaluedMap<String, Object> httpHeaders,
OutputStream entityStream) throws IOException,
+ WebApplicationException {
+
+ if (metadata.names().length != 1) {
+ throw new WebApplicationException("Metadata object must only have one
entry!");
+ }
+ Writer writer = new OutputStreamWriter(entityStream, "UTF-8");
+
+ for (String name : metadata.names()) {
+ writer.write(metadata.get(name));
+ }
+
+ // Don't close, just flush the stream
+ writer.flush();
+ }
+}
+
Modified:
tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java?rev=1646779&r1=1646778&r2=1646779&view=diff
==============================================================================
---
tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java
(original)
+++
tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java
Fri Dec 19 15:46:57 2014
@@ -119,6 +119,7 @@ public class TikaServerCli {
providers.add(new MetadataListMessageBodyWriter());
providers.add(new JSONMessageBodyWriter());
providers.add(new XMPMessageBodyWriter());
+ providers.add(new TextMessageBodyWriter());
providers.add(new TikaExceptionMapper());
if (logFilter != null) {
providers.add(logFilter);
Modified:
tika/trunk/tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java?rev=1646779&r1=1646778&r2=1646779&view=diff
==============================================================================
---
tika/trunk/tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java
(original)
+++
tika/trunk/tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java
Fri Dec 19 15:46:57 2014
@@ -20,14 +20,19 @@ package org.apache.tika.server;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
+import java.io.StringWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;
import org.apache.cxf.helpers.IOUtils;
@@ -36,6 +41,7 @@ import org.apache.cxf.jaxrs.client.WebCl
import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.serialization.JsonMetadata;
+import org.junit.Assert;
import org.junit.Test;
import au.com.bytecode.opencsv.CSVReader;
@@ -56,6 +62,7 @@ public class MetadataResourceTest extend
providers.add(new JSONMessageBodyWriter());
providers.add(new CSVMessageBodyWriter());
providers.add(new XMPMessageBodyWriter());
+ providers.add(new TextMessageBodyWriter());
sf.setProviders(providers);
}
@@ -162,5 +169,74 @@ public class MetadataResourceTest extend
assertContains("<rdf:li>Maxim Valyanskiy</rdf:li>", result);
}
+ //Now test requesting one field
+ @Test
+ public void testGetField_XXX_NotFound() throws Exception {
+ Response response = WebClient.create(endPoint + META_PATH +
"/xxx").type("application/msword")
+
.accept(MediaType.APPLICATION_JSON).put(ClassLoader.getSystemResourceAsStream(TikaResourceTest.TEST_DOC));
+ Assert.assertEquals(Response.Status.NOT_FOUND.getStatusCode(),
response.getStatus());
+ }
+
+    /**
+     * Uploads only the first 8000 bytes of the test document, asks for the
+     * "Author" field as plain text and expects BAD_REQUEST.
+     */
+    @Test
+    public void testGetField_Author_TEXT_Partial_BAD_REQUEST() throws Exception {
+
+        InputStream stream = ClassLoader.getSystemResourceAsStream(TikaResourceTest.TEST_DOC);
+        assertNotNull(stream);
+        InputStream partial;
+        try {
+            partial = copy(stream, 8000);
+        } finally {
+            // the truncated copy lives in memory, so the resource stream can be released
+            stream.close();
+        }
+
+        Response response = WebClient.create(endPoint + META_PATH + "/Author")
+                .type("application/msword")
+                .accept(MediaType.TEXT_PLAIN).put(partial);
+        Assert.assertEquals(Response.Status.BAD_REQUEST.getStatusCode(), response.getStatus());
+    }
+
+    /**
+     * Uploads the first 12000 bytes of the test document and expects the
+     * "Author" field to come back as plain text with status OK.
+     */
+    @Test
+    public void testGetField_Author_TEXT_Partial_Found() throws Exception {
+
+        InputStream stream = ClassLoader.getSystemResourceAsStream(TikaResourceTest.TEST_DOC);
+        assertNotNull(stream);
+        InputStream partial;
+        try {
+            partial = copy(stream, 12000);
+        } finally {
+            // the truncated copy lives in memory, so the resource stream can be released
+            stream.close();
+        }
+
+        Response response = WebClient.create(endPoint + META_PATH + "/Author")
+                .type("application/msword")
+                .accept(MediaType.TEXT_PLAIN).put(partial);
+        Assert.assertEquals(Response.Status.OK.getStatusCode(), response.getStatus());
+        String s = IOUtils.readStringFromStream((InputStream) response.getEntity());
+        assertEquals("Maxim Valyanskiy", s);
+    }
+
+    /**
+     * Uploads the first 12000 bytes of the test document and expects a JSON
+     * response with status OK that contains the "Author" field and nothing else.
+     */
+    @Test
+    public void testGetField_Author_JSON_Partial_Found() throws Exception {
+
+        InputStream stream = ClassLoader.getSystemResourceAsStream(TikaResourceTest.TEST_DOC);
+        assertNotNull(stream);
+        InputStream partial;
+        try {
+            partial = copy(stream, 12000);
+        } finally {
+            // the truncated copy lives in memory, so the resource stream can be released
+            stream.close();
+        }
+
+        Response response = WebClient.create(endPoint + META_PATH + "/Author")
+                .type("application/msword")
+                .accept(MediaType.APPLICATION_JSON).put(partial);
+        Assert.assertEquals(Response.Status.OK.getStatusCode(), response.getStatus());
+
+        // decode explicitly as UTF-8 instead of relying on the platform default charset
+        Metadata metadata = JsonMetadata.fromJson(
+                new InputStreamReader((InputStream) response.getEntity(), "UTF-8"));
+        assertEquals("Maxim Valyanskiy", metadata.get("Author"));
+        assertEquals(1, metadata.names().length);
+    }
+
+    /**
+     * Uploads the first 12000 bytes of the test document and expects the
+     * "dc:creator" field to come back as application/rdf+xml with status OK.
+     */
+    @Test
+    public void testGetField_Author_XMP_Partial_Found() throws Exception {
+
+        InputStream stream = ClassLoader.getSystemResourceAsStream(TikaResourceTest.TEST_DOC);
+        assertNotNull(stream);
+        InputStream partial;
+        try {
+            partial = copy(stream, 12000);
+        } finally {
+            // the truncated copy lives in memory, so the resource stream can be released
+            stream.close();
+        }
+
+        Response response = WebClient.create(endPoint + META_PATH + "/dc:creator")
+                .type("application/msword")
+                .accept("application/rdf+xml").put(partial);
+        Assert.assertEquals(Response.Status.OK.getStatusCode(), response.getStatus());
+        String s = IOUtils.readStringFromStream((InputStream) response.getEntity());
+        assertContains("<rdf:li>Maxim Valyanskiy</rdf:li>", s);
+    }
+
+    /**
+     * Buffers at most {@code remaining} bytes of {@code in} in memory.
+     * Reading stops early when the stream reports no more data. The source
+     * stream is not closed.
+     *
+     * @param in        stream to read from
+     * @param remaining maximum number of bytes to copy; values &lt;= 0 copy nothing
+     * @return a stream over the bytes that were read
+     * @throws IOException if reading from {@code in} fails
+     */
+    private static InputStream copy(InputStream in, int remaining) throws IOException {
+        ByteArrayOutputStream captured = new ByteArrayOutputStream();
+        byte[] chunk = new byte[Math.max(remaining, 0)];
+        int left = remaining;
+        while (left > 0) {
+            int count = in.read(chunk, 0, left);
+            if (count <= 0) {
+                break;
+            }
+            captured.write(chunk, 0, count);
+            left -= count;
+        }
+        return new ByteArrayInputStream(captured.toByteArray());
+    }
+
}