Author: rwesten
Date: Tue Nov 12 15:33:04 2013
New Revision: 1541110
URL: http://svn.apache.org/r1541110
Log:
STANBOL-1210: Updated ContentItemWriter to use the httpmime 4.3
MultipartEntityBuilder API; added debug/trace level logging to the reader
Modified:
stanbol/trunk/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/reader/ContentItemReader.java
stanbol/trunk/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/writers/ContentItemWriter.java
stanbol/trunk/enhancer/jersey/src/test/java/org/apache/stanbol/enhancer/jersey/ContentItemReaderWriterTest.java
Modified:
stanbol/trunk/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/reader/ContentItemReader.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/reader/ContentItemReader.java?rev=1541110&r1=1541109&r2=1541110&view=diff
==============================================================================
---
stanbol/trunk/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/reader/ContentItemReader.java
(original)
+++
stanbol/trunk/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/reader/ContentItemReader.java
Tue Nov 12 15:33:04 2013
@@ -22,6 +22,7 @@ import static org.apache.stanbol.enhance
import static
org.apache.stanbol.enhancer.jersey.utils.EnhancementPropertiesHelper.getEnhancementProperties;
import static
org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper.randomUUID;
+import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
@@ -30,14 +31,15 @@ import java.lang.reflect.Type;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLDecoder;
-import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
+import java.util.List;
import java.util.Map;
+import java.util.Map.Entry;
import java.util.Set;
import javax.ws.rs.WebApplicationException;
@@ -119,8 +121,23 @@ public class ContentItemReader implement
//boolean withMetadata = withMetadata(httpHeaders);
ContentItem contentItem = null;
UriRef contentItemId = getContentItemId();
+ if(log.isTraceEnabled()){
+ //NOTE: enabling TRACE level logging will copy the parsed content
+ // into a BYTE array
+ log.trace("Parse ContentItem from");
+ log.trace(" - MediaType: {}",mediaType);
+ log.trace(" - Headers:");
+ for(Entry<String,List<String>> header : httpHeaders.entrySet()){
+ log.trace(" {}: {}", header.getKey(), header.getValue());
+ }
+ byte[] content = IOUtils.toByteArray(entityStream);
+ log.trace("content: \n{}", new String(content,"UTF-8"));
+ IOUtils.closeQuietly(entityStream);
+ entityStream = new ByteArrayInputStream(content);
+ }
Set<String> parsedContentIds = new HashSet<String>();
if(mediaType.isCompatible(MULTIPART)){
+ log.debug(" - parse Multipart MIME ContentItem");
//try to read ContentItem from "multipart/from-data"
MGraph metadata = null;
FileItemIterator fileItemIterator;
@@ -308,13 +325,14 @@ public class ContentItemReader implement
* @throws IOException on any error while accessing the contents of the
parsed
* {@link FileItemStream}
* @throws FileUploadException if the parsed contents are not correctly
- * encoded Multipoart MIME
+ * encoded Multipart MIME
*/
private ContentItem createContentItem(UriRef id, MGraph metadata,
FileItemStream content,Set<String> parsedContentParts) throws IOException,
FileUploadException {
MediaType partContentType =
MediaType.valueOf(content.getContentType());
ContentItem contentItem = null;
ContentItemFactory ciFactory = getContentItemFactory();
if(MULTIPART.isCompatible(partContentType)){
+ log.debug(" - multiple (alternate) ContentParts");
//multiple contentParts are parsed
FileItemIterator contentPartIterator = fu.getItemIterator(
new MessageBodyReaderContext(
@@ -322,12 +340,13 @@ public class ContentItemReader implement
while(contentPartIterator.hasNext()){
FileItemStream fis = contentPartIterator.next();
if(contentItem == null){
- log.debug("create ContentItem {} for content (type:{})",
- id,content.getContentType());
+ log.debug(" - create ContentItem {} for content
(type:{})",
+ id,fis.getContentType());
contentItem = ciFactory.createContentItem(id,
new
StreamSource(fis.openStream(),fis.getContentType()),
metadata);
} else {
+ log.debug(" - create Blob for content (type:{})",
fis.getContentType());
Blob blob = ciFactory.createBlob(new
StreamSource(fis.openStream(), fis.getContentType()));
UriRef contentPartId = null;
if(fis.getFieldName() != null &&
!fis.getFieldName().isEmpty()){
@@ -337,14 +356,14 @@ public class ContentItemReader implement
//TODO maybe we should throw an exception instead
contentPartId = new UriRef("urn:contentpart:"+
randomUUID());
}
- log.debug(" ... add Blob {} to ContentItem {} with
content (type:{})",
+ log.debug(" ... add Blob {} to ContentItem {} with
content (type:{})",
new Object[]{contentPartId, id, fis.getContentType()});
contentItem.addPart(contentPartId, blob);
parsedContentParts.add(contentPartId.getUnicodeString());
}
}
} else {
- log.debug("create ContentItem {} for content (type:{})",
+ log.debug(" - create ContentItem {} for content (type:{})",
id,content.getContentType());
contentItem = ciFactory.createContentItem(id,
new
StreamSource(content.openStream(),content.getContentType()),
Modified:
stanbol/trunk/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/writers/ContentItemWriter.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/writers/ContentItemWriter.java?rev=1541110&r1=1541109&r2=1541110&view=diff
==============================================================================
---
stanbol/trunk/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/writers/ContentItemWriter.java
(original)
+++
stanbol/trunk/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/writers/ContentItemWriter.java
Tue Nov 12 15:33:04 2013
@@ -45,9 +45,11 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
+import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
+import java.util.Random;
import java.util.Set;
import javax.ws.rs.WebApplicationException;
@@ -61,20 +63,21 @@ import javax.ws.rs.ext.Provider;
import org.apache.clerezza.rdf.core.TripleCollection;
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.clerezza.rdf.core.serializedform.Serializer;
-import org.apache.clerezza.rdf.core.serializedform.UnsupportedFormatException;
import
org.apache.clerezza.rdf.core.serializedform.UnsupportedSerializationFormatException;
import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang.StringUtils;
import org.apache.felix.scr.annotations.Component;
import org.apache.felix.scr.annotations.Property;
import org.apache.felix.scr.annotations.Reference;
import org.apache.felix.scr.annotations.Service;
-import org.apache.http.entity.mime.FormBodyPart;
-import org.apache.http.entity.mime.HttpMultipart;
+import org.apache.http.HttpEntity;
+import org.apache.http.entity.ContentType;
import org.apache.http.entity.mime.MIME;
+import org.apache.http.entity.mime.MultipartEntityBuilder;
import org.apache.http.entity.mime.content.AbstractContentBody;
import org.apache.http.entity.mime.content.ContentBody;
import org.apache.http.entity.mime.content.ContentDescriptor;
-import org.apache.http.entity.mime.content.StringBody;
+import org.apache.http.entity.mime.content.InputStreamBody;
import org.apache.stanbol.enhancer.jersey.utils.EnhancementPropertiesHelper;
import org.apache.stanbol.enhancer.servicesapi.Blob;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
@@ -90,13 +93,32 @@ import org.slf4j.LoggerFactory;
@Provider
public class ContentItemWriter implements MessageBodyWriter<ContentItem> {
+ public static final String CONTENT_ITEM_BOUNDARY;
+ public static final String CONTENT_PARTS_BOUNDERY;;
+ /**
+ * The pool of ASCII chars to be used for generating a multipart boundary.
+ */
+ private final static char[] MULTIPART_CHARS =
+ "-_1234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ .toCharArray();
+ static {
+ final Random rand = new Random();
+ final int count = rand.nextInt(11) + 10; // a random size from 10 to 20
+ StringBuilder randomString = new StringBuilder(count);
+ for (int i = 0; i < count; i++) {
+
randomString.append(MULTIPART_CHARS[rand.nextInt(MULTIPART_CHARS.length)]);
+ }
+ CONTENT_ITEM_BOUNDARY = "contentItem-"+randomString;
+ CONTENT_PARTS_BOUNDERY = "contentParts-"+randomString;
+ }
+ private static final ContentType MULTIPART_ALTERNATE =
ContentType.create("multipart/alternate");
+
Logger log = LoggerFactory.getLogger(ContentItemWriter.class);
/**
* The "multipart/*" wilrcard
*/
private static final MediaType MULTIPART =
MediaType.valueOf(MULTIPART_FORM_DATA_TYPE.getType()+"/*");
- private static final String CONTENT_ITEM_BOUNDARY = "contentItem";
private static final Charset UTF8 = Charset.forName("UTF-8");
private static final MediaType DEFAULT_RDF_FORMAT = new MediaType(
APPLICATION_JSON_TYPE.getType(),
@@ -214,25 +236,36 @@ public class ContentItemWriter implement
String contentType = String.format("%s/%s; charset=%s;
boundary=%s",
mediaType.getType(),mediaType.getSubtype(),charset.toString(),CONTENT_ITEM_BOUNDARY);
httpHeaders.putSingle(HttpHeaders.CONTENT_TYPE,contentType);
- HttpMultipart entity = new HttpMultipart("from-data", charset
,CONTENT_ITEM_BOUNDARY);
+ MultipartEntityBuilder entityBuilder =
MultipartEntityBuilder.create();
+ entityBuilder.setBoundary(CONTENT_ITEM_BOUNDARY);
+ //HttpMultipart entity = new HttpMultipart("from-data", charset
,CONTENT_ITEM_BOUNDARY);
//(2) serialising the metadata
if(!isOmitMetadata(properties)){
- entity.addBodyPart(new FormBodyPart("metadata", new
ClerezzaContentBody(
- ci.getUri().getUnicodeString(), ci.getMetadata(),
- rdfFormat)));
+ entityBuilder.addPart("metadata", new ClerezzaContentBody(
+ ci.getUri().getUnicodeString(), ci.getMetadata(),
rdfFormat));
+// entity.addBodyPart(new FormBodyPart("metadata", new
ClerezzaContentBody(
+// ci.getUri().getUnicodeString(), ci.getMetadata(),
+// rdfFormat)));
}
//(3) serialising the Content (Bloby)
//(3.a) Filter based on parameter
List<Entry<UriRef,Blob>> includedBlobs = filterBlobs(ci,
properties);
//(3.b) Serialise the filtered
if(!includedBlobs.isEmpty()) {
- HttpMultipart content = new HttpMultipart("alternate", UTF8
,"contentParts");
+ Map<String,ContentBody> contentParts = new
LinkedHashMap<String,ContentBody>();
for(Entry<UriRef,Blob> entry : includedBlobs){
- content.addBodyPart(new
FormBodyPart(entry.getKey().getUnicodeString(),
- new BlobContentBody(entry.getValue()))); //no file name
+ Blob blob = entry.getValue();
+ ContentType ct = ContentType.create(blob.getMimeType());
+ String cs = blob.getParameter().get("charset");
+ if(StringUtils.isNotBlank(cs)){
+ ct = ct.withCharset(cs);
+ }
+ contentParts.put(entry.getKey().getUnicodeString(),
+ new InputStreamBody(blob.getStream(),ct));
}
//add all the blobs
- entity.addBodyPart(new FormBodyPart("content",new
MultipartContentBody(content, null)));
+ entityBuilder.addPart("content", new
MultipartContentBody(contentParts,
+ CONTENT_PARTS_BOUNDERY, MULTIPART_ALTERNATE));
} //else no content to include
Set<String> includeContentParts =
getIncludedContentPartURIs(properties);
if(includeContentParts != null){
@@ -248,21 +281,21 @@ public class ContentItemWriter implement
log.error(message,e);
throw new WebApplicationException(message,
Response.Status.INTERNAL_SERVER_ERROR);
}
- entity.addBodyPart(new FormBodyPart(
- ENHANCEMENT_PROPERTIES_URI.getUnicodeString(),
- new
StringBody(object.toString(),MediaType.APPLICATION_JSON,UTF8)));
+ entityBuilder.addTextBody(
+ ENHANCEMENT_PROPERTIES_URI.getUnicodeString(),
object.toString(),
+ ContentType.APPLICATION_JSON.withCharset(UTF8));
}
//(5) additional RDF metadata stored in contentParts
for(Entry<UriRef,TripleCollection> entry : getContentParts(ci,
TripleCollection.class).entrySet()){
if(includeContentParts.isEmpty() ||
includeContentParts.contains(
entry.getKey())){
- entity.addBodyPart(new
FormBodyPart(entry.getKey().getUnicodeString(),
+
entityBuilder.addPart(entry.getKey().getUnicodeString(),
new ClerezzaContentBody(null, //no file name
- entry.getValue(),rdfFormat)));
+ entry.getValue(),rdfFormat));
} // else ignore this content part
}
}
- entity.writeTo(entityStream);
+ entityBuilder.build().writeTo(entityStream);
}
}
@@ -398,20 +431,27 @@ public class ContentItemWriter implement
*/
private class MultipartContentBody extends AbstractContentBody implements
ContentBody,ContentDescriptor {
- private HttpMultipart multipart;
- private String name;
+ private Map<String,ContentBody> parts;
+ private String boundary;
- public MultipartContentBody(HttpMultipart multipart,String name){
- super(String.format("multipart/%s; boundary=%s",
- multipart.getSubType(), multipart.getBoundary()));
- this.name = name;
- this.multipart = multipart;
+ public MultipartContentBody(Map<String,ContentBody> parts, String
boundary, ContentType contentType){
+ super(contentType);
+ this.parts = parts;
+ this.boundary = boundary;
}
@Override
public String getCharset() {
- return multipart.getCharset().toString();
+ return null; //no charset for multipart parts
+ }
+ @Override
+ public String getMimeType() {
+ return new StringBuilder(super.getMimeType()).append("; boundary=")
+ .append(boundary).toString();
+ }
+ @Override
+ public ContentType getContentType() {
+ return super.getContentType();
}
-
@Override
public String getTransferEncoding() {
return MIME.ENC_8BIT;
@@ -419,17 +459,25 @@ public class ContentItemWriter implement
@Override
public long getContentLength() {
- return multipart.getTotalLength();
+ //not known as we would need to count the content length AND
+ //the length of the different mime headers.
+ return -1;
}
@Override
public String getFilename() {
- return name;
+ return null;
}
@Override
public void writeTo(OutputStream out) throws IOException {
- multipart.writeTo(out);
+ MultipartEntityBuilder builder = MultipartEntityBuilder.create();
+ builder.setBoundary(boundary);
+ for(Entry<String,ContentBody> part : parts.entrySet()){
+ builder.addPart(part.getKey(), part.getValue());
+ }
+ HttpEntity entity = builder.build();
+ entity.writeTo(out);
}
}
@@ -445,7 +493,8 @@ public class ContentItemWriter implement
private String name;
protected ClerezzaContentBody(String name, TripleCollection graph,
MediaType mimeType){
- super(mimeType.getType()+'/'+mimeType.getSubtype());
+ super(ContentType.create(new StringBuilder(mimeType.getType())
+ .append('/').append(mimeType.getSubtype()).toString(), UTF8));
charset = mimeType.getParameters().get("charset");
if(charset == null || charset.isEmpty()){
charset = UTF8.toString();
@@ -479,43 +528,5 @@ public class ContentItemWriter implement
getSerializer().serialize(out, graph,
getMediaType()+'/'+getSubType());
}
}
- private class BlobContentBody extends AbstractContentBody {
-
- private Blob blob;
-
- public BlobContentBody(Blob blob) {
- super(blob.getMimeType());
- this.blob = blob;
- }
-
- @Override
- public String getFilename() {
- return null;
- }
-
- @Override
- public void writeTo(OutputStream out) throws IOException {
- InputStream in = blob.getStream();
- IOUtils.copy(in, out);
- IOUtils.closeQuietly(in);
- }
-
- @Override
- public String getCharset() {
- return blob.getParameter().get("charset");
- }
-
- @Override
- public String getTransferEncoding() {
- return blob.getParameter().get("charset") == null ?
- MIME.ENC_BINARY : MIME.ENC_8BIT;
- }
-
- @Override
- public long getContentLength() {
- return -1;
- }
-
- }
}
Modified:
stanbol/trunk/enhancer/jersey/src/test/java/org/apache/stanbol/enhancer/jersey/ContentItemReaderWriterTest.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/enhancer/jersey/src/test/java/org/apache/stanbol/enhancer/jersey/ContentItemReaderWriterTest.java?rev=1541110&r1=1541109&r2=1541110&view=diff
==============================================================================
---
stanbol/trunk/enhancer/jersey/src/test/java/org/apache/stanbol/enhancer/jersey/ContentItemReaderWriterTest.java
(original)
+++
stanbol/trunk/enhancer/jersey/src/test/java/org/apache/stanbol/enhancer/jersey/ContentItemReaderWriterTest.java
Tue Nov 12 15:33:04 2013
@@ -164,41 +164,41 @@ public class ContentItemReaderWriterTest
MediaType contentType = serializeContentItem(out);
assertTrue(MediaType.MULTIPART_FORM_DATA_TYPE.isCompatible(contentType));
assertNotNull(contentType.getParameters().get("boundary"));
-
assertEquals(contentType.getParameters().get("boundary"),"contentItem");
+
assertEquals(contentType.getParameters().get("boundary"),ContentItemWriter.CONTENT_ITEM_BOUNDARY);
assertNotNull(contentType.getParameters().get("charset"));
assertEquals(contentType.getParameters().get("charset"),"UTF-8");
//check the serialised multipart MIME
String multipartMime = new
String(out.toByteArray(),Charset.forName(contentType.getParameters().get("charset")));
log.info("Multipart MIME content:\n{}\n",multipartMime);
String[] tests = new String[]{
- "--"+contentType.getParameters().get("boundary"),
+ "--"+ContentItemWriter.CONTENT_ITEM_BOUNDARY,
"Content-Disposition: form-data; name=\"metadata\";
filename=\"urn:test\"",
"Content-Type: application/rdf+xml; charset=UTF-8",
"<rdf:type rdf:resource=\"urn:types:Document\"/>",
- "--"+contentType.getParameters().get("boundary"),
+ "--"+ContentItemWriter.CONTENT_ITEM_BOUNDARY,
"Content-Disposition: form-data; name=\"content\"",
- "Content-Type: multipart/alternate; boundary=contentParts;
charset=UTF-8",
- "--contentParts",
+ "Content-Type: multipart/alternate; boundary=contentParts",
+ "--"+ContentItemWriter.CONTENT_PARTS_BOUNDERY,
"Content-Disposition: form-data; name=\"urn:test_main\"",
"Content-Type: text/html; charset=UTF-8",
"This is a <b>ContentItem</b> to <i>Mime Multipart</i> test!",
- "--contentParts",
+ "--"+ContentItemWriter.CONTENT_PARTS_BOUNDERY,
"Content-Disposition: form-data; name=\"run:text:text\"",
"Content-Type: text/plain; charset=UTF-8",
"This is a ContentItem to Mime Multipart test!",
- "--contentParts--",
- "--"+contentType.getParameters().get("boundary"),
+ "--"+ContentItemWriter.CONTENT_PARTS_BOUNDERY+"--",
+ "--"+ContentItemWriter.CONTENT_ITEM_BOUNDARY,
"Content-Disposition: form-data;
name=\""+ENHANCEMENT_PROPERTIES_URI.getUnicodeString()+"\"",
"Content-Type: application/json; charset=UTF-8",
- "--"+contentType.getParameters().get("boundary"),
+ "--"+ContentItemWriter.CONTENT_ITEM_BOUNDARY,
"Content-Disposition: form-data;
name=\""+CHAIN_EXECUTION.getUnicodeString()+"\"",
"Content-Type: application/rdf+xml; charset=UTF-8",
"<rdf:type
rdf:resource=\"http://stanbol.apache.org/ontology/enhancer/executionplan#ExecutionNode\"/>",
- "--"+contentType.getParameters().get("boundary")+"--"
+ "--"+ContentItemWriter.CONTENT_ITEM_BOUNDARY+"--"
};
for(String test : tests){
int index = multipartMime.indexOf(test);
- assertTrue(index >=0);
+ assertTrue("content does not contain '" + test + "'!",index >=0);
multipartMime = multipartMime.substring(index);
}
}
@@ -218,8 +218,8 @@ public class ContentItemReaderWriterTest
assertTrue(copy.isEmpty());
//assert Blob
assertEquals(contentItem.getBlob().getMimeType(),
ci.getBlob().getMimeType());
- String content = IOUtils.toString(contentItem.getStream(),"UTF-8");
- String readContent = IOUtils.toString(ci.getStream(), "UTF-8");
+ String content =
IOUtils.toString(contentItem.getBlob().getStream(),"UTF-8");
+ String readContent = IOUtils.toString(ci.getBlob().getStream(),
"UTF-8");
assertEquals(content, readContent);
Iterator<Entry<UriRef,Blob>> contentItemBlobsIt =
ContentItemHelper.getContentParts(contentItem,
Blob.class).entrySet().iterator();
Iterator<Entry<UriRef,Blob>> ciBlobsIt =
ContentItemHelper.getContentParts(ci, Blob.class).entrySet().iterator();