Repository: cxf
Updated Branches:
  refs/heads/master 20ec9b68d -> d94cb4384


Adding ContentHandler back to TikaContent


Project: http://git-wip-us.apache.org/repos/asf/cxf/repo
Commit: http://git-wip-us.apache.org/repos/asf/cxf/commit/d94cb438
Tree: http://git-wip-us.apache.org/repos/asf/cxf/tree/d94cb438
Diff: http://git-wip-us.apache.org/repos/asf/cxf/diff/d94cb438

Branch: refs/heads/master
Commit: d94cb4384654108428de8ff8c4a7cbd17c772936
Parents: 20ec9b6
Author: Sergey Beryozkin <[email protected]>
Authored: Thu Nov 10 14:15:57 2016 +0000
Committer: Sergey Beryozkin <[email protected]>
Committed: Thu Nov 10 14:15:57 2016 +0000

----------------------------------------------------------------------
 .../jaxrs/ext/search/tika/TikaContentExtractor.java | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cxf/blob/d94cb438/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaContentExtractor.java
----------------------------------------------------------------------
diff --git a/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaContentExtractor.java b/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaContentExtractor.java
index e4d1918..d69da2d 100644
--- a/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaContentExtractor.java
+++ b/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaContentExtractor.java
@@ -162,7 +162,7 @@ public class TikaContentExtractor {
      * @return the extracted content and metadata or null if extraction is not possible
      * or was unsuccessful
      */
-    public TikaContent extract(final InputStream in, final ContentHandler handler,
+    public TikaContent extract(final InputStream in, ContentHandler handler,
         javax.ws.rs.core.MediaType mtHint, ParseContext context) {
         if (in == null) {
             return null;
@@ -215,12 +215,13 @@ public class TikaContentExtractor {
                 // extraction process. If we get an exception with a null handler then a given parser is still
                 // not ready to accept null handlers so lets retry with IgnoreContentHandler.
                 if (handler == null) {
-                    parser.parse(in, new IgnoreContentHandler(), metadata, context);
+                    handler = new IgnoreContentHandler();
+                    parser.parse(in, handler, metadata, context);
                 } else {
                     throw ex;
                 }
             }
-            return new TikaContent(handler == null ? null : handler.toString(), metadata, mediaType);
+            return new TikaContent(handler, metadata, mediaType);
         } catch (final IOException ex) {
             LOG.log(Level.WARNING, "Unable to extract media type from input stream", ex);
         } catch (final SAXException ex) {
@@ -269,10 +270,10 @@ public class TikaContentExtractor {
      */
     public static class TikaContent implements Serializable {
         private static final long serialVersionUID = -1240120543378490963L;
-        private String content;
+        private ContentHandler content;
         private Metadata metadata;
         private MediaType mediaType;
-        public TikaContent(String content, Metadata metadata, MediaType mediaType) {
+        public TikaContent(ContentHandler content, Metadata metadata, MediaType mediaType) {
             this.content = content;
             this.metadata = metadata;
             this.mediaType = mediaType;
@@ -283,7 +284,7 @@ public class TikaContentExtractor {
          * to parse the content
          */
         public String getContent() {
-            return content;
+            return content instanceof ToTextContentHandler ? content.toString() : null;
         }
         /**
          * Return the metadata
