Repository: cxf Updated Branches: refs/heads/master 785c0bd70 -> 0253be4a7
[CXF-5549] Adding a constructor accepting list of parsers to Lucene extractor too Project: http://git-wip-us.apache.org/repos/asf/cxf/repo Commit: http://git-wip-us.apache.org/repos/asf/cxf/commit/0253be4a Tree: http://git-wip-us.apache.org/repos/asf/cxf/tree/0253be4a Diff: http://git-wip-us.apache.org/repos/asf/cxf/diff/0253be4a Branch: refs/heads/master Commit: 0253be4a776598fae2d5952fa2f7fa36aac6deeb Parents: 785c0bd Author: Sergey Beryozkin <[email protected]> Authored: Fri Jun 27 16:58:13 2014 +0100 Committer: Sergey Beryozkin <[email protected]> Committed: Fri Jun 27 16:58:13 2014 +0100 ---------------------------------------------------------------------- .../ext/search/tika/TikaLuceneContentExtractor.java | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cxf/blob/0253be4a/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaLuceneContentExtractor.java ---------------------------------------------------------------------- diff --git a/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaLuceneContentExtractor.java b/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaLuceneContentExtractor.java index 28eaa35..567463b 100644 --- a/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaLuceneContentExtractor.java +++ b/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaLuceneContentExtractor.java @@ -20,6 +20,7 @@ package org.apache.cxf.jaxrs.ext.search.tika; import java.io.InputStream; import java.util.Date; +import java.util.List; import org.apache.cxf.jaxrs.ext.search.tika.TikaContentExtractor.TikaContent; import org.apache.lucene.document.Document; @@ -88,6 +89,21 @@ public class TikaLuceneContentExtractor { } /** + * Create new Tika-based content extractor using the provided parser instance and + * optional media 
type validation. If validation is enabled, the implementation + * will try to detect the media type of the input and validate it against media types + * supported by the parser. + * @param parsers parser instances + * @param validateMediaType enable or disable media type validation + * @param documentMetadata documentMetadata + */ + public TikaLuceneContentExtractor(final List<Parser> parsers, + final LuceneDocumentMetadata documentMetadata) { + this.extractor = new TikaContentExtractor(parsers); + this.defaultDocumentMetadata = documentMetadata; + } + + /** + * Extract the content and metadata from the input stream. Depending on media type validation, * the detector could be run against input stream in order to ensure that parser supports this * type of content.
