[ https://issues.apache.org/jira/browse/TIKA-4207?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17831840#comment-17831840 ]
Hudson commented on TIKA-4207: ------------------------------ SUCCESS: Integrated in Jenkins build Tika » tika-main-jdk11 #1580 (See [https://ci-builds.apache.org/job/Tika/job/tika-main-jdk11/1580/]) TIKA-4207: Add handling of embedded bytes to tika-pipes (#1699) (github: [https://github.com/apache/tika/commit/4fe7312330c430f357012f8d0ff886a0fb344783]) * (edit) tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/WMFParser.java * (add) tika-pipes/tika-async-cli/src/test/resources/configs/TIKA-4207-emitter.xml * (edit) tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java * (add) tika-core/src/main/java/org/apache/tika/extractor/EmbeddedDocumentByteStoreExtractorFactory.java * (edit) tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractor.java * (edit) tika-core/src/test/java/org/apache/tika/pipes/PipesServerTest.java * (add) tika-app/src/test/java/org/apache/tika/cli/TikaCLIAsyncTest.java * (edit) tika-pipes/tika-pipes-iterators/pom.xml * (edit) tika-pipes/tika-async-cli/pom.xml * (add) tika-pipes/tika-pipes-iterators/tika-pipes-iterator-json/pom.xml * (edit) tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java * (add) tika-core/src/test/resources/org/apache/tika/pipes/TIKA-4207.xml * (add) tika-core/src/main/java/org/apache/tika/pipes/extractor/EmbeddedDocumentBytesConfig.java * (edit) tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-no-names.xml * (delete) tika-core/src/test/java/org/apache/tika/pipes/async/AsyncProcessorTest.java * (add) tika-core/src/main/java/org/apache/tika/extractor/EmbeddedDocumentBytesHandler.java * (add) tika-core/src/test/java/org/apache/tika/pipes/async/AsyncChaosMonkeyTest.java * (add) tika-pipes/tika-pipes-iterators/tika-pipes-iterator-json/src/test/resources/test-documents/test.json * (edit) 
tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/AsyncResource.java * (edit) tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-with-names.xml * (add) tika-core/src/main/java/org/apache/tika/pipes/extractor/EmittingEmbeddedDocumentBytesHandler.java * (delete) tika-pipes/tika-async-cli/src/test/resources/tika-config-broken.xml * (add) tika-pipes/tika-async-cli/src/test/resources/configs/tika-config-broken.xml * (add) tika-pipes/tika-pipes-iterators/tika-pipes-iterator-json/src/test/java/org/apache/tika/pipes/pipesiterator/json/TestJsonPipesIterator.java * (add) tika-core/src/main/java/org/apache/tika/extractor/BasicEmbeddedDocumentBytesHandler.java * (add) tika-pipes/tika-pipes-iterators/tika-pipes-iterator-json/src/main/java/org/apache/tika/pipes/pipesiterator/json/JsonPipesIterator.java * (edit) tika-serialization/src/main/java/org/apache/tika/metadata/serialization/JsonFetchEmitTuple.java * (edit) tika-core/src/main/java/org/apache/tika/pipes/PipesServer.java * (edit) tika-pipes/tika-async-cli/src/test/java/org/apache/tika/async/cli/TikaAsyncCLITest.java * (edit) tika-serialization/src/test/java/org/apache/tika/metadata/serialization/JsonFetchEmitTupleTest.java * (add) tika-pipes/tika-pipes-iterators/tika-pipes-iterator-json/src/test/resources/test-documents/test-with-embedded-bytes.json * (add) tika-core/src/main/java/org/apache/tika/extractor/RUnpackExtractor.java * (add) tika-core/src/test/java/org/apache/tika/parser/AutoDetectParserConfigTest.java * (edit) tika-core/src/main/java/org/apache/tika/pipes/FetchEmitTuple.java * (edit) tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java * (edit) tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/TikaPipesTest.java * (add) tika-core/src/main/java/org/apache/tika/extractor/RUnpackExtractorFactory.java * (add) tika-core/src/test/resources/org/apache/tika/pipes/TIKA-4207-limit-bytes.xml * 
(add) tika-core/src/main/java/org/apache/tika/extractor/BasicEmbeddedBytesSelector.java * (add) tika-core/src/main/java/org/apache/tika/extractor/AbstractEmbeddedDocumentBytesHandler.java * (add) tika-core/src/main/java/org/apache/tika/extractor/EmbeddedBytesSelector.java * (edit) tika-core/src/main/java/org/apache/tika/io/BoundedInputStream.java * (edit) tika-core/src/main/java/org/apache/tika/parser/AutoDetectParserConfig.java * (edit) tika-core/src/test/java/org/apache/tika/parser/mock/MockParser.java * (edit) tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java * (add) tika-pipes/tika-async-cli/src/test/java/org/apache/tika/async/cli/AsyncProcessorTest.java * (add) tika-pipes/tika-async-cli/src/test/resources/test-documents/basic_embedded.xml * (add) tika-core/src/test/resources/org/apache/tika/config/TIKA-4207-embedded-bytes-config.xml > PipesParser should have option to extract raw bytes of embedded files > --------------------------------------------------------------------- > > Key: TIKA-4207 > URL: https://issues.apache.org/jira/browse/TIKA-4207 > Project: Tika > Issue Type: New Feature > Reporter: Tim Allison > Priority: Major > Fix For: 3.0.0 > > > There are many use cases where text+metadata are important, but users also > need the raw bytes from embedded files. > Let's make it possible to extract the usual rmeta content _and_ the raw > bytes. This is a preliminary step that will offer more customization options > than the proposal in TIKA-3703. > This is targeted to 3.x. -- This message was sent by Atlassian Jira (v8.20.10#820010)