This is an automated email from the ASF dual-hosted git repository. github-bot pushed a commit to branch camel-main in repository https://gitbox.apache.org/repos/asf/camel-quarkus.git
commit bd31b19e493b00b38f41e6c4df9fb4f91c1e0c55 Author: JiriOndrusek <ondrusek.j...@gmail.com> AuthorDate: Thu Aug 31 14:28:31 2023 +0200 Tika - added explanation and limitation regarding pdf, see #5234 --- docs/modules/ROOT/pages/reference/extensions/tika.adoc | 2 ++ extensions/tika/runtime/src/main/doc/limitations.adoc | 2 ++ integration-tests/tika/src/main/resources/application.properties | 3 ++- .../java/org/apache/camel/quarkus/component/tika/it/TikaTest.java | 4 ++-- 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/docs/modules/ROOT/pages/reference/extensions/tika.adoc b/docs/modules/ROOT/pages/reference/extensions/tika.adoc index bf5efdddc7..56712b2cda 100644 --- a/docs/modules/ROOT/pages/reference/extensions/tika.adoc +++ b/docs/modules/ROOT/pages/reference/extensions/tika.adoc @@ -54,6 +54,8 @@ can be changed only via `application.properties`. While you can use any of the available https://tika.apache.org/1.24.1/formats.html[Tika parsers] in JVM mode, only some of those are supported in native mode - see the https://quarkiverse.github.io/quarkiverse-docs/quarkus-tika/dev/index.html[Quarkus Tika guide]. +The PDF and ODF parsers cannot be used in either JVM mode or native mode. The PDF extension is suggested for consuming PDF documents, to avoid a version conflict between Camel and the Quarkus Tika extension involving the PdfBox dependency. + Use of the Tika parser without any configuration will initialize all available parsers. Unfortunately as some of them don't work in the native mode, the whole execution will fail. diff --git a/extensions/tika/runtime/src/main/doc/limitations.adoc b/extensions/tika/runtime/src/main/doc/limitations.adoc index 8edfbeddbc..1d0f7a75c4 100644 --- a/extensions/tika/runtime/src/main/doc/limitations.adoc +++ b/extensions/tika/runtime/src/main/doc/limitations.adoc @@ -4,6 +4,8 @@ can be changed only via `application.properties`. 
While you can use any of the available https://tika.apache.org/1.24.1/formats.html[Tika parsers] in JVM mode, only some of those are supported in native mode - see the https://quarkiverse.github.io/quarkiverse-docs/quarkus-tika/dev/index.html[Quarkus Tika guide]. +The PDF and ODF parsers cannot be used in either JVM mode or native mode. The PDF extension is suggested for consuming PDF documents, to avoid a version conflict between Camel and the Quarkus Tika extension involving the PdfBox dependency. + Use of the Tika parser without any configuration will initialize all available parsers. Unfortunately as some of them don't work in the native mode, the whole execution will fail. diff --git a/integration-tests/tika/src/main/resources/application.properties b/integration-tests/tika/src/main/resources/application.properties index fb3468886b..536d32868c 100644 --- a/integration-tests/tika/src/main/resources/application.properties +++ b/integration-tests/tika/src/main/resources/application.properties @@ -15,7 +15,8 @@ ## limitations under the License. 
## --------------------------------------------------------------------------- -quarkus.tika.parsers= pdf,odf,office,xml,image +#quarkus.tika.parsers= pdf,odf,office,xml,image //Requires new release of quarkiverse-tika, which adopts tika with pdfBox 3.x +quarkus.tika.parsers= odf,office,xml,image quarkus.tika.parser.office = org.apache.tika.parser.microsoft.OfficeParser quarkus.tika.parser.image = org.apache.tika.parser.image.ImageParser quarkus.tika.parser.xml = org.apache.tika.parser.xml.DcXMLParser \ No newline at end of file diff --git a/integration-tests/tika/src/test/java/org/apache/camel/quarkus/component/tika/it/TikaTest.java b/integration-tests/tika/src/test/java/org/apache/camel/quarkus/component/tika/it/TikaTest.java index 45fc59d695..adf61f13f1 100644 --- a/integration-tests/tika/src/test/java/org/apache/camel/quarkus/component/tika/it/TikaTest.java +++ b/integration-tests/tika/src/test/java/org/apache/camel/quarkus/component/tika/it/TikaTest.java @@ -34,13 +34,13 @@ import static org.hamcrest.Matchers.startsWith; @QuarkusTest class TikaTest { - @Disabled //https://github.com/apache/camel-quarkus/issues/5234 + @Disabled //Requires new release of quarkiverse-tika, which adopts tika with pdfBox 3.x https://github.com/apache/camel-quarkus/issues/5234 @Test public void testPdf() throws Exception { testParse("quarkus.pdf", "application/pdf", "Hello Quarkus"); } - @Disabled //https://github.com/apache/camel-quarkus/issues/5234 + @Disabled //Requires new release of quarkiverse-tika, which adopts tika with pdfBox 3.x https://github.com/apache/camel-quarkus/issues/5234 @Test public void testOdf() throws Exception { testParse("testOpenOffice2.odt", "application/vnd.oasis.opendocument.text",