[ https://issues.apache.org/jira/browse/TIKA-3571?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17533970#comment-17533970 ]
Hudson commented on TIKA-3571: ------------------------------ SUCCESS: Integrated in Jenkins build Tika » tika-main-jdk8 #551 (See [https://ci-builds.apache.org/job/Tika/job/tika-main-jdk8/551/]) TIKA-3571 -- cleanup (tallison: [https://github.com/apache/tika/commit/678898a9d2a0f56b7639c245f0cd9b1842ccddd2]) * (delete) tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/renderer/pdf/PDFRenderingState.java * (edit) tika-core/src/main/java/org/apache/tika/metadata/Rendering.java * (add) tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/renderer/pdf/pdfbox/TextOnlyPDFRenderer.java * (add) tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/renderer/pdf/pdfbox/PDFRenderingState.java * (add) tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/renderer/pdf/pdfbox/PDFBoxRenderer.java * (edit) tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/ImageGraphicsEngine.java * (delete) tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/VectorGraphicsOnlyPDFRenderer.java * (edit) tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/OCR2XHTML.java * (edit) tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFMarkedContent2XHTML.java * (add) tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/renderer/pdf/pdfbox/VectorGraphicsOnlyPDFRenderer.java * (edit) tika-core/src/main/java/org/apache/tika/metadata/PagedText.java * (delete) 
tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/renderer/pdf/PDDocumentRenderer.java * (edit) tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFRenderingTest.java * (add) tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/renderer/pdf/pdfbox/NoTextPDFRenderer.java * (edit) tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java * (delete) tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/renderer/pdf/MuPDFRenderer.java * (add) tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/renderer/pdf/pdfbox/PDDocumentRenderer.java * (edit) tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java * (add) tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/resources/test-documents/testPDF_rotated.pdf * (add) tika-core/src/main/java/org/apache/tika/metadata/TikaPagedText.java * (edit) tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParser.java * (edit) tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java * (delete) tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/TextOnlyPDFRenderer.java * (edit) tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/resources/org/apache/tika/parser/pdf/tika-rendering-config.xml 
* (add) tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/renderer/pdf/mutool/MuPDFRenderer.java * (edit) tika-core/src/main/java/org/apache/tika/renderer/PageBasedRenderResults.java * (delete) tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/renderer/pdf/PDFBoxRenderer.java * (delete) tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/NoTextPDFRenderer.java > Add an interface for rendering engines > -------------------------------------- > > Key: TIKA-3571 > URL: https://issues.apache.org/jira/browse/TIKA-3571 > Project: Tika > Issue Type: Wish > Reporter: Tim Allison > Assignee: Tim Allison > Priority: Major > Fix For: 2.4.1 > > > We've now seen a few requests for extracting text _and_ rendering PDFs, and > certainly it might be useful to have alternatives for rendering files (e.g. > this [Alfresco > study|https://hub.alfresco.com/t5/alfresco-content-services-blog/pdf-rendering-engine-performance-and-fidelity-comparison/ba-p/287618]), > including MSOffice or at least PPTx... > And there are cases where users don't want the rendered images, but they do > want OCR to be run against the rendered images. > I doubt I'll have a chance to work on this for a while, but I wanted to open > an issue for discussion. -- This message was sent by Atlassian Jira (v8.20.7#820007)