This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch TIKA-4299 in repository https://gitbox.apache.org/repos/asf/tika.git
commit 17745a4ec18a6b18e51649569b67d0ac4a17dc02 Author: tallison <talli...@apache.org> AuthorDate: Mon Aug 19 12:15:46 2024 -0400 TIKA-4299 -- clean up pagination in AbstractPDF2XHTML. --- .../apache/tika/parser/pdf/AbstractPDF2XHTML.java | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java index 53e28514f..f9a6c27ff 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java @@ -180,7 +180,6 @@ class AbstractPDF2XHTML extends PDFTextStripper { private final Set<COSBase> extractedFiles = new HashSet<>(); //zero-based pageIndex int pageIndex = 0; - int startPage = -1; //private in PDFTextStripper...must have own copy because we override processpages int unmappedUnicodeCharsPerPage = 0; int totalCharsPerPage = 0; @@ -1358,18 +1357,6 @@ class AbstractPDF2XHTML extends PDFTextStripper { */ @Override protected void processPages(PDPageTree pages) throws IOException { - //we currently need this hack because we aren't able to increment - //the private currentPageNo in PDFTextStripper, - //and PDFTextStripper's processPage relies on that variable - //being >= startPage when deciding whether or not to process a page - // See: - // if (currentPageNo >= startPage && currentPageNo <= endPage - // && (startBookmarkPageNumber == -1 || - // currentPageNo >= startBookmarkPageNumber) - // && (endBookmarkPageNumber == -1 || - // currentPageNo <= endBookmarkPageNumber)) - // { - super.setStartPage(1); for (PDPage page : pages) { if (getCurrentPageNo() >= getStartPage() && getCurrentPageNo() <= getEndPage()) { processPage(page); @@ -1392,15 +1379,6 @@ class AbstractPDF2XHTML extends PDFTextStripper { "to implement this."); } - @Override - public int getStartPage() { - return startPage; - } - - @Override - public void setStartPage(int startPage) { - this.startPage = startPage; - } @Override protected void showGlyph(Matrix textRenderingMatrix, PDFont font, int code,