This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-4299
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 17745a4ec18a6b18e51649569b67d0ac4a17dc02
Author: tallison <talli...@apache.org>
AuthorDate: Mon Aug 19 12:15:46 2024 -0400

    TIKA-4299 -- clean up pagination in AbstractPDF2XHTML.
---
 .../apache/tika/parser/pdf/AbstractPDF2XHTML.java  | 22 ----------------------
 1 file changed, 22 deletions(-)

diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java
index 53e28514f..f9a6c27ff 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java
@@ -180,7 +180,6 @@ class AbstractPDF2XHTML extends PDFTextStripper {
     private final Set<COSBase> extractedFiles = new HashSet<>();
     //zero-based pageIndex
     int pageIndex = 0;
-    int startPage = -1;
     //private in PDFTextStripper...must have own copy because we override processpages
     int unmappedUnicodeCharsPerPage = 0;
     int totalCharsPerPage = 0;
@@ -1358,18 +1357,6 @@ class AbstractPDF2XHTML extends PDFTextStripper {
      */
     @Override
     protected void processPages(PDPageTree pages) throws IOException {
-        //we currently need this hack because we aren't able to increment
-        //the private currentPageNo in PDFTextStripper,
-        //and PDFTextStripper's processPage relies on that variable
-        //being >= startPage when deciding whether or not to process a page
-        // See:
-        // if (currentPageNo >= startPage && currentPageNo <= endPage
-        //                && (startBookmarkPageNumber == -1 ||
-        //                currentPageNo >= startBookmarkPageNumber)
-        //                && (endBookmarkPageNumber == -1 ||
-        //                currentPageNo <= endBookmarkPageNumber))
-        //        {
-        super.setStartPage(1);
         for (PDPage page : pages) {
             if (getCurrentPageNo() >= getStartPage() && getCurrentPageNo() <= getEndPage()) {
                 processPage(page);
@@ -1392,15 +1379,6 @@ class AbstractPDF2XHTML extends PDFTextStripper {
                         "to implement this.");
     }
 
-    @Override
-    public int getStartPage() {
-        return startPage;
-    }
-
-    @Override
-    public void setStartPage(int startPage) {
-        this.startPage = startPage;
-    }
 
     @Override
     protected void showGlyph(Matrix textRenderingMatrix, PDFont font, int code,

Reply via email to