Author: tilman
Date: Wed Oct  1 10:15:07 2025
New Revision: 1928880

Log:
PDFBOX-6080: follow the /EndOfLine setting if it exists; otherwise use the
heuristics from twelvemonkeys

Modified:
   pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/CCITTFaxFilter.java

Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/CCITTFaxFilter.java
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/CCITTFaxFilter.java       Wed Oct  1 10:15:03 2025        (r1928879)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/CCITTFaxFilter.java       Wed Oct  1 10:15:07 2025        (r1928880)
@@ -68,29 +68,39 @@ final class CCITTFaxFilter extends Filte
         long tiffOptions = 0;
         if (k == 0)
         {
-            type = TIFFExtension.COMPRESSION_CCITT_T4; // Group 3 1D
-            byte[] streamData = new byte[20];
-            int bytesRead = encoded.read(streamData);
-            if (bytesRead == -1)
+            if (decodeParms.containsKey(COSName.END_OF_LINE))
             {
-                throw new IOException("EOF while reading CCITT header");
+                // PDFBOX-6080: respect the parameter if it exists
+                boolean hasEndOfLine = 
decodeParms.getBoolean(COSName.END_OF_LINE, false);
+                type = hasEndOfLine ? TIFFExtension.COMPRESSION_CCITT_T4 : 
TIFFExtension.COMPRESSION_CCITT_MODIFIED_HUFFMAN_RLE;
             }
-            PushbackInputStream pushbackInputStream = new 
PushbackInputStream(encoded, streamData.length);
-            pushbackInputStream.unread(streamData, 0, bytesRead);
-            encoded = pushbackInputStream;
-            if (streamData[0] != 0 || (streamData[1] >> 4 != 1 && 
streamData[1] != 1))
+            else
             {
-                // leading EOL (0b000000000001) not found, search further and 
try RLE if not
-                // found
-                type = TIFFExtension.COMPRESSION_CCITT_MODIFIED_HUFFMAN_RLE;
-                short b = (short) (((streamData[0] << 8) + (streamData[1] & 
0xff)) >> 4);
-                for (int i = 12; i < bytesRead * 8; i++)
+                // In twelvemonkeys, this part is found in CCITTFaxDecoderStream.findCompressionType()
+                // needed for 015315-p8-ccitt.pdf, PDFBOX-2123-1bit.pdf, PDFBOX-2778.pdf
+                type = TIFFExtension.COMPRESSION_CCITT_T4; // Group 3 1D
+                byte[] streamData = new byte[20];
+                int bytesRead = encoded.read(streamData);
+                if (bytesRead == -1)
                 {
-                    b = (short) ((b << 1) + ((streamData[(i / 8)] >> (7 - (i % 
8))) & 0x01));
-                    if ((b & 0xFFF) == 1)
+                    throw new IOException("EOF while reading CCITT header");
+                }
+                PushbackInputStream pushbackInputStream = new 
PushbackInputStream(encoded, streamData.length);
+                pushbackInputStream.unread(streamData, 0, bytesRead);
+                encoded = pushbackInputStream;
+                if (streamData[0] != 0 || (streamData[1] >> 4 != 1 && 
streamData[1] != 1))
+                {
+                    // leading EOL (0b000000000001) not found, search further and try RLE if not found
+                    type = 
TIFFExtension.COMPRESSION_CCITT_MODIFIED_HUFFMAN_RLE;
+                    short b = (short) (((streamData[0] << 8) + (streamData[1] 
& 0xff)) >> 4);
+                    for (int i = 12; i < bytesRead * 8; i++)
                     {
-                        type = TIFFExtension.COMPRESSION_CCITT_T4;
-                        break;
+                        b = (short) ((b << 1) + ((streamData[(i / 8)] >> (7 - 
(i % 8))) & 0x01));
+                        if ((b & 0xFFF) == 1)
+                        {
+                            type = TIFFExtension.COMPRESSION_CCITT_T4;
+                            break;
+                        }
                     }
                 }
             }

Reply via email to