Author: lehmi
Date: Sun Jun  1 11:50:43 2025
New Revision: 1926036

URL: http://svn.apache.org/viewvc?rev=1926036&view=rev
Log:
PDFBOX-5992: skip either a line break (CR, LF or CRLF) or any one-byte 
whitespace at the beginning of an inline image

Modified:
    
pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
    
pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java

Modified: 
pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
URL: 
http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java?rev=1926036&r1=1926035&r2=1926036&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java Sun Jun  1 11:50:43 2025
@@ -356,6 +356,11 @@ public abstract class BaseParser
         return true;
     }
 
+    /**
+     * Skip the upcoming CRLF or LF which are supposed to follow a stream. Trailing spaces are removed as well.
+     * 
+     * @throws IOException if something went wrong
+     */
     protected void skipWhiteSpaces() throws IOException
     {
         //PDF Ref 3.2.7 A stream must be followed by either
@@ -370,24 +375,56 @@ public abstract class BaseParser
         {
             whitespace = seqSource.read();
         }
+        if (!skipLinebreak(whitespace))
+        {
+            seqSource.unread(whitespace);
+        }
+    }
 
-        if (ASCII_CR == whitespace)
+    /**
+     * Skip one line break, such as CR, LF or CRLF.
+     * 
+     * @return true if a line break was found and removed.
+     * 
+     * @throws IOException if something went wrong
+     */
+    protected boolean skipLinebreak() throws IOException
+    {
+        int whitespace = seqSource.read();
+        // a line break is a CR, or LF or CRLF
+        if (!skipLinebreak(whitespace))
         {
-            whitespace = seqSource.read();
-            if (ASCII_LF != whitespace)
+            seqSource.unread(whitespace);
+            return false;
+        }
+        return true;
+    }
+
+    /**
+     * Skip one line break, such as CR, LF or CRLF.
+     * 
+     * @param linebreak the first character to be checked.
+     * 
+     * @return true if a line break was found and removed.
+     * 
+     * @throws IOException if something went wrong
+     */
+    private boolean skipLinebreak(int linebreak) throws IOException
+    {
+        // a line break is a CR, or LF or CRLF
+        if (isCR(linebreak))
+        {
+            int next = seqSource.read();
+            if (!isLF(next))
             {
-                seqSource.unread(whitespace);
-                //The spec says this is invalid but it happens in the real
-                //world so we must support it.
+                seqSource.unread(next);
             }
         }
-        else if (ASCII_LF != whitespace)
+        else if (!isLF(linebreak))
         {
-            //we are in an error.
-            //but again we will do a lenient parsing and just assume that everything
-            //is fine
-            seqSource.unread(whitespace);
+            return false;
         }
+        return true;
     }
 
     /**

Modified: 
pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
URL: 
http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java?rev=1926036&r1=1926035&r2=1926036&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java Sun Jun  1 11:50:43 2025
@@ -313,9 +313,10 @@ public class PDFStreamParser extends Bas
                             "' at stream offset " + currentPosition);
                 }
                 ByteArrayOutputStream imageData = new ByteArrayOutputStream();
-                if( isWhitespace() )
+                // skip one line break (CR, LF or CRLF) or any one-byte whitespace
+                if (!skipLinebreak() && isWhitespace())
                 {
-                    //pull off the whitespace character
+                    // pull off the whitespace character
                     seqSource.read();
                 }
                 int lastByte = seqSource.read();


Reply via email to