Author: tilman
Date: Tue Sep 9 08:33:52 2025
New Revision: 1928313
Log:
PDFBOX-6065: handle KwKwK special case, by Daniel Persson with ChatGPT
Modified:
pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/filter/LZWFilter.java
Modified:
pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/filter/LZWFilter.java
==============================================================================
--- pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/filter/LZWFilter.java Tue Sep 9 08:30:14 2025 (r1928312)
+++ pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/filter/LZWFilter.java Tue Sep 9 08:33:52 2025 (r1928313)
@@ -55,9 +55,6 @@ public class LZWFilter extends Filter
*/
public static final long EOD = 257;
- //BEWARE: codeTable must be local to each method, because there is only
- // one instance of each filter
-
/**
* {@inheritDoc}
*/
@@ -73,11 +70,12 @@ public class LZWFilter extends Filter
private static void doLZWDecode(InputStream encoded, OutputStream decoded,
boolean earlyChange) throws IOException
{
- List<byte[]> codeTable = new ArrayList<>();
+ List<byte[]> codeTable = createCodeTable(); // includes CLEAR/EOD handling as needed
int chunk = 9;
final MemoryCacheImageInputStream in = new
MemoryCacheImageInputStream(encoded);
+
+ byte[] prev = null; // no previous string yet
long nextCommand;
- long prevCommand = -1;
try
{
@@ -87,60 +85,50 @@ public class LZWFilter extends Filter
{
chunk = 9;
codeTable = createCodeTable();
- prevCommand = -1;
+ prev = null;
+ continue;
}
- else
+
+ byte[] curr;
+
+ if (nextCommand < codeTable.size())
{
- if (nextCommand < codeTable.size())
- {
- byte[] data = codeTable.get((int) nextCommand);
- byte firstByte = data[0];
- decoded.write(data);
- if (prevCommand != -1)
- {
- checkIndexBounds(codeTable, prevCommand, in);
- data = codeTable.get((int) prevCommand);
- byte[] newData = Arrays.copyOf(data, data.length + 1);
- newData[data.length] = firstByte;
- codeTable.add(newData);
- }
- }
- else
+ // Normal case: code exists
+ curr = codeTable.get((int) nextCommand);
+ decoded.write(curr);
+
+ if (prev != null)
{
- checkIndexBounds(codeTable, prevCommand, in);
- byte[] data = codeTable.get((int) prevCommand);
- byte[] newData = Arrays.copyOf(data, data.length + 1);
- newData[data.length] = data[0];
- decoded.write(newData);
- codeTable.add(newData);
+ // Add prev + first(curr)
+ byte[] entry = Arrays.copyOf(prev, prev.length + 1);
+ entry[prev.length] = curr[0];
+ codeTable.add(entry);
}
-
- chunk = calculateChunk(codeTable.size(), earlyChange);
- prevCommand = nextCommand;
}
+ else if (nextCommand == codeTable.size() && prev != null)
+ {
+ // KwKwK case: code equals next available index
+ curr = Arrays.copyOf(prev, prev.length + 1);
+ curr[prev.length] = prev[0];
+ decoded.write(curr);
+ codeTable.add(curr);
+ }
+ else
+ {
+ // Corrupt stream (code out of range, or KwKwK without prev)
+ throw new EOFException("Invalid LZW code: " + nextCommand);
+ }
+
+ prev = curr; // move forward
+ chunk = calculateChunk(codeTable.size(), earlyChange);
}
}
catch (EOFException ex)
{
LOG.warn("Premature EOF in LZW stream, EOD code missing", ex);
}
- decoded.flush();
- }
- private static void checkIndexBounds(List<byte[]> codeTable, long index, MemoryCacheImageInputStream in)
- throws IOException
- {
- if (index < 0)
- {
- throw new IOException("negative array index: " + index + " near offset "
- + in.getStreamPosition());
- }
- if (index >= codeTable.size())
- {
- throw new IOException("array index overflow: " + index +
- " >= " + codeTable.size() + " near offset "
- + in.getStreamPosition());
- }
+ decoded.flush();
}
/**