Copilot commented on code in PR #699:
URL: https://github.com/apache/commons-compress/pull/699#discussion_r2303257132
##########
src/test/java/org/apache/commons/compress/compressors/bzip2/BZip2CompressorInputStreamTest.java:
##########
@@ -149,4 +160,103 @@ void testSingleByteReadConsistentlyReturnsMinusOneAtEof()
throws IOException {
assertEquals(-1, in.read());
}
}
+
+ @Test
+ void testCreateHuffmanDecodingTablesWithLargeAlphaSize() {
+ final Data data = new Data(1);
+ // Use a codeLengths array with length equal to MAX_ALPHA_SIZE (258)
to test array bounds.
+ final char[] codeLengths = new char[258];
+ for (int i = 0; i < codeLengths.length; i++) {
+ // Use all code lengths within valid range [1, 20]
+ codeLengths[i] = (char) ((i % 20) + 1);
+ }
+ data.temp_charArray2d[0] = codeLengths;
+ assertDoesNotThrow(
+ () ->
BZip2CompressorInputStream.createHuffmanDecodingTables(codeLengths.length, 1,
data),
+ "createHuffmanDecodingTables should not throw for valid
codeLengths array of MAX_ALPHA_SIZE");
+ assertEquals(data.minLens[0], 1, "Minimum code length should be 1");
+ }
+
+ @ParameterizedTest(name = "code length {0} -> must be rejected")
+ @ValueSource(ints = {MIN_CODE_LEN - 1, MAX_CODE_LEN + 1})
+ void testRecvDecodingTablesWithOutOfRangeCodeLength(final int codeLength)
throws IOException {
+ try (BitInputStream tables = prepareDecodingTables(codeLength)) {
+ final Data data = new Data(1);
+
+ final CompressorException ex = assertThrows(
+ CompressorException.class,
+ () ->
BZip2CompressorInputStream.recvDecodingTables(tables, data),
+ "Expected CompressorException for invalid code length " +
codeLength);
+
+ final String msg = ex.getMessage();
+ assertAll(
+ () -> assertNotNull(msg, "Exception message must not be
null"),
+ () -> assertTrue(msg.toLowerCase().contains("code
length"), "Message should mention 'code length'"),
+ () -> assertTrue(
+ msg.contains("[" + MIN_CODE_LEN + ", " +
MAX_CODE_LEN + "]"),
+ "Message should mention valid range [" +
MIN_CODE_LEN + ", " + MAX_CODE_LEN + "]"),
+ () -> assertTrue(
+ msg.contains(Integer.toString(codeLength)),
+ "Message should include the offending value " +
codeLength));
+ }
+ }
+
+ @ParameterizedTest(name = "code length {0} -> accepted and stored")
+ @ValueSource(ints = {MIN_CODE_LEN, MAX_CODE_LEN})
+ void testRecvDecodingTablesWithValidCodeLength(final int codeLength)
throws IOException {
+ try (BitInputStream tables = prepareDecodingTables(codeLength)) {
+ final Data data = new Data(1);
+
+ assertDoesNotThrow(
+ () ->
BZip2CompressorInputStream.recvDecodingTables(tables, data),
+ "Should accept code length " + codeLength + " within [" +
MIN_CODE_LEN + ", " + MAX_CODE_LEN + "]");
+
+ // We encoded 2 Huffman groups; both minLens should equal the
encoded codeLength
+ assertAll(
+ () -> assertEquals(codeLength, data.minLens[0], "Group 0
min code length mismatch"),
+ () -> assertEquals(codeLength, data.minLens[1], "Group 1
min code length mismatch"));
+ }
+ }
+
+ /**
+ * Builds a minimal bitstream for recvDecodingTables():
+ * <ul>
+ * <li>Uses only one symbol 'A' (0x41).</li>
+ * <li>Number of groups: 2 (minimum).</li>
+ * <li>Number of selectors: 3.</li>
+ * <li>Selectors: all three encode j=1 (unary "10").</li>
+ * <li>Huffman code lengths for 2 groups over alphabet size 3 (RUNA,
RUNB, EOB) are all equal to {@code codeLength}.</li>
+ * </ul>
+ * <p>
+ * <strong>Note:</strong> The values are chosen to keep everything
byte-aligned.
+ * </p>
+ * @param codeLength the code length to use for each symbol in each group;
must be in [0, 31]
+ */
+ private BitInputStream prepareDecodingTables(final int codeLength) {
+ assertTrue(0 <= codeLength && codeLength <= 31, "codeLength must be
between 0 and 31");
Review Comment:
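The validation allows codeLength values up to 31, but the actual valid range
according to the PR is [1, 20]. This inconsistency could allow invalid test
data that doesn't match the production constraints. Note, however, that
`testRecvDecodingTablesWithOutOfRangeCodeLength` passes `MIN_CODE_LEN - 1` and
`MAX_CODE_LEN + 1` to this helper, so tightening the assertion as suggested
below would make that test fail inside the helper, before
`recvDecodingTables` is ever called.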
```suggestion
* @param codeLength the code length to use for each symbol in each
group; must be in [1, 20]
*/
private BitInputStream prepareDecodingTables(final int codeLength) {
assertTrue(1 <= codeLength && codeLength <= 20, "codeLength must be
between 1 and 20");
```
##########
src/main/java/org/apache/commons/compress/compressors/bzip2/BZip2CompressorInputStream.java:
##########
@@ -171,7 +175,6 @@ private static void hbCreateDecodeTables(final int[] limit,
final int[] base, fi
}
for (int i = 0; i < alphaSize; i++) {
final int len = length[i] + 1;
- checkBounds(len, MAX_ALPHA_SIZE, "length");
base[len]++;
}
for (int i = 1, b = base[0]; i < MAX_CODE_LEN; i++) {
Review Comment:
The loop condition `i < MAX_CODE_LEN` stops one index short of the maximum
code length; it should be `i <= MAX_CODE_LEN`. Because the arrays are now
sized to `MAX_CODE_LEN + 2`, index `MAX_CODE_LEN` is in bounds, and skipping
it is an off-by-one error that could leave the entry for the maximum valid
code length unprocessed.
```suggestion
for (int i = 1, b = base[0]; i <= MAX_CODE_LEN; i++) {
```
##########
src/main/java/org/apache/commons/compress/compressors/bzip2/BZip2CompressorInputStream.java:
##########
@@ -298,17 +300,16 @@ private boolean complete() throws IOException {
/**
* Called by recvDecodingTables() exclusively.
*/
- private void createHuffmanDecodingTables(final int alphaSize, final int
nGroups) throws IOException {
- final Data dataShadow = this.data;
+ static void createHuffmanDecodingTables(final int alphaSize, final int
nGroups, final Data dataShadow) throws IOException {
final char[][] len = dataShadow.temp_charArray2d;
final int[] minLens = dataShadow.minLens;
final int[][] limit = dataShadow.limit;
final int[][] base = dataShadow.base;
final int[][] perm = dataShadow.perm;
for (int t = 0; t < nGroups; t++) {
- int minLen = 32;
- int maxLen = 0;
+ int minLen = MAX_CODE_LEN;
Review Comment:
Initializing `minLen` to `MAX_CODE_LEN` (20) yields the correct minimum only
when at least one code length is less than or equal to 20. It would be safer
to initialize it to a sentinel larger than any possible code length, such as
`MAX_CODE_LEN + 1`, so that a group with no valid code lengths cannot be
mistaken for one whose minimum is 20.
```suggestion
int minLen = MAX_CODE_LEN + 1;
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
users@infra.apache.org