Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package libdeflate for openSUSE:Factory checked in at 2022-02-15 23:57:46 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/libdeflate (Old) and /work/SRC/openSUSE:Factory/.libdeflate.new.1956 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "libdeflate" Tue Feb 15 23:57:46 2022 rev:4 rq:955073 version:1.10 Changes: -------- --- /work/SRC/openSUSE:Factory/libdeflate/libdeflate.changes 2022-01-25 17:38:34.273294947 +0100 +++ /work/SRC/openSUSE:Factory/.libdeflate.new.1956/libdeflate.changes 2022-02-15 23:58:14.092370669 +0100 @@ -1,0 +2,8 @@ +Mon Feb 14 23:14:56 UTC 2022 - Dirk M??ller <dmuel...@suse.com> + +- update to 1.10: + * Added an additional check to the decompressor to make it quickly detect + certain bad inputs and not try to generate an unbounded amount of output. + * Cleaned up a few things in the compression code. + +------------------------------------------------------------------- Old: ---- libdeflate-1.9.tar.gz New: ---- libdeflate-1.10.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ libdeflate.spec ++++++ --- /var/tmp/diff_new_pack.Hq6TPb/_old 2022-02-15 23:58:14.476371729 +0100 +++ /var/tmp/diff_new_pack.Hq6TPb/_new 2022-02-15 23:58:14.480371740 +0100 @@ -19,7 +19,7 @@ %define major 0 %define libname %{name}%{major} Name: libdeflate -Version: 1.9 +Version: 1.10 Release: 0 Summary: Library for DEFLATE/zlib/gzip compression and decompression License: BSD-2-Clause ++++++ libdeflate-1.9.tar.gz -> libdeflate-1.10.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libdeflate-1.9/NEWS.md new/libdeflate-1.10/NEWS.md --- old/libdeflate-1.9/NEWS.md 2022-01-12 06:24:28.000000000 +0100 +++ new/libdeflate-1.10/NEWS.md 2022-02-06 23:48:22.000000000 +0100 @@ -1,5 +1,19 @@ # libdeflate release notes +## Version 1.10 + +* Added an additional check to the decompressor to make it quickly detect + certain bad inputs and not try to generate an unbounded amount of output. + + Note: this was only a problem when decompressing with an unknown output size, + which isn't the recommended use case of libdeflate. However, + `libdeflate-gunzip` has to do this, and it would run out of memory as it would + keep trying to allocate a larger output buffer. + +* Fixed a build error on Solaris. + +* Cleaned up a few things in the compression code. + ## Version 1.9 * Made many improvements to the compression algorithms, and rebalanced the @@ -9,8 +23,8 @@ ratio on data where short matches aren't useful, such as DNA sequencing data. This applies to all compression levels, but primarily to levels 1-9. - * Levels 1 was made much faster, though it often compresses slightly worse - than before (but still better than zlib). + * Level 1 was made much faster, though it often compresses slightly worse than + before (but still better than zlib). * Levels 8-9 were also made faster, though they often compress slightly worse than before (but still better than zlib). On some data, levels 8-9 are much diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libdeflate-1.9/lib/decompress_template.h new/libdeflate-1.10/lib/decompress_template.h --- old/libdeflate-1.9/lib/decompress_template.h 2022-01-12 06:24:28.000000000 +0100 +++ new/libdeflate-1.10/lib/decompress_template.h 2022-02-06 23:48:22.000000000 +0100 @@ -43,7 +43,7 @@ const u8 * const in_end = in_next + in_nbytes; bitbuf_t bitbuf = 0; unsigned bitsleft = 0; - size_t overrun_count = 0; + size_t overread_count = 0; unsigned i; unsigned is_final_block; unsigned block_type; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libdeflate-1.9/lib/deflate_compress.c new/libdeflate-1.10/lib/deflate_compress.c --- old/libdeflate-1.9/lib/deflate_compress.c 2022-01-12 06:24:28.000000000 +0100 +++ new/libdeflate-1.10/lib/deflate_compress.c 2022-02-06 23:48:22.000000000 +0100 @@ -52,10 +52,10 @@ /* * This is the minimum block length that the compressor will use, in - * uncompressed bytes. It is also the amount by which the final block is - * allowed to grow past the soft maximum length in order to avoid using a very - * short block at the end. This should be a value below which using shorter - * blocks is unlikely to be worthwhile, due to the per-block overhead. + * uncompressed bytes. It is also approximately the amount by which the final + * block is allowed to grow past the soft maximum length in order to avoid using + * a very short block at the end. This should be a value below which using + * shorter blocks is unlikely to be worthwhile, due to the per-block overhead. * * Defining a fixed minimum block length is needed in order to guarantee a * reasonable upper bound on the compressed size. It's also needed because our @@ -94,8 +94,8 @@ * For deflate_compress_fastest(): This is the soft maximum block length. * deflate_compress_fastest() doesn't use the regular block splitting algorithm; * it only ends blocks when they reach FAST_SOFT_MAX_BLOCK_LENGTH bytes or - * FAST_SEQ_STORE_LENGTH - 1 matches. Therefore, this value should be lower - * than the regular SOFT_MAX_BLOCK_LENGTH. + * FAST_SEQ_STORE_LENGTH matches. Therefore, this value should be lower than + * the regular SOFT_MAX_BLOCK_LENGTH. */ #define FAST_SOFT_MAX_BLOCK_LENGTH 65535 @@ -490,7 +490,7 @@ /* Frequency counters for the current block */ struct deflate_freqs freqs; - /* Block split statistics for the currently pending block */ + /* Block split statistics for the current block */ struct block_split_stats split_stats; /* Dynamic Huffman codes for the current block */ @@ -648,7 +648,7 @@ */ u8 *next; - /* Pointer just past the end of the output buffer */ + /* Pointer to just past the end of the output buffer */ u8 *end; }; @@ -664,8 +664,7 @@ #define OUTPUT_END_PADDING 8 /* - * Initialize the output bitstream. 'size' is assumed to be at least - * OUTPUT_END_PADDING. + * Initialize the output bitstream. 'size' must be at least OUTPUT_END_PADDING. */ static void deflate_init_output(struct deflate_output_bitstream *os, @@ -680,11 +679,12 @@ /* * Add some bits to the bitbuffer variable of the output bitstream. The caller - * must make sure there is enough room. + * must ensure that os->bitcount + num_bits <= BITBUF_NBITS, by calling + * deflate_flush_bits() frequently enough. */ static forceinline void deflate_add_bits(struct deflate_output_bitstream *os, - const bitbuf_t bits, const unsigned num_bits) + bitbuf_t bits, unsigned num_bits) { os->bitbuf |= bits << os->bitcount; os->bitcount += num_bits; @@ -712,6 +712,18 @@ } } +/* + * Add bits, then flush right away. Only use this where it is difficult to + * batch up calls to deflate_add_bits(). + */ +static forceinline void +deflate_write_bits(struct deflate_output_bitstream *os, + bitbuf_t bits, unsigned num_bits) +{ + deflate_add_bits(os, bits, num_bits); + deflate_flush_bits(os); +} + /* Align the bitstream on a byte boundary. */ static forceinline void deflate_align_bitstream(struct deflate_output_bitstream *os) @@ -722,7 +734,8 @@ /* * Flush any remaining bits to the output buffer if needed. Return the total - * number of bytes written to the output buffer, or 0 if an overflow occurred. + * number of bytes that have been written to the output buffer since + * deflate_init_output(), or 0 if an overflow occurred. */ static size_t deflate_flush_output(struct deflate_output_bitstream *os) @@ -743,7 +756,7 @@ * Given the binary tree node A[subtree_idx] whose children already satisfy the * maxheap property, swap the node with its greater child until it is greater * than or equal to both of its children, so that the maxheap property is - * satisfied in the subtree rooted at A[subtree_idx]. + * satisfied in the subtree rooted at A[subtree_idx]. 'A' uses 1-based indices. */ static void heapify_subtree(u32 A[], unsigned length, unsigned subtree_idx) @@ -805,7 +818,7 @@ #define NUM_SYMBOL_BITS 10 #define SYMBOL_MASK ((1 << NUM_SYMBOL_BITS) - 1) -#define GET_NUM_COUNTERS(num_syms) ((((num_syms) + 3 / 4) + 3) & ~3) +#define GET_NUM_COUNTERS(num_syms) (num_syms) /* * Sort the symbols primarily by frequency and secondarily by symbol value. @@ -843,26 +856,13 @@ unsigned counters[GET_NUM_COUNTERS(DEFLATE_MAX_NUM_SYMS)]; /* - * We rely on heapsort, but with an added optimization. Since it's - * common for most symbol frequencies to be low, we first do a count - * sort using a limited number of counters. High frequencies will be - * counted in the last counter, and only they will be sorted with - * heapsort. + * We use heapsort, but with an added optimization. Since often most + * symbol frequencies are low, we first do a count sort using a limited + * number of counters. High frequencies are counted in the last + * counter, and only they will be sorted with heapsort. * * Note: with more symbols, it is generally beneficial to have more - * counters. About 1 counter per 4 symbols seems fast. - * - * Note: I also tested radix sort, but even for large symbol counts (> - * 255) and frequencies bounded at 16 bits (enabling radix sort by just - * two base-256 digits), it didn't seem any faster than the method - * implemented here. - * - * Note: I tested the optimized quicksort implementation from glibc - * (with indirection overhead removed), but it was only marginally - * faster than the simple heapsort implemented here. - * - * Tests were done with building the codes for LZX. Results may vary - * for different compression algorithms...! + * counters. About 1 counter per symbol seems fastest. */ num_counters = GET_NUM_COUNTERS(num_syms); @@ -909,7 +909,7 @@ } /* - * Build the Huffman tree. + * Build a Huffman tree. * * This is an optimized implementation that * (a) takes advantage of the frequencies being already sorted; @@ -1103,6 +1103,33 @@ } } +/* Reverse the Huffman codeword 'codeword', which is 'len' bits in length. */ +static u32 +reverse_codeword(u32 codeword, u8 len) +{ + /* + * The following branchless algorithm is faster than going bit by bit. + * Note: since no codewords are longer than 16 bits, we only need to + * reverse the low 16 bits of the 'u32'. + */ + STATIC_ASSERT(DEFLATE_MAX_CODEWORD_LEN <= 16); + + /* Flip adjacent 1-bit fields. */ + codeword = ((codeword & 0x5555) << 1) | ((codeword & 0xAAAA) >> 1); + + /* Flip adjacent 2-bit fields. */ + codeword = ((codeword & 0x3333) << 2) | ((codeword & 0xCCCC) >> 2); + + /* Flip adjacent 4-bit fields. */ + codeword = ((codeword & 0x0F0F) << 4) | ((codeword & 0xF0F0) >> 4); + + /* Flip adjacent 8-bit fields. */ + codeword = ((codeword & 0x00FF) << 8) | ((codeword & 0xFF00) >> 8); + + /* Return the high 'len' bits of the bit-reversed 16 bit value. */ + return codeword >> (16 - len); +} + /* * Generate the codewords for a canonical Huffman code. * @@ -1161,13 +1188,18 @@ next_codewords[len] = (next_codewords[len - 1] + len_counts[len - 1]) << 1; - for (sym = 0; sym < num_syms; sym++) - A[sym] = next_codewords[lens[sym]]++; + for (sym = 0; sym < num_syms; sym++) { + u8 len = lens[sym]; + u32 codeword = next_codewords[len]++; + + /* DEFLATE requires bit-reversed codewords. */ + A[sym] = reverse_codeword(codeword, len); + } } /* * --------------------------------------------------------------------- - * make_canonical_huffman_code() + * deflate_make_huffman_code() * --------------------------------------------------------------------- * * Given an alphabet and the frequency of each symbol in it, construct a @@ -1266,9 +1298,9 @@ * file: C/HuffEnc.c), which was placed in the public domain by Igor Pavlov. */ static void -make_canonical_huffman_code(unsigned num_syms, unsigned max_codeword_len, - const u32 freqs[restrict], - u8 lens[restrict], u32 codewords[restrict]) +deflate_make_huffman_code(unsigned num_syms, unsigned max_codeword_len, + const u32 freqs[restrict], + u8 lens[restrict], u32 codewords[restrict]) { u32 *A = codewords; unsigned num_used_syms; @@ -1352,53 +1384,11 @@ memset(&c->freqs, 0, sizeof(c->freqs)); } -/* Reverse the Huffman codeword 'codeword', which is 'len' bits in length. */ -static u32 -deflate_reverse_codeword(u32 codeword, u8 len) -{ - /* - * The following branchless algorithm is faster than going bit by bit. - * Note: since no codewords are longer than 16 bits, we only need to - * reverse the low 16 bits of the 'u32'. - */ - STATIC_ASSERT(DEFLATE_MAX_CODEWORD_LEN <= 16); - - /* Flip adjacent 1-bit fields. */ - codeword = ((codeword & 0x5555) << 1) | ((codeword & 0xAAAA) >> 1); - - /* Flip adjacent 2-bit fields. */ - codeword = ((codeword & 0x3333) << 2) | ((codeword & 0xCCCC) >> 2); - - /* Flip adjacent 4-bit fields. */ - codeword = ((codeword & 0x0F0F) << 4) | ((codeword & 0xF0F0) >> 4); - - /* Flip adjacent 8-bit fields. */ - codeword = ((codeword & 0x00FF) << 8) | ((codeword & 0xFF00) >> 8); - - /* Return the high 'len' bits of the bit-reversed 16 bit value. */ - return codeword >> (16 - len); -} - -/* Make a canonical Huffman code with bit-reversed codewords. */ -static void -deflate_make_huffman_code(unsigned num_syms, unsigned max_codeword_len, - const u32 freqs[], u8 lens[], u32 codewords[]) -{ - unsigned sym; - - make_canonical_huffman_code(num_syms, max_codeword_len, - freqs, lens, codewords); - - for (sym = 0; sym < num_syms; sym++) - codewords[sym] = deflate_reverse_codeword(codewords[sym], - lens[sym]); -} - /* * Build the literal/length and offset Huffman codes for a DEFLATE block. * - * This takes as input the frequency tables for each code and produces as output - * a set of tables that map symbols to codewords and codeword lengths. + * This takes as input the frequency tables for each alphabet and produces as + * output a set of tables that map symbols to codewords and codeword lengths. */ static void deflate_make_huffman_codes(const struct deflate_freqs *freqs, @@ -1633,9 +1623,8 @@ /* Output the lengths of the codewords in the precode. */ for (i = 0; i < c->num_explicit_lens; i++) { - deflate_add_bits(os, c->precode_lens[ + deflate_write_bits(os, c->precode_lens[ deflate_precode_lens_permutation[i]], 3); - deflate_flush_bits(os); } /* Output the encoded lengths of the codewords in the larger code. */ @@ -1719,13 +1708,51 @@ do { unsigned lit = *in_next++; - deflate_add_bits(os, codes->codewords.litlen[lit], - codes->lens.litlen[lit]); - deflate_flush_bits(os); + deflate_write_bits(os, codes->codewords.litlen[lit], + codes->lens.litlen[lit]); } while (--litrunlen); #endif } +static forceinline void +deflate_write_match(struct deflate_output_bitstream * restrict os, + unsigned length, unsigned length_slot, + unsigned offset, unsigned offset_symbol, + const struct deflate_codes * restrict codes) +{ + unsigned litlen_symbol = DEFLATE_FIRST_LEN_SYM + length_slot; + + /* Litlen symbol */ + deflate_add_bits(os, codes->codewords.litlen[litlen_symbol], + codes->lens.litlen[litlen_symbol]); + + /* Extra length bits */ + STATIC_ASSERT(CAN_BUFFER(MAX_LITLEN_CODEWORD_LEN + + DEFLATE_MAX_EXTRA_LENGTH_BITS)); + deflate_add_bits(os, length - deflate_length_slot_base[length_slot], + deflate_extra_length_bits[length_slot]); + + if (!CAN_BUFFER(MAX_LITLEN_CODEWORD_LEN + + DEFLATE_MAX_EXTRA_LENGTH_BITS + + MAX_OFFSET_CODEWORD_LEN + + DEFLATE_MAX_EXTRA_OFFSET_BITS)) + deflate_flush_bits(os); + + /* Offset symbol */ + deflate_add_bits(os, codes->codewords.offset[offset_symbol], + codes->lens.offset[offset_symbol]); + + if (!CAN_BUFFER(MAX_OFFSET_CODEWORD_LEN + + DEFLATE_MAX_EXTRA_OFFSET_BITS)) + deflate_flush_bits(os); + + /* Extra offset bits */ + deflate_add_bits(os, offset - deflate_offset_slot_base[offset_symbol], + deflate_extra_offset_bits[offset_symbol]); + + deflate_flush_bits(os); +} + static void deflate_write_sequences(struct deflate_output_bitstream * restrict os, const struct deflate_codes * restrict codes, @@ -1737,9 +1764,6 @@ for (;;) { u32 litrunlen = seq->litrunlen_and_length & SEQ_LITRUNLEN_MASK; unsigned length = seq->litrunlen_and_length >> SEQ_LENGTH_SHIFT; - unsigned length_slot; - unsigned litlen_symbol; - unsigned offset_symbol; if (litrunlen) { deflate_write_literal_run(os, in_next, litrunlen, @@ -1750,44 +1774,10 @@ if (length == 0) return; - in_next += length; - - length_slot = seq->length_slot; - litlen_symbol = DEFLATE_FIRST_LEN_SYM + length_slot; - - /* Litlen symbol */ - deflate_add_bits(os, codes->codewords.litlen[litlen_symbol], - codes->lens.litlen[litlen_symbol]); - - /* Extra length bits */ - STATIC_ASSERT(CAN_BUFFER(MAX_LITLEN_CODEWORD_LEN + - DEFLATE_MAX_EXTRA_LENGTH_BITS)); - deflate_add_bits(os, - length - deflate_length_slot_base[length_slot], - deflate_extra_length_bits[length_slot]); - - if (!CAN_BUFFER(MAX_LITLEN_CODEWORD_LEN + - DEFLATE_MAX_EXTRA_LENGTH_BITS + - MAX_OFFSET_CODEWORD_LEN + - DEFLATE_MAX_EXTRA_OFFSET_BITS)) - deflate_flush_bits(os); - - /* Offset symbol */ - offset_symbol = seq->offset_symbol; - deflate_add_bits(os, codes->codewords.offset[offset_symbol], - codes->lens.offset[offset_symbol]); - - if (!CAN_BUFFER(MAX_OFFSET_CODEWORD_LEN + - DEFLATE_MAX_EXTRA_OFFSET_BITS)) - deflate_flush_bits(os); - - /* Extra offset bits */ - deflate_add_bits(os, seq->offset - - deflate_offset_slot_base[offset_symbol], - deflate_extra_offset_bits[offset_symbol]); - - deflate_flush_bits(os); + deflate_write_match(os, length, seq->length_slot, + seq->offset, seq->offset_symbol, codes); + in_next += length; seq++; } } @@ -1797,10 +1787,6 @@ * Follow the minimum-cost path in the graph of possible match/literal choices * for the current block and write out the matches/literals using the specified * Huffman codes. - * - * Note: this is slightly duplicated with deflate_write_sequences(), the reason - * being that we don't want to waste time translating between intermediate - * match/literal representations. */ static void deflate_write_item_list(struct deflate_output_bitstream *os, @@ -1814,51 +1800,18 @@ do { unsigned length = cur_node->item & OPTIMUM_LEN_MASK; unsigned offset = cur_node->item >> OPTIMUM_OFFSET_SHIFT; - unsigned litlen_symbol; - unsigned length_slot; - unsigned offset_slot; if (length == 1) { /* Literal */ - litlen_symbol = offset; - deflate_add_bits(os, - codes->codewords.litlen[litlen_symbol], - codes->lens.litlen[litlen_symbol]); - deflate_flush_bits(os); + deflate_write_bits(os, codes->codewords.litlen[offset], + codes->lens.litlen[offset]); } else { - /* Match length */ - length_slot = deflate_length_slot[length]; - litlen_symbol = DEFLATE_FIRST_LEN_SYM + length_slot; - deflate_add_bits(os, - codes->codewords.litlen[litlen_symbol], - codes->lens.litlen[litlen_symbol]); - - deflate_add_bits(os, - length - deflate_length_slot_base[length_slot], - deflate_extra_length_bits[length_slot]); - - if (!CAN_BUFFER(MAX_LITLEN_CODEWORD_LEN + - DEFLATE_MAX_EXTRA_LENGTH_BITS + - MAX_OFFSET_CODEWORD_LEN + - DEFLATE_MAX_EXTRA_OFFSET_BITS)) - deflate_flush_bits(os); - - - /* Match offset */ - offset_slot = c->p.n.offset_slot_full[offset]; - deflate_add_bits(os, - codes->codewords.offset[offset_slot], - codes->lens.offset[offset_slot]); - - if (!CAN_BUFFER(MAX_OFFSET_CODEWORD_LEN + - DEFLATE_MAX_EXTRA_OFFSET_BITS)) - deflate_flush_bits(os); - - deflate_add_bits(os, - offset - deflate_offset_slot_base[offset_slot], - deflate_extra_offset_bits[offset_slot]); - - deflate_flush_bits(os); + /* Match */ + deflate_write_match(os, length, + deflate_length_slot[length], + offset, + c->p.n.offset_slot_full[offset], + codes); } cur_node += length; } while (cur_node != end_node); @@ -1870,9 +1823,8 @@ deflate_write_end_of_block(struct deflate_output_bitstream *os, const struct deflate_codes *codes) { - deflate_add_bits(os, codes->codewords.litlen[DEFLATE_END_OF_BLOCK], - codes->lens.litlen[DEFLATE_END_OF_BLOCK]); - deflate_flush_bits(os); + deflate_write_bits(os, codes->codewords.litlen[DEFLATE_END_OF_BLOCK], + codes->lens.litlen[DEFLATE_END_OF_BLOCK]); } static void @@ -2054,19 +2006,19 @@ * literals we only look at the high bits and low bits, and for matches we only * look at whether the match is long or not. The assumption is that for typical * "real" data, places that are good block boundaries will tend to be noticeable - * based only on changes in these aggregate frequencies, without looking for + * based only on changes in these aggregate probabilities, without looking for * subtle differences in individual symbols. For example, a change from ASCII * bytes to non-ASCII bytes, or from few matches (generally less compressible) * to many matches (generally more compressible), would be easily noticed based * on the aggregates. * - * For determining whether the frequency distributions are "different enough" to - * start a new block, the simply heuristic of splitting when the sum of absolute - * differences exceeds a constant seems to be good enough. We also add a number - * proportional to the block length so that the algorithm is more likely to end - * long blocks than short blocks. This reflects the general expectation that it - * will become increasingly beneficial to start a new block as the current - * block grows longer. + * For determining whether the probability distributions are "different enough" + * to start a new block, the simple heuristic of splitting when the sum of + * absolute differences exceeds a constant seems to be good enough. We also add + * a number proportional to the block length so that the algorithm is more + * likely to end long blocks than short blocks. This reflects the general + * expectation that it will become increasingly beneficial to start a new block + * as the current block grows longer. * * Finally, for an approximation, it is not strictly necessary that the exact * symbols being used are considered. With "near-optimal parsing", for example, @@ -2130,9 +2082,14 @@ { if (stats->num_observations > 0) { /* - * Note: to avoid slow divisions, we do not divide by - * 'num_observations', but rather do all math with the numbers - * multiplied by 'num_observations'. + * Compute the sum of absolute differences of probabilities. To + * avoid needing to use floating point arithmetic or do slow + * divisions, we do all arithmetic with the probabilities + * multiplied by num_observations * num_new_observations. E.g., + * for the "old" observations the probabilities would be + * (double)observations[i] / num_observations, but since we + * multiply by both num_observations and num_new_observations we + * really do observations[i] * num_new_observations. */ u32 total_delta = 0; u32 num_items; @@ -2152,6 +2109,12 @@ num_items = stats->num_observations + stats->num_new_observations; + /* + * Heuristic: the cutoff is when the sum of absolute differences + * of probabilities becomes at least 200/512. As above, the + * probability is multiplied by both num_new_observations and + * num_observations. Be careful to avoid integer overflow. + */ cutoff = stats->num_new_observations * 200 / 512 * stats->num_observations; /* diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libdeflate-1.9/lib/deflate_decompress.c new/libdeflate-1.10/lib/deflate_decompress.c --- old/libdeflate-1.9/lib/deflate_decompress.c 2022-01-12 06:24:28.000000000 +0100 +++ new/libdeflate-1.10/lib/deflate_decompress.c 2022-02-06 23:48:22.000000000 +0100 @@ -99,11 +99,6 @@ #define OFFSET_ENOUGH 402 /* enough 32 8 15 */ /* - * Type for codeword lengths. - */ -typedef u8 len_t; - -/* * The main DEFLATE decompressor structure. Since this implementation only * supports full buffer decompression, this structure does not store the entire * decompression state, but rather only some arrays that are too large to @@ -121,12 +116,12 @@ */ union { - len_t precode_lens[DEFLATE_NUM_PRECODE_SYMS]; + u8 precode_lens[DEFLATE_NUM_PRECODE_SYMS]; struct { - len_t lens[DEFLATE_NUM_LITLEN_SYMS + - DEFLATE_NUM_OFFSET_SYMS + - DEFLATE_MAX_LENS_OVERRUN]; + u8 lens[DEFLATE_NUM_LITLEN_SYMS + + DEFLATE_NUM_OFFSET_SYMS + + DEFLATE_MAX_LENS_OVERRUN]; u32 precode_decode_table[PRECODE_ENOUGH]; } l; @@ -204,25 +199,27 @@ * * If we would overread the input buffer, we just don't read anything, leaving * the bits zeroed but marking them filled. This simplifies the decompressor - * because it removes the need to distinguish between real overreads and - * overreads that occur only because of the decompressor's own lookahead. + * because it removes the need to always be able to distinguish between real + * overreads and overreads caused only by the decompressor's own lookahead. * - * The disadvantage is that real overreads are not detected immediately. - * However, this is safe because the decompressor is still guaranteed to make - * forward progress when presented never-ending 0 bits. In an existing block - * output will be getting generated, whereas new blocks can only be uncompressed - * (since the type code for uncompressed blocks is 0), for which we check for - * previous overread. But even if we didn't check, uncompressed blocks would - * fail to validate because LEN would not equal ~NLEN. So the decompressor will - * eventually either detect that the output buffer is full, or detect invalid - * input, or finish the final block. + * We do still keep track of the number of bytes that have been overread, for + * two reasons. First, it allows us to determine the exact number of bytes that + * were consumed once the stream ends or an uncompressed block is reached. + * Second, it allows us to stop early if the overread amount gets so large (more + * than sizeof bitbuf) that it can only be caused by a real overread. (The + * second part is arguably unneeded, since libdeflate is buffer-based; given + * infinite zeroes, it will eventually either completely fill the output buffer + * or return an error. However, we do it to be slightly more friendly to the + * not-recommended use case of decompressing with an unknown output size.) */ #define FILL_BITS_BYTEWISE() \ do { \ - if (likely(in_next != in_end)) \ + if (likely(in_next != in_end)) { \ bitbuf |= (bitbuf_t)*in_next++ << bitsleft; \ - else \ - overrun_count++; \ + } else { \ + overread_count++; \ + SAFETY_CHECK(overread_count <= sizeof(bitbuf)); \ + } \ bitsleft += 8; \ } while (bitsleft <= BITBUF_NBITS - 8) @@ -307,16 +304,16 @@ */ #define ALIGN_INPUT() \ do { \ - SAFETY_CHECK(overrun_count <= (bitsleft >> 3)); \ - in_next -= (bitsleft >> 3) - overrun_count; \ - overrun_count = 0; \ + SAFETY_CHECK(overread_count <= (bitsleft >> 3)); \ + in_next -= (bitsleft >> 3) - overread_count; \ + overread_count = 0; \ bitbuf = 0; \ bitsleft = 0; \ } while(0) /* * Read a 16-bit value from the input. This must have been preceded by a call - * to ALIGN_INPUT(), and the caller must have already checked for overrun. + * to ALIGN_INPUT(), and the caller must have already checked for overread. */ #define READ_U16() (tmp16 = get_unaligned_le16(in_next), in_next += 2, tmp16) @@ -554,7 +551,7 @@ */ static bool build_decode_table(u32 decode_table[], - const len_t lens[], + const u8 lens[], const unsigned num_syms, const u32 decode_results[], const unsigned table_bits, diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libdeflate-1.9/libdeflate.h new/libdeflate-1.10/libdeflate.h --- old/libdeflate-1.9/libdeflate.h 2022-01-12 06:24:28.000000000 +0100 +++ new/libdeflate-1.10/libdeflate.h 2022-02-06 23:48:22.000000000 +0100 @@ -10,8 +10,8 @@ #endif #define LIBDEFLATE_VERSION_MAJOR 1 -#define LIBDEFLATE_VERSION_MINOR 9 -#define LIBDEFLATE_VERSION_STRING "1.9" +#define LIBDEFLATE_VERSION_MINOR 10 +#define LIBDEFLATE_VERSION_STRING "1.10" #include <stddef.h> #include <stdint.h> diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libdeflate-1.9/programs/gzip.c new/libdeflate-1.10/programs/gzip.c --- old/libdeflate-1.9/programs/gzip.c 2022-01-12 06:24:28.000000000 +0100 +++ new/libdeflate-1.10/programs/gzip.c 2022-02-06 23:48:22.000000000 +0100 @@ -192,6 +192,7 @@ size_t compressed_size = in->mmap_size; void *uncompressed_data = NULL; size_t uncompressed_size; + size_t max_uncompressed_size; size_t actual_in_nbytes; size_t actual_out_nbytes; enum libdeflate_result result; @@ -214,8 +215,23 @@ if (uncompressed_size == 0) uncompressed_size = 1; + /* + * DEFLATE cannot expand data more than 1032x, so there's no need to + * ever allocate a buffer more than 1032 times larger than the + * compressed data. This is a fail-safe, albeit not a very good one, if + * ISIZE becomes corrupted on a small file. (The 1032x number comes + * from each 2 bits generating a 258-byte match. This is a hard upper + * bound; the real upper bound is slightly smaller due to overhead.) + */ + if (compressed_size <= SIZE_MAX / 1032) + max_uncompressed_size = compressed_size * 1032; + else + max_uncompressed_size = SIZE_MAX; + do { if (uncompressed_data == NULL) { + uncompressed_size = MIN(uncompressed_size, + max_uncompressed_size); uncompressed_data = xmalloc(uncompressed_size); if (uncompressed_data == NULL) { msg("%"TS": file is probably too large to be " @@ -234,6 +250,11 @@ &actual_out_nbytes); if (result == LIBDEFLATE_INSUFFICIENT_SPACE) { + if (uncompressed_size >= max_uncompressed_size) { + msg("Bug in libdeflate_gzip_decompress_ex(): data expanded too much!"); + ret = -1; + goto out; + } if (uncompressed_size * 2 <= uncompressed_size) { msg("%"TS": file corrupt or too large to be " "processed by this program", in->name); @@ -256,7 +277,7 @@ if (actual_in_nbytes == 0 || actual_in_nbytes > compressed_size || actual_out_nbytes > uncompressed_size) { - msg("Bug in libdeflate_gzip_decompress_ex()!"); + msg("Bug in libdeflate_gzip_decompress_ex(): impossible actual_nbytes value!"); ret = -1; goto out; } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libdeflate-1.9/programs/test_overread.c new/libdeflate-1.10/programs/test_overread.c --- old/libdeflate-1.9/programs/test_overread.c 1970-01-01 01:00:00.000000000 +0100 +++ new/libdeflate-1.10/programs/test_overread.c 2022-02-06 23:48:22.000000000 +0100 @@ -0,0 +1,95 @@ +/* + * test_overread.c + * + * Test that the decompressor doesn't produce an unbounded amount of output if + * it runs out of input, even when implicit zeroes appended to the input would + * continue producing output (as is the case when the input ends during a + * DYNAMIC_HUFFMAN block where a literal has an all-zeroes codeword). + * + * This is a regression test for commit 3f21ec9d6121 ("deflate_decompress: error + * out if overread count gets too large"). + */ + +#include "test_util.h" + +static void +generate_test_input(struct output_bitstream *os) +{ + int i; + + put_bits(os, 0, 1); /* BFINAL: 0 */ + put_bits(os, 2, 2); /* BTYPE: DYNAMIC_HUFFMAN */ + + /* + * Write the Huffman codes. + * + * Litlen code: + * litlensym_0 (0) len=1 codeword=0 + * litlensym_256 (end-of-block) len=1 codeword=1 + * Offset code: + * offsetsym_0 (unused) len=1 codeword=0 + * + * Litlen and offset codeword lengths: + * [0] = 1 presym_1 + * [1..255] = 0 presym_{18,18} + * [256] = 1 presym_1 + * [257] = 1 presym_1 + * + * Precode: + * presym_1 len=1 codeword=0 + * presym_18 len=1 codeword=1 + */ + put_bits(os, 0, 5); /* num_litlen_syms: 0 + 257 */ + put_bits(os, 0, 5); /* num_offset_syms: 0 + 1 */ + put_bits(os, 14, 4); /* num_explicit_precode_lens: 14 + 4 */ + /* + * Precode codeword lengths: order is + * [16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15] + */ + put_bits(os, 0, 3); /* presym_16: len=0 */ + put_bits(os, 0, 3); /* presym_17: len=0 */ + put_bits(os, 1, 3); /* presym_18: len=1 */ + for (i = 0; i < 14; i++) /* presym_{0,...,14}: len=0 */ + put_bits(os, 0, 3); + put_bits(os, 1, 3); /* presym_1: len=1 */ + + /* Litlen and offset codeword lengths */ + put_bits(os, 0, 1); /* presym_1 */ + put_bits(os, 1, 1); /* presym_18 ... */ + put_bits(os, 117, 7); /* ... 11 + 117 zeroes */ + put_bits(os, 1, 1); /* presym_18 ... */ + put_bits(os, 116, 7); /* ... 11 + 116 zeroes */ + put_bits(os, 0, 1); /* presym_1 */ + put_bits(os, 0, 1); /* presym_1 */ + + /* Implicit zeroes would generate endless literals from here. */ + + ASSERT(flush_bits(os)); +} + +int +tmain(int argc, tchar *argv[]) +{ + u8 cdata[16]; + u8 udata[256]; + struct output_bitstream os = + { .next = cdata, .end = cdata + sizeof(cdata) }; + struct libdeflate_decompressor *d; + enum libdeflate_result res; + size_t actual_out_nbytes; + + begin_program(argv); + + generate_test_input(&os); + d = libdeflate_alloc_decompressor(); + ASSERT(d != NULL); + + res = libdeflate_deflate_decompress(d, cdata, os.next - cdata, + udata, sizeof(udata), + &actual_out_nbytes); + /* Before the fix, the result was LIBDEFLATE_INSUFFICIENT_SPACE here. */ + ASSERT(res == LIBDEFLATE_BAD_DATA); + + libdeflate_free_decompressor(d); + return 0; +}