In zlib_compress_pages(), if the last looped call to zlib_deflate() ends between 1 and 5 bytes short of a page boundary, the final call with Z_FINISH has no room to write the final 6 bytes of the zlib stream and bails out. This causes compress_file_range() in inode.c to distrust the compressor and flag the inode as nocompress, with the end result that the rest of a potentially highly-compressible file is stored uncompressed.
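
The same behaviour is easy to reproduce against userspace zlib. Below is a standalone sketch (not part of the patch; the 3-byte shortfall is forced by hand rather than by unlucky data, and the buffer sizes are arbitrary): the first Z_FINISH call returns Z_OK instead of Z_STREAM_END because fewer than 6 bytes of output space remain, and a second call with a fresh "page" completes the stream.

/* zlib-finish-demo.c -- build with: cc zlib-finish-demo.c -lz */
#include <stdio.h>
#include <string.h>
#include <zlib.h>

int main(void)
{
	static unsigned char in[131072];	/* one 128 KiB chunk of zeros */
	unsigned char out[4096];		/* one "page" of output */
	z_stream strm;
	int ret;

	memset(&strm, 0, sizeof(strm));
	deflateInit(&strm, 3);
	strm.next_in = in;
	strm.avail_in = sizeof(in);
	strm.next_out = out;
	strm.avail_out = sizeof(out);

	/* consume all the input, as the kernel loop does */
	deflate(&strm, Z_SYNC_FLUSH);

	/* pretend we ended 3 bytes short of a page boundary: there is
	 * no room for the ~6-byte stream end, so Z_FINISH returns Z_OK
	 * rather than Z_STREAM_END -- the case the current code treats
	 * as a hard failure */
	strm.avail_out = 3;
	ret = deflate(&strm, Z_FINISH);
	printf("Z_FINISH, 3 bytes free: ret=%d (Z_STREAM_END=%d)\n",
	       ret, Z_STREAM_END);

	/* a second Z_FINISH with a fresh page completes the stream */
	strm.next_out = out;
	strm.avail_out = sizeof(out);
	ret = deflate(&strm, Z_FINISH);
	printf("Z_FINISH, fresh page: ret=%d\n", ret);

	deflateEnd(&strm);
	return 0;
}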

You can demonstrate this with the following script, setting MOUNTPOINT to an otherwise-unused btrfs mount with compress=zlib:

MOUNTPOINT=/mnt
sync; df $MOUNTPOINT
{ head -c 6630 /usr/src/linux/Documentation/BUG-HUNTING;
  cat /dev/zero; } | head -c 1310720 > $MOUNTPOINT/test
sync; df $MOUNTPOINT
rm -f $MOUNTPOINT/test

The 6630 bytes from BUG-HUNTING followed by nulls give a first 128 KiB chunk that compresses to just over 4 KiB, while the other nine chunks are all nulls and compress easily into a single page each, so the file should compress to 11 pages, or 44 KiB; instead it uses significantly more, depending on how many threads managed to compress their chunks before nocompress was set.
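
For reference, the expected arithmetic (assuming 4 KiB pages and btrfs's 128 KiB compression chunks):

1310720 bytes = 10 chunks of 131072 bytes
chunk 1:     6630 text bytes + 124442 nulls -> just over 4 KiB = 2 pages
chunks 2-10: all nulls                      -> well under 4 KiB each = 1 page
total:       2 + 9*1 = 11 pages = 44 KiB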

The following patch fixes this by moving the Z_FINISH call inside the page read/write loop, allowing zlib_deflate() to be called as many times as it needs to write out the end of the stream.

Signed-off-by: Danielle Church <dchu...@cheri.shyou.org>
---
 fs/btrfs/zlib.c | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index fb22fd8..1dc0455 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -89,6 +89,7 @@ static int zlib_compress_pages(struct list_head *ws,
        struct page *in_page = NULL;
        struct page *out_page = NULL;
        unsigned long bytes_left;
+       int deflate_flush = Z_SYNC_FLUSH;

        *out_pages = 0;
        *total_out = 0;
@@ -120,8 +121,12 @@ static int zlib_compress_pages(struct list_head *ws,
        workspace->strm.avail_out = PAGE_CACHE_SIZE;
        workspace->strm.avail_in = min(len, PAGE_CACHE_SIZE);

-       while (workspace->strm.total_in < len) {
-               ret = zlib_deflate(&workspace->strm, Z_SYNC_FLUSH);
+       while (deflate_flush != Z_FINISH || ret != Z_STREAM_END) {
+               ret = zlib_deflate(&workspace->strm, deflate_flush);
+               /* we're all done, including the stream end */
+               if (ret == Z_STREAM_END && deflate_flush == Z_FINISH)
+                       break;
+
                if (ret != Z_OK) {
                        printk(KERN_DEBUG "BTRFS: deflate in loop returned 
%d\n",
                               ret);
@@ -159,9 +164,12 @@ static int zlib_compress_pages(struct list_head *ws,
                        workspace->strm.avail_out = PAGE_CACHE_SIZE;
                        workspace->strm.next_out = cpage_out;
                }
-               /* we're all done */
-               if (workspace->strm.total_in >= len)
-                       break;
+               /* we're all done with input data; keep looping until stream end is written */
+               if (workspace->strm.total_in >= len) {
+                       deflate_flush = Z_FINISH;
+                       workspace->strm.avail_in = 0;
+                       continue;
+               }

                /* we've read in a full page, get a new one */
                if (workspace->strm.avail_in == 0) {
@@ -181,11 +189,11 @@ static int zlib_compress_pages(struct list_head *ws,
                        workspace->strm.next_in = data_in;
                }
        }
-       workspace->strm.avail_in = 0;
-       ret = zlib_deflate(&workspace->strm, Z_FINISH);
-       zlib_deflateEnd(&workspace->strm);
+       ret = zlib_deflateEnd(&workspace->strm);

-       if (ret != Z_STREAM_END) {
+       if (ret != Z_OK) {
+               printk(KERN_DEBUG "BTRFS: deflateEnd returned %d\n",
+                      ret);
                ret = -EIO;
                goto out;
        }
--
2.2.1
