On Friday 22 October 2010 20:07, Dan Fandrich wrote:
> pbzip2 is a parallel bzip2 compressor that uses multiple threads while
> compressing to linearly speed bzip2 compression by the number of cores
> available (see http://compression.ca/pbzip2/). The files it produces
> are compatible with traditional bzip2, but have slightly different output
> because of the way the independently-compressed blocks are concatenated.
> 
> Unfortunately, this seems to prevent Busybox's bunzip2 from decompressing
> them fully. Busybox decompresses the first block, then silently stops without
> indicating any sort of error.  It can be reproduced like this:
> 
> $ cp /lib/libc-2.9.so bigfile   #need a file larger than 900K
> $ pbzip2 -9 bigfile
> $ bzcat bigfile.bz2 | wc -c
> 1331404
> $ busybox bzcat bigfile.bz2 | wc -c
> 900000
> $ busybox bunzip2 bigfile.bz2
> $ echo $?
> 0
> $ stat -c %s bigfile
> 900000

Please try the attached patch.
-- 
vda
diff -ad -urpN busybox.6/archival/libunarchive/decompress_bunzip2.c busybox.7/archival/libunarchive/decompress_bunzip2.c
--- busybox.6/archival/libunarchive/decompress_bunzip2.c	2010-10-28 19:04:00.000000000 +0200
+++ busybox.7/archival/libunarchive/decompress_bunzip2.c	2010-10-28 23:06:53.000000000 +0200
@@ -44,7 +44,7 @@
 #define RETVAL_LAST_BLOCK               (-1)
 #define RETVAL_NOT_BZIP_DATA            (-2)
 #define RETVAL_UNEXPECTED_INPUT_EOF     (-3)
-#define RETVAL_SHORT_WRITE              (-4)
+//#define RETVAL_SHORT_WRITE              (-4)
 #define RETVAL_DATA_ERROR               (-5)
 #define RETVAL_OUT_OF_MEMORY            (-6)
 #define RETVAL_OBSOLETE_INPUT           (-7)
@@ -584,8 +584,8 @@ int FAST_FUNC read_bunzip(bunzip_data *b
 /* Because bunzip2 is used for help text unpacking, and because bb_show_usage()
    should work for NOFORK applets too, we must be extremely careful to not leak
    any allocations! */
-int FAST_FUNC start_bunzip(bunzip_data **bdp, int in_fd, const unsigned char *inbuf,
-						int len)
+int FAST_FUNC start_bunzip(bunzip_data **bdp, int in_fd,
+		const void *inbuf, int len)
 {
 	bunzip_data *bd;
 	unsigned i;
@@ -606,9 +606,11 @@ int FAST_FUNC start_bunzip(bunzip_data *
 	if (-1 == in_fd) {
 		/* in this case, bd->inbuf is read-only */
 		bd->inbuf = (void*)inbuf; /* cast away const-ness */
-		bd->inbufCount = len;
-	} else
+	} else {
 		bd->inbuf = (unsigned char *)(bd + 1);
+		memcpy(bd->inbuf, inbuf, len);
+	}
+	bd->inbufCount = len;
 
 	/* Init the CRC32 table (big endian) */
 	crc32_filltable(bd->crc32Table, 1);
@@ -652,37 +654,59 @@ IF_DESKTOP(long long) int FAST_FUNC
 unpack_bz2_stream(int src_fd, int dst_fd)
 {
 	IF_DESKTOP(long long total_written = 0;)
+		bunzip_data *bd;
 	char *outbuf;
-	bunzip_data *bd;
 	int i;
+	unsigned len;
 
 	outbuf = xmalloc(IOBUF_SIZE);
-	i = start_bunzip(&bd, src_fd, NULL, 0);
-	if (!i) {
-		for (;;) {
-			i = read_bunzip(bd, outbuf, IOBUF_SIZE);
-			if (i <= 0) break;
-			if (i != full_write(dst_fd, outbuf, i)) {
-				i = RETVAL_SHORT_WRITE;
-				break;
+	len = 0;
+	while (1) { /* "Process one BZ... stream" loop */
+
+		i = start_bunzip(&bd, src_fd, outbuf + 2, len);
+
+		if (i == 0) {
+			while (1) { /* "Produce some output bytes" loop */
+				i = read_bunzip(bd, outbuf, IOBUF_SIZE);
+				if (i <= 0)
+					break;
+				if (i != full_write(dst_fd, outbuf, i)) {
+					bb_error_msg("short write");
+					goto release_mem;
+				}
+				IF_DESKTOP(total_written += i;)
 			}
-			IF_DESKTOP(total_written += i;)
 		}
-	}
-
-	/* Check CRC and release memory */
 
-	if (i == RETVAL_LAST_BLOCK) {
+		if (i != RETVAL_LAST_BLOCK) {
+			bb_error_msg("bunzip error %d", i);
+			break;
+		}
 		if (bd->headerCRC != bd->totalCRC) {
 			bb_error_msg("CRC error");
-		} else {
-			i = RETVAL_OK;
+			break;
 		}
-	} else if (i == RETVAL_SHORT_WRITE) {
-		bb_error_msg("short write");
-	} else {
-		bb_error_msg("bunzip error %d", i);
+
+		/* Successfully unpacked one BZ stream */
+		i = RETVAL_OK;
+
+		/* Do we have "BZ..." after last processed byte?
+		 * pbzip2 (parallelized bzip2) produces such files.
+		 */
+		len = bd->inbufCount - bd->inbufPos;
+		memcpy(outbuf, &bd->inbuf[bd->inbufPos], len);
+		if (len < 2) {
+			if (safe_read(src_fd, outbuf + len, 2 - len) != 2 - len)
+				break;
+			len = 2;
+		}
+		if (*(uint16_t*)outbuf != BZIP2_MAGIC) /* "BZ"? */
+			break;
+		dealloc_bunzip(bd);
+		len -= 2;
 	}
+
+ release_mem:
 	dealloc_bunzip(bd);
 	free(outbuf);
 
diff -ad -urpN busybox.6/include/unarchive.h busybox.7/include/unarchive.h
--- busybox.6/include/unarchive.h	2010-10-28 19:04:00.000000000 +0200
+++ busybox.7/include/unarchive.h	2010-10-28 22:56:09.000000000 +0200
@@ -193,7 +193,7 @@ extern const llist_t *find_list_entry2(c
 
 /* A bit of bunzip2 internals are exposed for compressed help support: */
 typedef struct bunzip_data bunzip_data;
-int start_bunzip(bunzip_data **bdp, int in_fd, const unsigned char *inbuf, int len) FAST_FUNC;
+int start_bunzip(bunzip_data **bdp, int in_fd, const void *inbuf, int len) FAST_FUNC;
 int read_bunzip(bunzip_data *bd, char *outbuf, int len) FAST_FUNC;
 void dealloc_bunzip(bunzip_data *bd) FAST_FUNC;
 
diff -ad -urpN busybox.6/libbb/appletlib.c busybox.7/libbb/appletlib.c
--- busybox.6/libbb/appletlib.c	2010-10-28 19:04:00.000000000 +0200
+++ busybox.7/libbb/appletlib.c	2010-10-28 22:56:09.000000000 +0200
@@ -75,7 +75,7 @@ static const char *unpack_usage_messages
 
 	i = start_bunzip(&bd,
 			/* src_fd: */ -1,
-			/* inbuf:  */ (void *)packed_usage,
+			/* inbuf:  */ packed_usage,
 			/* len:    */ sizeof(packed_usage));
 	/* read_bunzip can longjmp to start_bunzip, and ultimately
 	 * end up here with i != 0 on read data errors! Not trivial */
diff -ad -urpN busybox.6/miscutils/bbconfig.c busybox.7/miscutils/bbconfig.c
--- busybox.6/miscutils/bbconfig.c	2010-10-28 19:04:00.000000000 +0200
+++ busybox.7/miscutils/bbconfig.c	2010-10-28 22:56:09.000000000 +0200
@@ -15,7 +15,7 @@ int bbconfig_main(int argc UNUSED_PARAM,
 	bunzip_data *bd;
 	int i = start_bunzip(&bd,
 			/* src_fd: */ -1,
-			/* inbuf:  */ (void *)bbconfig_config_bz2,
+			/* inbuf:  */ bbconfig_config_bz2,
 			/* len:    */ sizeof(bbconfig_config_bz2));
 	/* read_bunzip can longjmp to start_bunzip, and ultimately
 	 * end up here with i != 0 on read data errors! Not trivial */
diff -ad -urpN busybox.6/testsuite/bunzip2.tests busybox.7/testsuite/bunzip2.tests
--- busybox.6/testsuite/bunzip2.tests	2010-10-28 19:04:00.000000000 +0200
+++ busybox.7/testsuite/bunzip2.tests	2010-10-28 22:56:09.000000000 +0200
@@ -463,6 +463,24 @@ $ECHO -ne "\x40\xa0\x00\x8b\x12\xe8\xfb\
 $ECHO -ne "\x01\xff\x8b\xb9\x22\x9c\x28\x48\x5f\xa5\xca\xf3\x80"
 }
 
+pbzip_4m_zeros() {
+$ECHO -ne "\x42\x5a\x68\x31\x31\x41\x59\x26\x53\x59\x63\xe3\xec\xa2\x00\x06"
+$ECHO -ne "\xe4\xc1\x00\xc0\x00\x02\x00\x00\x08\x20\x00\x30\xcc\x09\xaa\x69"
+$ECHO -ne "\x94\xa1\x36\xa9\x28\x4f\x17\x72\x45\x38\x50\x90\x63\xe3\xec\xa2"
+$ECHO -ne "\x42\x5a\x68\x31\x31\x41\x59\x26\x53\x59\x63\xe3\xec\xa2\x00\x06"
+$ECHO -ne "\xe4\xc1\x00\xc0\x00\x02\x00\x00\x08\x20\x00\x30\xcc\x09\xaa\x69"
+$ECHO -ne "\x94\xa1\x36\xa9\x28\x4f\x17\x72\x45\x38\x50\x90\x63\xe3\xec\xa2"
+$ECHO -ne "\x42\x5a\x68\x31\x31\x41\x59\x26\x53\x59\x63\xe3\xec\xa2\x00\x06"
+$ECHO -ne "\xe4\xc1\x00\xc0\x00\x02\x00\x00\x08\x20\x00\x30\xcc\x09\xaa\x69"
+$ECHO -ne "\x94\xa1\x36\xa9\x28\x4f\x17\x72\x45\x38\x50\x90\x63\xe3\xec\xa2"
+$ECHO -ne "\x42\x5a\x68\x31\x31\x41\x59\x26\x53\x59\x63\xe3\xec\xa2\x00\x06"
+$ECHO -ne "\xe4\xc1\x00\xc0\x00\x02\x00\x00\x08\x20\x00\x30\xcc\x09\xaa\x69"
+$ECHO -ne "\x94\xa1\x36\xa9\x28\x4f\x17\x72\x45\x38\x50\x90\x63\xe3\xec\xa2"
+$ECHO -ne "\x42\x5a\x68\x31\x31\x41\x59\x26\x53\x59\xc9\xb5\x21\xef\x00\x04"
+$ECHO -ne "\x8d\x40\x20\xc0\x00\x01\x00\x00\x08\x20\x00\x30\xcc\x05\x29\xa6"
+$ECHO -ne "\x4a\x11\xb1\x4a\x11\xe2\xee\x48\xa7\x0a\x12\x19\x36\xa4\x3d\xe0"
+}
+
 prep() {
     rm -f t*
     hello_$ext >t1.$ext
@@ -520,9 +538,18 @@ if test "${0##*/}" = "bunzip2.tests"; th
     if test1_bz2 | ${bb}bunzip2 >/dev/null \
 	&& test "`test1_bz2 | ${bb}bunzip2 | md5sum`" = "61bbeee4be9c6f110a71447f584fda7b  -"
     then
-	echo "PASS: $unpack: test bz2 file"
+	echo "PASS: $unpack: test_bz2 file"
     else
-	echo "FAIL: $unpack: test bz2 file"
+	echo "FAIL: $unpack: test_bz2 file"
+	FAILCOUNT=$((FAILCOUNT + 1))
+    fi
+
+    if pbzip_4m_zeros | ${bb}bunzip2 >/dev/null \
+	&& test "`pbzip_4m_zeros | ${bb}bunzip2 | md5sum`" = "b5cfa9d6c8febd618f91ac2843d50a1c  -"
+    then
+	echo "PASS: $unpack: pbzip_4m_zeros file"
+    else
+	echo "FAIL: $unpack: pbzip_4m_zeros file"
 	FAILCOUNT=$((FAILCOUNT + 1))
     fi
 fi
_______________________________________________
busybox mailing list
busybox@busybox.net
http://lists.busybox.net/mailman/listinfo/busybox

Reply via email to