Hi,

The performance of "depmod -a" with gzipped modules, and the number of processes it spawned, were abysmal. This patch adds a fast path without fork for well-behaved gzip files, benefiting all users of xmalloc_open_zipped_read_close.

"modinfo radeon.ko.gz", a single-file reader, got 30% faster. "depmod -a", which used to fork over 800 times, got 20% faster. And of course there are far fewer processes, which saves a lot of RAM.

function                                             old     new   delta
inflate_get_next_window                                -    1877   +1877
xmalloc_unpack_gz                                      -     356    +356
check_header_gzip                                      -     298    +298
xmalloc_inflate_unzip_internal                         -     223    +223
inflate_init                                           -      97     +97
xmalloc_open_zipped_read_close                        73     159     +86
inflate_store_unused                                   -      35     +35
unpack_gz_stream                                     567     299    -268
inflate_unzip_internal                              2304     172   -2132
------------------------------------------------------------------------------
(add/remove: 6/0 grow/shrink: 1/2 up/down: 2972/-2400)            Total: 572 bytes

--

It's currently guarded by CONFIG_DESKTOP. If you'd like a new config option instead, please say so.

- Lauri

--
http://www.fastmail.com - Choose from over 50 domains or use your own
From 4eaee06a748ce07b004b40aa10fa1bcc3e5e8928 Mon Sep 17 00:00:00 2001 From: Lauri Kasanen <cur...@operamail.com> Date: Thu, 27 Nov 2014 14:48:17 +0200 Subject: [PATCH] Add a gzip fastpath for the xmalloc readers The performance and number of processes for a "depmod -a" with gzipped modules was abysmal. This patch adds a fast path without fork for well- behaved gzip files, benefiting all users of xmalloc_open_zipped_read_close. "modinfo radeon.ko.gz", a single-file reader, got 30% faster. "depmod -a", which used to fork over 800 times, got 20% faster. And of course a whole lot less processes -> much saved RAM. function old new delta inflate_get_next_window - 1877 +1877 xmalloc_unpack_gz - 356 +356 check_header_gzip - 298 +298 xmalloc_inflate_unzip_internal - 223 +223 inflate_init - 97 +97 xmalloc_open_zipped_read_close 73 159 +86 inflate_store_unused - 35 +35 unpack_gz_stream 567 299 -268 inflate_unzip_internal 2304 172 -2132 ------------------------------------------------------------------------------ (add/remove: 6/0 grow/shrink: 1/2 up/down: 2972/-2400) Total: 572 bytes Signed-off-by: Lauri Kasanen <cur...@operamail.com> --- archival/libarchive/decompress_gunzip.c | 155 ++++++++++++++++++++++++++++---- archival/libarchive/open_transformer.c | 16 ++++ include/bb_archive.h | 1 + 3 files changed, 156 insertions(+), 16 deletions(-) diff --git a/archival/libarchive/decompress_gunzip.c b/archival/libarchive/decompress_gunzip.c index 7c6f38e..1f68ebd 100644 --- a/archival/libarchive/decompress_gunzip.c +++ b/archival/libarchive/decompress_gunzip.c @@ -968,19 +968,12 @@ static int inflate_get_next_window(STATE_PARAM_ONLY) /* Doesnt get here */ } - -/* Called from unpack_gz_stream() and inflate_unzip() */ -static IF_DESKTOP(long long) int -inflate_unzip_internal(STATE_PARAM int in, int out) +static void inflate_init(STATE_PARAM_ONLY) { - IF_DESKTOP(long long) int n = 0; - ssize_t nwrote; - /* Allocate all global buffers (for DYN_ALLOC option) */ gunzip_window = 
xmalloc(GUNZIP_WSIZE); gunzip_outbuf_count = 0; gunzip_bytes_out = 0; - gunzip_src_fd = in; /* (re) initialize state */ method = -1; @@ -994,6 +987,31 @@ inflate_unzip_internal(STATE_PARAM int in, int out) gunzip_crc = ~0; error_msg = "corrupted data"; +} + +static void inflate_store_unused(STATE_PARAM_ONLY) +{ + /* Store unused bytes in a global buffer so calling applets can access it */ + if (gunzip_bk >= 8) { + /* Undo too much lookahead. The next read will be byte aligned + * so we can discard unused bits in the last meaningful byte. */ + bytebuffer_offset--; + bytebuffer[bytebuffer_offset] = gunzip_bb & 0xff; + gunzip_bb >>= 8; + gunzip_bk -= 8; + } +} + +/* Called from unpack_gz_stream() and inflate_unzip() */ +static IF_DESKTOP(long long) int +inflate_unzip_internal(STATE_PARAM int in, int out) +{ + IF_DESKTOP(long long) int n = 0; + ssize_t nwrote; + + gunzip_src_fd = in; + inflate_init(PASS_STATE_ONLY); + if (setjmp(error_jmp)) { /* Error from deep inside zip machinery */ n = -1; @@ -1012,15 +1030,48 @@ inflate_unzip_internal(STATE_PARAM int in, int out) if (r == 0) break; } - /* Store unused bytes in a global buffer so calling applets can access it */ - if (gunzip_bk >= 8) { - /* Undo too much lookahead. The next read will be byte aligned - * so we can discard unused bits in the last meaningful byte. 
*/ - bytebuffer_offset--; - bytebuffer[bytebuffer_offset] = gunzip_bb & 0xff; - gunzip_bb >>= 8; - gunzip_bk -= 8; + inflate_store_unused(PASS_STATE_ONLY); + ret: + /* Cleanup */ + free(gunzip_window); + free(gunzip_crc_table); + return n; +} + +static long long int +xmalloc_inflate_unzip_internal(STATE_PARAM int in, char **outptr, const size_t uncompressed) +{ + long long int n = 0; + char *buf = xmalloc(uncompressed + 1); + + gunzip_src_fd = in; + inflate_init(PASS_STATE_ONLY); + + if (setjmp(error_jmp)) { + /* Error from deep inside zip machinery */ + n = -1; + free(buf); + goto ret; + } + + while (1) { + int r = inflate_get_next_window(PASS_STATE_ONLY); + if (n + gunzip_outbuf_count > uncompressed) { + /* The gzip trailer lied. Fall back on the fork method. */ + n = -1; + free(buf); + goto ret; + } + memcpy(buf + n, gunzip_window, gunzip_outbuf_count); + n += gunzip_outbuf_count; + if (r == 0) break; } + + inflate_store_unused(PASS_STATE_ONLY); + + buf[uncompressed] = '\0'; + *outptr = buf; + ret: /* Cleanup */ free(gunzip_window); @@ -1269,3 +1320,75 @@ unpack_gz_stream(transformer_aux_data_t *aux, int src_fd, int dst_fd) DEALLOC_STATE; return total; } + +void FAST_FUNC +xmalloc_unpack_gz(transformer_aux_data_t *aux, int src_fd, char **outptr, size_t *maxsz_p) +{ + uint32_t v32, uncompressed; + long long int n; + off_t compressed; + char *tmp = NULL; + DECLARE_STATE; + + ALLOC_STATE; + to_read = -1; + bytebuffer = xmalloc(bytebuffer_max); + gunzip_src_fd = src_fd; + + compressed = xlseek(src_fd, -4, SEEK_END); + xread(src_fd, &uncompressed, 4); + IF_BIG_ENDIAN(uncompressed = SWAP_LE32(uncompressed);) + xlseek(src_fd, 2, SEEK_SET); + + if (!uncompressed || uncompressed > *maxsz_p || compressed > 4198404) { + /* The last check makes sure the uncompressed size is less than 4GB. + The maximum compression ratio for zlib is 1:1023, and the uncompressed + size field is u32 - it wraps when uncompressed size is > 4GB. + + 4096 * 1024 * 1024 / 1023 = 4198404 bytes. 
+ */ + goto err; + } + + if (!check_header_gzip(PASS_STATE aux)) { + bb_error_msg("corrupted data"); + goto err; + } + + n = xmalloc_inflate_unzip_internal(PASS_STATE src_fd, &tmp, uncompressed); + if (n < 0 || !tmp) { + goto err; + } + + if (!top_up(PASS_STATE 8)) { + bb_error_msg("corrupted data"); + goto err; + } + + /* Validate decompression - crc */ + v32 = buffer_read_le_u32(PASS_STATE_ONLY); + if ((~gunzip_crc) != v32) { + bb_error_msg("crc error"); + goto err; + } + + /* Validate decompression - size */ + v32 = buffer_read_le_u32(PASS_STATE_ONLY); + if ((uint32_t)gunzip_bytes_out != v32) { + bb_error_msg("incorrect length"); + goto err; + } + + if (top_up(PASS_STATE 2)) { + goto err; /* Concatenated stream, bail out */ + } + + *outptr = tmp; + *maxsz_p = uncompressed; + goto ret; + err: + free(tmp); + ret: + free(bytebuffer); + DEALLOC_STATE; +} diff --git a/archival/libarchive/open_transformer.c b/archival/libarchive/open_transformer.c index 1986630..063e6a1 100644 --- a/archival/libarchive/open_transformer.c +++ b/archival/libarchive/open_transformer.c @@ -213,6 +213,22 @@ void* FAST_FUNC xmalloc_open_zipped_read_close(const char *fname, size_t *maxsz_ int fd; char *image; + /* Fast path for well-behaved gzip files, avoiding forks. 
*/ + if (ENABLE_FEATURE_SEAMLESS_GZ && ENABLE_DESKTOP && BB_MMU) { + uint16_t magic; + fd = open(fname, O_RDONLY); + xread(fd, &magic, 2); + + if (magic == GZIP_MAGIC) { + image = NULL; + xmalloc_unpack_gz(NULL, fd, &image, maxsz_p); + if (image) + return image; + } + + close(fd); + } + fd = open_zipped(fname, /*fail_if_not_compressed:*/ 0); if (fd < 0) return NULL; diff --git a/include/bb_archive.h b/include/bb_archive.h index b82cfd8..618a632 100644 --- a/include/bb_archive.h +++ b/include/bb_archive.h @@ -217,6 +217,7 @@ int FAST_FUNC check_signature16(transformer_aux_data_t *aux, int src_fd, unsigne IF_DESKTOP(long long) int inflate_unzip(transformer_aux_data_t *aux, int src_fd, int dst_fd) FAST_FUNC; IF_DESKTOP(long long) int unpack_Z_stream(transformer_aux_data_t *aux, int src_fd, int dst_fd) FAST_FUNC; IF_DESKTOP(long long) int unpack_gz_stream(transformer_aux_data_t *aux, int src_fd, int dst_fd) FAST_FUNC; +void xmalloc_unpack_gz(transformer_aux_data_t *aux, int src_fd, char **outptr, size_t *maxsz_p) FAST_FUNC; IF_DESKTOP(long long) int unpack_bz2_stream(transformer_aux_data_t *aux, int src_fd, int dst_fd) FAST_FUNC; IF_DESKTOP(long long) int unpack_lzma_stream(transformer_aux_data_t *aux, int src_fd, int dst_fd) FAST_FUNC; IF_DESKTOP(long long) int unpack_xz_stream(transformer_aux_data_t *aux, int src_fd, int dst_fd) FAST_FUNC; -- 1.8.3.1
_______________________________________________ busybox mailing list busybox@busybox.net http://lists.busybox.net/mailman/listinfo/busybox