v3: Use the aux struct as requested by Denys
v2: Add missing check on open

The performance and number of processes for a "depmod -a" with gzipped modules were abysmal. This patch adds a fast path without fork for well-behaved gzip files, benefiting all users of xmalloc_open_zipped_read_close.

"modinfo radeon.ko.gz", a single-file reader, got 30% faster. "depmod -a", which used to fork over 800 times, got 20% faster. And of course there are a whole lot fewer processes -> much saved RAM.

function                                             old     new   delta
inflate_unzip_internal                              2304    2521    +217
xmalloc_open_zipped_read_close                        73     201    +128
unpack_gz_stream                                     567     570      +3
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 3/0 up/down: 348/0)             Total: 348 bytes

--
http://www.fastmail.com - Does exactly what it says on the tin
From 5ad6804ed4485eae176da45524ea848a00b11929 Mon Sep 17 00:00:00 2001 From: Lauri Kasanen <cur...@operamail.com> Date: Sun, 30 Nov 2014 21:37:10 +0200 Subject: [PATCH] Add a gzip fastpath for the xmalloc readers, v3 v3: Use the aux struct as requested by Denys v2: Add missing check on open The performance and number of processes for a "depmod -a" with gzipped modules was abysmal. This patch adds a fast path without fork for well- behaved gzip files, benefiting all users of xmalloc_open_zipped_read_close. "modinfo radeon.ko.gz", a single-file reader, got 30% faster. "depmod -a", which used to fork over 800 times, got 20% faster. And of course a whole lot less processes -> much saved RAM. function old new delta inflate_unzip_internal 2304 2521 +217 xmalloc_open_zipped_read_close 73 201 +128 unpack_gz_stream 567 570 +3 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 3/0 up/down: 348/0) Total: 348 bytes Signed-off-by: Lauri Kasanen <cur...@operamail.com> --- archival/libarchive/decompress_gunzip.c | 38 ++++++++++++++++++++++++++++----- archival/libarchive/open_transformer.c | 31 ++++++++++++++++++++++++++- include/bb_archive.h | 2 ++ 3 files changed, 65 insertions(+), 6 deletions(-) diff --git a/archival/libarchive/decompress_gunzip.c b/archival/libarchive/decompress_gunzip.c index 7c6f38e..938d21f 100644 --- a/archival/libarchive/decompress_gunzip.c +++ b/archival/libarchive/decompress_gunzip.c @@ -971,10 +971,11 @@ static int inflate_get_next_window(STATE_PARAM_ONLY) /* Called from unpack_gz_stream() and inflate_unzip() */ static IF_DESKTOP(long long) int -inflate_unzip_internal(STATE_PARAM int in, int out) +inflate_unzip_internal(STATE_PARAM transformer_aux_data_t *aux, int in, int out) { IF_DESKTOP(long long) int n = 0; ssize_t nwrote; + size_t bufsize = 0; /* Allocate all global buffers (for DYN_ALLOC option) */ gunzip_window = xmalloc(GUNZIP_WSIZE); @@ -1002,16 +1003,43 @@ 
inflate_unzip_internal(STATE_PARAM int in, int out) while (1) { int r = inflate_get_next_window(PASS_STATE_ONLY); - nwrote = full_write(out, gunzip_window, gunzip_outbuf_count); + + if (aux->mem_output_size) { + nwrote = gunzip_outbuf_count; + if (gunzip_outbuf_count + n > bufsize) { + // increase by four blocks each time + const size_t newsize = bufsize + 4 * gunzip_outbuf_count + 1; + aux->mem_output_buf = xrealloc(aux->mem_output_buf, newsize); + bufsize = newsize; + } + + if (bufsize > aux->mem_output_size) { + free(aux->mem_output_buf); + aux->mem_output_buf = NULL; + n = -1; + goto ret; + } + + memcpy(aux->mem_output_buf + n, gunzip_window, gunzip_outbuf_count); + } else { + nwrote = full_write(out, gunzip_window, gunzip_outbuf_count); + } if (nwrote != (ssize_t)gunzip_outbuf_count) { bb_perror_msg("write"); n = -1; goto ret; } - IF_DESKTOP(n += nwrote;) + n += nwrote; if (r == 0) break; } + /* Final realloc, plus zero byte */ + if (aux->mem_output_size) { + aux->mem_output_buf = xrealloc(aux->mem_output_buf, n + 1); + aux->mem_output_size = n; + aux->mem_output_buf[n] = '\0'; + } + /* Store unused bytes in a global buffer so calling applets can access it */ if (gunzip_bk >= 8) { /* Undo too much lookahead. 
The next read will be byte aligned @@ -1045,7 +1073,7 @@ inflate_unzip(transformer_aux_data_t *aux, int in, int out) // bytebuffer_max = 0x8000; bytebuffer_offset = 4; bytebuffer = xmalloc(bytebuffer_max); - n = inflate_unzip_internal(PASS_STATE in, out); + n = inflate_unzip_internal(PASS_STATE aux, in, out); free(bytebuffer); aux->crc32 = gunzip_crc; @@ -1224,7 +1252,7 @@ unpack_gz_stream(transformer_aux_data_t *aux, int src_fd, int dst_fd) goto ret; } - n = inflate_unzip_internal(PASS_STATE src_fd, dst_fd); + n = inflate_unzip_internal(PASS_STATE aux, src_fd, dst_fd); if (n < 0) { total = -1; goto ret; diff --git a/archival/libarchive/open_transformer.c b/archival/libarchive/open_transformer.c index 1986630..ad9dce5 100644 --- a/archival/libarchive/open_transformer.c +++ b/archival/libarchive/open_transformer.c @@ -211,7 +211,36 @@ int FAST_FUNC open_zipped(const char *fname, int fail_if_not_compressed) void* FAST_FUNC xmalloc_open_zipped_read_close(const char *fname, size_t *maxsz_p) { int fd; - char *image; + char *image = NULL; + + /* Fast path for well-behaved gzip files, avoiding forks. 
*/ + if (ENABLE_FEATURE_SEAMLESS_GZ && ENABLE_DESKTOP && BB_MMU) { + uint16_t magic; + fd = open(fname, O_RDONLY); + if (fd < 0) + return NULL; + + xread(fd, &magic, 2); + + if (magic == GZIP_MAGIC) { + transformer_aux_data_t aux; + init_transformer_aux_data(&aux); + + /* In-memory decompression instead of the usual fork */ + aux.mem_output_size = *maxsz_p; + + unpack_gz_stream(&aux, fd, -1); + + if (aux.mem_output_buf) { + image = aux.mem_output_buf; + *maxsz_p = aux.mem_output_size; + } + } + + close(fd); + if (image) + return image; + } fd = open_zipped(fname, /*fail_if_not_compressed:*/ 0); if (fd < 0) diff --git a/include/bb_archive.h b/include/bb_archive.h index b82cfd8..6b19413 100644 --- a/include/bb_archive.h +++ b/include/bb_archive.h @@ -209,6 +209,8 @@ typedef struct transformer_aux_data_t { off_t bytes_in; /* used in unzip code only: needs to know packed size */ uint32_t crc32; time_t mtime; /* gunzip code may set this on exit */ + size_t mem_output_size; /* if non-zero, decompress to RAM instead of fd */ + char *mem_output_buf; } transformer_aux_data_t; void init_transformer_aux_data(transformer_aux_data_t *aux) FAST_FUNC; -- 1.8.3.1
_______________________________________________ busybox mailing list busybox@busybox.net http://lists.busybox.net/mailman/listinfo/busybox