From: Jim Beveridge <ji...@chromium.org>

The original code is by Jim Beveridge working on Fuchsia. I merged it
with slight changes.

Time to tar two 1GB files:

Before patch:

real    0m6.428s
user    0m0.245s
sys     0m4.881s

real    0m6.454s
user    0m0.239s
sys     0m4.883s

real    0m6.515s
user    0m0.259s
sys     0m4.839s

After patch:

real    0m4.755s
user    0m0.026s
sys     0m1.598s

real    0m4.788s
user    0m0.063s
sys     0m1.578s

real    0m4.822s
user    0m0.007s
sys     0m1.662s

A similar speedup can be observed for untaring.

In addition to the buffer size increase we change the code to only create
directories for non-compliant tar files and we check for st to be NULL
in the recursive copy function.
---
 tar.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 55 insertions(+), 17 deletions(-)

diff --git a/tar.c b/tar.c
index 53a737c..8cd1abe 100644
--- a/tar.c
+++ b/tar.c
@@ -16,6 +16,8 @@
 #include "util.h"
 
 #define BLKSIZ 512
+// COPY_CHUNK_SIZE must be a power of 2
+#define COPY_CHUNK_SIZE 8192
 
 enum Type {
        REG       = '0',
@@ -236,10 +238,13 @@ archive(const char *path)
        ewrite(tarfd, b, BLKSIZ);
 
        if (fd != -1) {
-               while ((l = eread(fd, b, BLKSIZ)) > 0) {
-                       if (l < BLKSIZ)
-                               memset(b + l, 0, BLKSIZ - l);
-                       ewrite(tarfd, b, BLKSIZ);
+               char chunk[COPY_CHUNK_SIZE];
+               while ((l = eread(fd, chunk, COPY_CHUNK_SIZE)) > 0) {
+                       // Ceiling to BLKSIZ boundary
+                       int ceilsize = (l + (BLKSIZ-1)) & ~(BLKSIZ-1);
+                       if (l < ceilsize)
+                               memset(chunk + l, 0, ceilsize - l);
+                       ewrite(tarfd, chunk, ceilsize);
                }
                close(fd);
        }
@@ -250,7 +255,7 @@ archive(const char *path)
 static int
 unarchive(char *fname, ssize_t l, char b[BLKSIZ])
 {
-       char lname[101], *tmp, *p;
+       char lname[101], *p;
        long mode, major, minor, type, mtime, uid, gid;
        struct header *h = (struct header *)b;
        int fd = -1;
@@ -261,9 +266,13 @@ unarchive(char *fname, ssize_t l, char b[BLKSIZ])
        if (remove(fname) < 0 && errno != ENOENT)
                weprintf("remove %s:", fname);
 
-       tmp = estrdup(fname);
-       mkdirp(dirname(tmp), 0777, 0777);
-       free(tmp);
+       // tar files normally create the directory chain. This is a fallback
+       // for noncompliant tar files.
+       if (h->type != DIRECTORY) {
+               char* tmp = estrdup(fname);
+               mkdirp(dirname(tmp), 0777, 0777);
+               free(tmp);
+       }
 
        switch (h->type) {
        case REG:
@@ -319,9 +328,25 @@ unarchive(char *fname, ssize_t l, char b[BLKSIZ])
                eprintf("strtol %s: invalid number\n", h->gid);
 
        if (fd != -1) {
-               for (; l > 0; l -= BLKSIZ)
-                       if (eread(tarfd, b, BLKSIZ) > 0)
-                               ewrite(fd, b, MIN(l, BLKSIZ));
+               // Ceiling to BLKSIZ boundary
+               int readsize = (l + (BLKSIZ-1)) & ~(BLKSIZ-1);
+               char chunk[COPY_CHUNK_SIZE];
+               int lastread = 0;
+
+               for (; readsize > 0; l -= lastread, readsize -= lastread) {
+                       int chunk_size = MIN(readsize, COPY_CHUNK_SIZE);
+                       // Short reads are legal, so don't expect to read
+                       // everything that was requested.
+                       lastread = eread(tarfd, chunk, chunk_size);
+                       if (lastread == 0) {
+                               close(fd);
+                               remove(fname);
+                               eprintf("unexpected end of file reading %s.\n",
+                                       fname);
+                       }
+
+                       ewrite(fd, chunk, MIN(l, lastread));
+               }
                close(fd);
        }
 
@@ -331,7 +356,7 @@ unarchive(char *fname, ssize_t l, char b[BLKSIZ])
        times[0].tv_sec = times[1].tv_sec = mtime;
        times[0].tv_nsec = times[1].tv_nsec = 0;
        if (!mflag && utimensat(AT_FDCWD, fname, times, AT_SYMLINK_NOFOLLOW) < 
0)
-               weprintf("utimensat %s:\n", fname);
+               weprintf("utimensat %s %d:\n", fname, errno);
        if (h->type == SYMLINK) {
                if (!getuid() && lchown(fname, uid, gid))
                        weprintf("lchown %s:\n", fname);
@@ -349,10 +374,23 @@ static void
 skipblk(ssize_t l)
 {
        char b[BLKSIZ];
-
-       for (; l > 0; l -= BLKSIZ)
-               if (!eread(tarfd, b, BLKSIZ))
-                       break;
+       int lastread = 0;
+       // Ceiling to BLKSIZ boundary
+       int ceilsize = (l + (BLKSIZ-1)) & ~(BLKSIZ-1);
+
+       off_t offset = lseek(tarfd, ceilsize, SEEK_CUR);
+       if (offset >= ceilsize)
+               return;
+       if (errno != ESPIPE)
+               eprintf("unexpected end of file.\n");
+
+       // This is a pipe, socket or FIFO. Fall back to a sequential read.
+       for (; ceilsize > 0; ceilsize -= lastread) {
+               int chunk_size = MIN(ceilsize, BLKSIZ);
+               lastread = eread(tarfd, b, chunk_size);
+               if (lastread == 0)
+                       eprintf("unexpected end of file %d.\n", errno);
+        }
 }
 
 static int
@@ -370,7 +408,7 @@ c(const char *path, struct stat *st, void *data, struct 
recursor *r)
        if (vflag)
                puts(path);
 
-       if (S_ISDIR(st->st_mode))
+       if (st && S_ISDIR(st->st_mode))
                recurse(path, NULL, r);
 }
 
-- 
2.7.4


Reply via email to