Re: [hackers] [sbase] [PATCH 04/10] Don't use buffered IO (fread) when not appropriate
Hi Laslo On Tue, Dec 6, 2016 at 12:51 PM, Laslo Hunholdwrote: > On Tue, 6 Dec 2016 10:26:22 +0100 > Silvan Jegen wrote: >> It only compiled for me because "util.h" includes stdio.h so the >> definitions are included already. We can still remove this include if >> we don't want it to be included twice but I don't know which approach >> is preferred here. > > we definitely need to include it here as your implementation does not > represent everyone's implementation. If Posix demands to include > something for a given function, we should include it. What I meant is that cksum.c includes transitively (because cksum.c includes "util.h" which includes ). As far as I know, the setup of my development environment should not matter in that case and the code should compile on any C compiler (with a preprocessor) without having to include in cksum.c again. Including directly in cksum.c makes the dependency more explicit though and even if this results in the code of the header being included again (as I think it will), I doubt it will have a big impact on compilation speed. Cheers, Silvan
Re: [hackers] [sbase] [PATCH 04/10] Don't use buffered IO (fread) when not appropriate
On Tue, 6 Dec 2016 10:26:22 +0100 Silvan Jegenwrote: Hey Silvan, > It only compiled for me because "util.h" includes stdio.h so the > definitions are included already. We can still remove this include if > we don't want it to be included twice but I don't know which approach > is preferred here. we definitely need to include it here as your implementation does not represent everyone's implementation. If Posix demands to include something for a given function, we should include it. Cheers Laslo -- Laslo Hunhold
Re: [hackers] [sbase] [PATCH 04/10] Don't use buffered IO (fread) when not appropriate
On Tue, Dec 6, 2016 at 9:17 AM, Michael Forneywrote: > On Mon, Dec 5, 2016 at 12:15 PM, Silvan Jegen wrote: >> Hi >> >> Some comments below. >> >> On Sun, Dec 04, 2016 at 09:55:06PM -0800, Michael Forney wrote: >>> diff --git a/cksum.c b/cksum.c >>> index 570ca81..b53ec17 100644 >>> --- a/cksum.c >>> +++ b/cksum.c >>> @@ -1,7 +1,9 @@ >>> /* See LICENSE file for copyright and license details. */ >>> +#include >>> #include >>> #include >> >> This include is not needed anymore. > > It is needed for printf, putchar, fputs, and stdout. You're right. It only compiled for me because "util.h" includes stdio.h so the definitions are included already. We can still remove this include if we don't want it to be included twice but I don't know which approach is preferred here. >>> diff --git a/od.c b/od.c >>> index b5884e7..9ae1e29 100644 >>> --- a/od.c >>> +++ b/od.c >>> @@ -1,8 +1,10 @@ >>> /* See LICENSE file for copyright and license details. */ >>> +#include >>> #include >>> #include >> >> This include is not needed anymore. > > It is needed for printf, fputc, and stdout. Same here. >>> for (i = 0; i < argc; i++) { >>> - if (!(fps[i] = fopen(argv[i], aflag ? "a" : "w"))) { >>> - weprintf("fopen %s:", argv[i]); >>> + if ((fds[i] = open(argv[i], O_WRONLY|O_CREAT|aflag, 0666)) < >>> 0) { >> >> umask will be honored when creating a file but I am wondering if just >> setting mode_t to 0660 would be the safer option here. > > POSIX says that it has to be 0666[0]: > > """ > When a file that does not exist is created, the following features > defined in the System Interfaces volume of POSIX.1-2008 shall apply > unless the utility or function description states otherwise: > > ... > > 3. If the file is a regular file, the permission bits of the file > shall be set to: S_IROTH | S_IWOTH | S_IRGRP | S_IWGRP | S_IRUSR | > S_IWUSR > > (see the description of File Modes in XBD Headers, ) > except that the bits specified by the file mode creation mask of the > process shall be cleared. > """ Ah, I did not know that POSIX had anything to say on this. Then we keep it the way it is. Cheers, Silvan > [0] > http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap01.html#tag_17_01_01_04 >
Re: [hackers] [sbase] [PATCH 04/10] Don't use buffered IO (fread) when not appropriate
Hi Some comments below. On Sun, Dec 04, 2016 at 09:55:06PM -0800, Michael Forney wrote: > fread reads the entire requested size (BUFSIZ), which causes tools to > block if only small amounts of data are available at a time. At best, > this causes unnecessary copies and inefficiency, at worst, tools like > tee and cat are almost unusable in some cases since they only display > large chunks of data at a time. > --- > cksum.c | 31 +-- > crypt.h | 2 +- > libutil/crypt.c | 37 +++-- > od.c| 42 ++ > tee.c | 39 +++ > 5 files changed, 82 insertions(+), 69 deletions(-) > > diff --git a/cksum.c b/cksum.c > index 570ca81..b53ec17 100644 > --- a/cksum.c > +++ b/cksum.c > @@ -1,7 +1,9 @@ > /* See LICENSE file for copyright and license details. */ > +#include > #include > #include This include is not needed anymore. > #include > +#include > > #include "util.h" > > @@ -61,19 +63,20 @@ static const unsigned long crctab[] = { > 0x, > }; > > static void > -cksum(FILE *fp, const char *s) > +cksum(int fd, const char *s) > { > - size_t len = 0, i, n; > + ssize_t n; > + size_t len = 0, i; > uint32_t ck = 0; > unsigned char buf[BUFSIZ]; > > - while ((n = fread(buf, 1, sizeof(buf), fp))) { > + while ((n = read(fd, buf, sizeof(buf))) > 0) { > for (i = 0; i < n; i++) > ck = (ck << 8) ^ crctab[(ck >> 24) ^ buf[i]]; > len += n; > } > - if (ferror(fp)) { > - weprintf("fread %s:", s ? s : ""); > + if (n < 0) { > + weprintf("read %s:", s ? s : ""); > ret = 1; > return; > } > @@ -92,29 +95,29 @@ cksum(FILE *fp, const char *s) > int > main(int argc, char *argv[]) > { > - FILE *fp; > + int fd; > > argv0 = argv[0], argc--, argv++; > > if (!argc) { > - cksum(stdin, NULL); > + cksum(0, NULL); > } else { > for (; *argv; argc--, argv++) { > if (!strcmp(*argv, "-")) { > *argv = ""; > - fp = stdin; > - } else if (!(fp = fopen(*argv, "r"))) { > - weprintf("fopen %s:", *argv); > + fd = 0; > + } else if ((fd = open(*argv, O_RDONLY)) < 0) { > + weprintf("open %s:", *argv); > ret = 1; > continue; > } > - cksum(fp, *argv); > - if (fp != stdin && fshut(fp, *argv)) > - ret = 1; > + cksum(fd, *argv); > + if (fd != 0) > + close(fd); > } > } > > - ret |= fshut(stdin, "") | fshut(stdout, ""); > + ret |= fshut(stdout, ""); > > return ret; > } > diff --git a/crypt.h b/crypt.h > index e0cc08d..2fd2932 100644 > --- a/crypt.h > +++ b/crypt.h > @@ -8,5 +8,5 @@ struct crypt_ops { > > int cryptcheck(int, char **, struct crypt_ops *, uint8_t *, size_t); > int cryptmain(int, char **, struct crypt_ops *, uint8_t *, size_t); > -int cryptsum(struct crypt_ops *, FILE *, const char *, uint8_t *); > +int cryptsum(struct crypt_ops *, int, const char *, uint8_t *); > void mdprint(const uint8_t *, const char *, size_t); > diff --git a/libutil/crypt.c b/libutil/crypt.c > index 6991c39..e285614 100644 > --- a/libutil/crypt.c > +++ b/libutil/crypt.c > @@ -1,8 +1,10 @@ > /* See LICENSE file for copyright and license details. */ > +#include > #include > #include > #include > #include > +#include > > #include "../crypt.h" > #include "../text.h" > @@ -41,7 +43,7 @@ static void > mdchecklist(FILE *listfp, struct crypt_ops *ops, uint8_t *md, size_t sz, > int *formatsucks, int *noread, int *nonmatch) > { > - FILE *fp; > + int fd; > size_t bufsiz = 0; > int r; > char *line = NULL, *file, *p; > @@ -59,12 +61,12 @@ mdchecklist(FILE *listfp, struct crypt_ops *ops, uint8_t > *md, size_t sz, > file += 2; > for (p = file; *p && *p != '\n' && *p != '\r'; p++); /* strip > newline */ > *p = '\0'; > - if (!(fp = fopen(file, "r"))) { > - weprintf("fopen %s:", file); > + if ((fd = open(file, O_RDONLY)) < 0) { > + weprintf("open %s:", file); > (*noread)++; > continue; > } > - if (cryptsum(ops, fp, file, md)) { > + if (cryptsum(ops, fd, file, md)) { > (*noread)++; > continue; > } > @@ -77,7 +79,7 @@
[hackers] [sbase] [PATCH 04/10] Don't use buffered IO (fread) when not appropriate
fread reads the entire requested size (BUFSIZ), which causes tools to block if only small amounts of data are available at a time. At best, this causes unnecessary copies and inefficiency, at worst, tools like tee and cat are almost unusable in some cases since they only display large chunks of data at a time. --- cksum.c | 31 +-- crypt.h | 2 +- libutil/crypt.c | 37 +++-- od.c| 42 ++ tee.c | 39 +++ 5 files changed, 82 insertions(+), 69 deletions(-) diff --git a/cksum.c b/cksum.c index 570ca81..b53ec17 100644 --- a/cksum.c +++ b/cksum.c @@ -1,7 +1,9 @@ /* See LICENSE file for copyright and license details. */ +#include #include #include #include +#include #include "util.h" @@ -61,19 +63,20 @@ static const unsigned long crctab[] = { 0x, }; static void -cksum(FILE *fp, const char *s) +cksum(int fd, const char *s) { - size_t len = 0, i, n; + ssize_t n; + size_t len = 0, i; uint32_t ck = 0; unsigned char buf[BUFSIZ]; - while ((n = fread(buf, 1, sizeof(buf), fp))) { + while ((n = read(fd, buf, sizeof(buf))) > 0) { for (i = 0; i < n; i++) ck = (ck << 8) ^ crctab[(ck >> 24) ^ buf[i]]; len += n; } - if (ferror(fp)) { - weprintf("fread %s:", s ? s : ""); + if (n < 0) { + weprintf("read %s:", s ? s : ""); ret = 1; return; } @@ -92,29 +95,29 @@ cksum(FILE *fp, const char *s) int main(int argc, char *argv[]) { - FILE *fp; + int fd; argv0 = argv[0], argc--, argv++; if (!argc) { - cksum(stdin, NULL); + cksum(0, NULL); } else { for (; *argv; argc--, argv++) { if (!strcmp(*argv, "-")) { *argv = ""; - fp = stdin; - } else if (!(fp = fopen(*argv, "r"))) { - weprintf("fopen %s:", *argv); + fd = 0; + } else if ((fd = open(*argv, O_RDONLY)) < 0) { + weprintf("open %s:", *argv); ret = 1; continue; } - cksum(fp, *argv); - if (fp != stdin && fshut(fp, *argv)) - ret = 1; + cksum(fd, *argv); + if (fd != 0) + close(fd); } } - ret |= fshut(stdin, "") | fshut(stdout, ""); + ret |= fshut(stdout, ""); return ret; } diff --git a/crypt.h b/crypt.h index e0cc08d..2fd2932 100644 --- a/crypt.h +++ b/crypt.h @@ -8,5 +8,5 @@ struct crypt_ops { int cryptcheck(int, char **, struct crypt_ops *, uint8_t *, size_t); int cryptmain(int, char **, struct crypt_ops *, uint8_t *, size_t); -int cryptsum(struct crypt_ops *, FILE *, const char *, uint8_t *); +int cryptsum(struct crypt_ops *, int, const char *, uint8_t *); void mdprint(const uint8_t *, const char *, size_t); diff --git a/libutil/crypt.c b/libutil/crypt.c index 6991c39..e285614 100644 --- a/libutil/crypt.c +++ b/libutil/crypt.c @@ -1,8 +1,10 @@ /* See LICENSE file for copyright and license details. */ +#include #include #include #include #include +#include #include "../crypt.h" #include "../text.h" @@ -41,7 +43,7 @@ static void mdchecklist(FILE *listfp, struct crypt_ops *ops, uint8_t *md, size_t sz, int *formatsucks, int *noread, int *nonmatch) { - FILE *fp; + int fd; size_t bufsiz = 0; int r; char *line = NULL, *file, *p; @@ -59,12 +61,12 @@ mdchecklist(FILE *listfp, struct crypt_ops *ops, uint8_t *md, size_t sz, file += 2; for (p = file; *p && *p != '\n' && *p != '\r'; p++); /* strip newline */ *p = '\0'; - if (!(fp = fopen(file, "r"))) { - weprintf("fopen %s:", file); + if ((fd = open(file, O_RDONLY)) < 0) { + weprintf("open %s:", file); (*noread)++; continue; } - if (cryptsum(ops, fp, file, md)) { + if (cryptsum(ops, fd, file, md)) { (*noread)++; continue; } @@ -77,7 +79,7 @@ mdchecklist(FILE *listfp, struct crypt_ops *ops, uint8_t *md, size_t sz, } else { (*formatsucks)++; } - fclose(fp); + close(fd); } free(line); } @@ -124,11 +126,11 @@ cryptcheck(int argc, char