Re: fts: optimize by using fstatat instead of lstat
On Wed, 19 Nov 2014 20:30:04 -0800, Philip Guenther wrote: > Committed, which leads to the next diff: use O_CLOEXEC on all internal > fds, O_DIRECTORY when opening a directory other than ".", and drop the 3rd > argument to open() as unnecessary when O_CREAT isn't used. Looks good to me. OK millert@ - todd
Re: fts: optimize by using fstatat instead of lstat
On Wed, 19 Nov 2014, enh wrote: > On Tue, Nov 18, 2014 at 10:55 PM, Philip Guenther wrote: > > enh, this look as good in the perf measurement cited in that googlesource > > link? > > yes. performance seems roughly identical. thanks! Committed, which leads to the next diff: use O_CLOEXEC on all internal fds, O_DIRECTORY when opening a directory other than ".", and drop the 3rd argument to open() as unnecessary when O_CREAT isn't used. (I think doug@ wondered about using O_CLOEXEC before but I demurred then because these fds _are_ accessible to applications via members in the visible header file and we had lower-hanging fruit to pick then, but seeing that FreeBSD has done this, maybe it's safe? (I've pinged sthen@ for a ports src scan...) Philip Guenther Index: gen/fts.c === RCS file: /cvs/src/lib/libc/gen/fts.c,v retrieving revision 1.48 diff -u -p -r1.48 fts.c --- gen/fts.c 20 Nov 2014 04:14:15 - 1.48 +++ gen/fts.c 20 Nov 2014 04:18:31 - @@ -159,7 +159,8 @@ fts_open(char * const *argv, int options * and ".." are all fairly nasty problems. Note, if we can't get the * descriptor we run anyway, just more slowly. */ - if (!ISSET(FTS_NOCHDIR) && (sp->fts_rfd = open(".", O_RDONLY, 0)) < 0) + if (!ISSET(FTS_NOCHDIR) && + (sp->fts_rfd = open(".", O_RDONLY | O_CLOEXEC)) < 0) SET(FTS_NOCHDIR); if (nitems == 0) @@ -284,7 +285,8 @@ fts_read(FTS *sp) (p->fts_info == FTS_SL || p->fts_info == FTS_SLNONE)) { p->fts_info = fts_stat(sp, p, 1, -1); if (p->fts_info == FTS_D && !ISSET(FTS_NOCHDIR)) { - if ((p->fts_symfd = open(".", O_RDONLY, 0)) < 0) { + if ((p->fts_symfd = + open(".", O_RDONLY | O_CLOEXEC)) < 0) { p->fts_errno = errno; p->fts_info = FTS_ERR; } else @@ -374,7 +376,7 @@ next: tmp = p; p->fts_info = fts_stat(sp, p, 1, -1); if (p->fts_info == FTS_D && !ISSET(FTS_NOCHDIR)) { if ((p->fts_symfd = - open(".", O_RDONLY, 0)) < 0) { + open(".", O_RDONLY | O_CLOEXEC)) < 0) { p->fts_errno = errno; p->fts_info = FTS_ERR; } else @@ -513,7 +515,7 @@ fts_children(FTS *sp, int instr) ISSET(FTS_NOCHDIR)) return (sp->fts_child = fts_build(sp, instr)); - if ((fd = open(".", O_RDONLY, 0)) < 0) + if ((fd = open(".", O_RDONLY | O_CLOEXEC)) < 0) return (NULL); sp->fts_child = fts_build(sp, instr); if (fchdir(fd)) { @@ -1026,7 +1028,7 @@ fts_safe_changedir(FTS *sp, FTSENT *p, i newfd = fd; if (ISSET(FTS_NOCHDIR)) return (0); - if (fd < 0 && (newfd = open(path, O_RDONLY, 0)) < 0) + if (fd < 0 && (newfd = open(path, O_RDONLY|O_DIRECTORY|O_CLOEXEC)) < 0) return (-1); if (fstat(newfd, &sb)) { ret = -1;
Re: fts: optimize by using fstatat instead of lstat
I was considering doing something like this was but was unsure about interactions with FTS_LOGICAL (and thus FTS_NOCHDIR). I suppose since fts_name is effectively d_name this is OK. I also prefer passing dirp directly so the caller doesn't need to use dirfd itself but that's not a big deal. - todd
Re: fts: optimize by using fstatat instead of lstat
On Tue, 18 Nov 2014, Todd C. Miller wrote: > On Tue, 18 Nov 2014 15:19:50 +0100, Martin Pieuchot wrote: > > > Here's the diff inline to facilitate review: > > Thank you, I was having trouble navigating the web version to find > a unified diff. > > The diff looks good to me. I took a stab at using fstatat() instead > of the existing lstat() to avoid the extra else clause but it just > made things uglier. Hmm, I think I like how FreeBSD did this a bit better, as they optimize to fstatat() whenever fts isn't chdir'ing, including in the FTS_LOGICAL case. (They also have some O_DIRECTORY and O_CLOEXEC diffs to pull in...) Here's FreeBSD's r260571, tweaked to apply and with a pointless use of the comma operator eliminated. Their commit message: r260571 | jilles | 2014-01-12 12:30:55 -0800 (Sun, 12 Jan 2014) | 9 lines fts: Stat things relative to the directory fd, if possible. As a result, the kernel needs to process shorter pathnames if fts is not changing directories (if fts follows symlinks (-L option to utilities), fts cannot open "." or FTS_NOCHDIR was specified). Side effect: If pathnames exceed PATH_MAX, [ENAMETOOLONG] is not hit at the stat stage but later (opendir or application fts_accpath) or not at all. enh, this look as good in the perf measurement cited in that googlesource link? Philip Index: gen/fts.c === RCS file: /cvs/src/lib/libc/gen/fts.c,v retrieving revision 1.47 diff -u -p -r1.47 fts.c --- gen/fts.c 8 Oct 2014 04:36:23 - 1.47 +++ gen/fts.c 19 Nov 2014 06:48:56 - @@ -49,7 +49,7 @@ static size_t fts_maxarglen(char * cons static void fts_padjust(FTS *, FTSENT *); static int fts_palloc(FTS *, size_t); static FTSENT *fts_sort(FTS *, FTSENT *, int); -static u_short fts_stat(FTS *, FTSENT *, int); +static u_short fts_stat(FTS *, FTSENT *, int, int); static int fts_safe_changedir(FTS *, FTSENT *, int, char *); #defineISDOT(a)(a[0] == '.' && (!a[1] || (a[1] == '.' && !a[2]))) @@ -116,7 +116,7 @@ fts_open(char * const *argv, int options p->fts_level = FTS_ROOTLEVEL; p->fts_parent = parent; p->fts_accpath = p->fts_name; - p->fts_info = fts_stat(sp, p, ISSET(FTS_COMFOLLOW)); + p->fts_info = fts_stat(sp, p, ISSET(FTS_COMFOLLOW), -1); /* Command-line "." and ".." are real directories. */ if (p->fts_info == FTS_DOT) @@ -270,7 +270,7 @@ fts_read(FTS *sp) /* Any type of file may be re-visited; re-stat and re-turn. */ if (instr == FTS_AGAIN) { - p->fts_info = fts_stat(sp, p, 0); + p->fts_info = fts_stat(sp, p, 0, -1); return (p); } @@ -282,7 +282,7 @@ fts_read(FTS *sp) */ if (instr == FTS_FOLLOW && (p->fts_info == FTS_SL || p->fts_info == FTS_SLNONE)) { - p->fts_info = fts_stat(sp, p, 1); + p->fts_info = fts_stat(sp, p, 1, -1); if (p->fts_info == FTS_D && !ISSET(FTS_NOCHDIR)) { if ((p->fts_symfd = open(".", O_RDONLY, 0)) < 0) { p->fts_errno = errno; @@ -371,7 +371,7 @@ next: tmp = p; if (p->fts_instr == FTS_SKIP) goto next; if (p->fts_instr == FTS_FOLLOW) { - p->fts_info = fts_stat(sp, p, 1); + p->fts_info = fts_stat(sp, p, 1, -1); if (p->fts_info == FTS_D && !ISSET(FTS_NOCHDIR)) { if ((p->fts_symfd = open(".", O_RDONLY, 0)) < 0) { @@ -716,10 +716,11 @@ mem1: saved_errno = errno; if (ISSET(FTS_NOCHDIR)) { p->fts_accpath = p->fts_path; memmove(cp, p->fts_name, p->fts_namelen + 1); - } else + p->fts_info = fts_stat(sp, p, 0, dirfd(dirp)); + } else { p->fts_accpath = p->fts_name; - /* Stat it. */ - p->fts_info = fts_stat(sp, p, 0); + p->fts_info = fts_stat(sp, p, 0, -1); + } /* Decrement link count if applicable. */ if (nlinks > 0 && (p->fts_info == FTS_D || @@ -786,13 +787,20 @@ mem1: saved_errno = errno; } static u_short -fts_stat(FTS *sp, FTSENT *p, int follow) +fts_stat(FTS *sp, FTSENT *p, int follow, int dfd) { FTSENT *t; dev_t dev; ino_t ino; struct stat *sbp, sb; int saved_errno; +
Re: fts: optimize by using fstatat instead of lstat
On Tue, 18 Nov 2014 15:19:50 +0100, Martin Pieuchot wrote: > Here's the diff inline to facilitate review: Thank you, I was having trouble navigating the web version to find a unified diff. The diff looks good to me. I took a stab at using fstatat() instead of the existing lstat() to avoid the extra else clause but it just made things uglier. OK millert@ - todd
Re: fts: optimize by using fstatat instead of lstat
On 17/11/14(Mon) 17:00, enh wrote: > Sony sent us (Android) a patch to use fstatat instead of lstat in fts. > Sadly our copy of fts.c is already a little diverged from yours > (because we have to use strlen to get the length of the d_name, not > having a d_namlen field) but i thought i'd forward this your way > anyway to reduce the need for further divergence... > > https://android-review.googlesource.com/#/c/113415/ Here's the diff inline to facilitate review: Index: gen/fts.c === RCS file: /home/ncvs/src/lib/libc/gen/fts.c,v retrieving revision 1.47 diff -u -p -r1.47 fts.c --- gen/fts.c 8 Oct 2014 04:36:23 - 1.47 +++ gen/fts.c 18 Nov 2014 14:17:07 - @@ -49,7 +49,7 @@ static size_t fts_maxarglen(char * cons static void fts_padjust(FTS *, FTSENT *); static int fts_palloc(FTS *, size_t); static FTSENT *fts_sort(FTS *, FTSENT *, int); -static u_short fts_stat(FTS *, FTSENT *, int); +static u_short fts_stat(FTS *, FTSENT *, int, DIR *); static int fts_safe_changedir(FTS *, FTSENT *, int, char *); #defineISDOT(a)(a[0] == '.' && (!a[1] || (a[1] == '.' && !a[2]))) @@ -116,7 +116,7 @@ fts_open(char * const *argv, int options p->fts_level = FTS_ROOTLEVEL; p->fts_parent = parent; p->fts_accpath = p->fts_name; - p->fts_info = fts_stat(sp, p, ISSET(FTS_COMFOLLOW)); + p->fts_info = fts_stat(sp, p, ISSET(FTS_COMFOLLOW), NULL); /* Command-line "." and ".." are real directories. */ if (p->fts_info == FTS_DOT) @@ -270,7 +270,7 @@ fts_read(FTS *sp) /* Any type of file may be re-visited; re-stat and re-turn. */ if (instr == FTS_AGAIN) { - p->fts_info = fts_stat(sp, p, 0); + p->fts_info = fts_stat(sp, p, 0, NULL); return (p); } @@ -282,7 +282,7 @@ fts_read(FTS *sp) */ if (instr == FTS_FOLLOW && (p->fts_info == FTS_SL || p->fts_info == FTS_SLNONE)) { - p->fts_info = fts_stat(sp, p, 1); + p->fts_info = fts_stat(sp, p, 1, NULL); if (p->fts_info == FTS_D && !ISSET(FTS_NOCHDIR)) { if ((p->fts_symfd = open(".", O_RDONLY, 0)) < 0) { p->fts_errno = errno; @@ -371,7 +371,7 @@ next: tmp = p; if (p->fts_instr == FTS_SKIP) goto next; if (p->fts_instr == FTS_FOLLOW) { - p->fts_info = fts_stat(sp, p, 1); + p->fts_info = fts_stat(sp, p, 1, NULL); if (p->fts_info == FTS_D && !ISSET(FTS_NOCHDIR)) { if ((p->fts_symfd = open(".", O_RDONLY, 0)) < 0) { @@ -719,7 +719,7 @@ mem1: saved_errno = errno; } else p->fts_accpath = p->fts_name; /* Stat it. */ - p->fts_info = fts_stat(sp, p, 0); + p->fts_info = fts_stat(sp, p, 0, dirp); /* Decrement link count if applicable. */ if (nlinks > 0 && (p->fts_info == FTS_D || @@ -786,7 +786,7 @@ mem1: saved_errno = errno; } static u_short -fts_stat(FTS *sp, FTSENT *p, int follow) +fts_stat(FTS *sp, FTSENT *p, int follow, DIR *dirp) { FTSENT *t; dev_t dev; @@ -810,6 +810,11 @@ fts_stat(FTS *sp, FTSENT *p, int follow) return (FTS_SLNONE); } p->fts_errno = saved_errno; + goto err; + } + } else if (dirp) { + if (fstatat(dirfd(dirp), p->fts_name, sbp, AT_SYMLINK_NOFOLLOW)) { + p->fts_errno = errno; goto err; } } else if (lstat(p->fts_accpath, sbp)) {