Re: fts: optimize by using fstatat instead of lstat

2014-11-20 Thread Todd C. Miller
On Wed, 19 Nov 2014 20:30:04 -0800, Philip Guenther wrote:

> Committed, which leads to the next diff: use O_CLOEXEC on all internal 
> fds, O_DIRECTORY when opening a directory other than ".", and drop the 3rd 
> argument to open() as unnecessary when O_CREAT isn't used.

Looks good to me.  OK millert@

 - todd



Re: fts: optimize by using fstatat instead of lstat

2014-11-19 Thread Philip Guenther
On Wed, 19 Nov 2014, enh wrote:
> On Tue, Nov 18, 2014 at 10:55 PM, Philip Guenther  wrote:
> > enh, this look as good in the perf measurement cited in that googlesource
> > link?
> 
> yes. performance seems roughly identical. thanks!

Committed, which leads to the next diff: use O_CLOEXEC on all internal 
fds, O_DIRECTORY when opening a directory other than ".", and drop the 3rd 
argument to open() as unnecessary when O_CREAT isn't used.

(I think doug@ wondered about using O_CLOEXEC before but I demurred then 
because these fds _are_ accessible to applications via members in the 
visible header file and we had lower-hanging fruit to pick then, but 
seeing that FreeBSD has done this, maybe it's safe?

(I've pinged sthen@ for a ports src scan...)

Philip Guenther


Index: gen/fts.c
===
RCS file: /cvs/src/lib/libc/gen/fts.c,v
retrieving revision 1.48
diff -u -p -r1.48 fts.c
--- gen/fts.c   20 Nov 2014 04:14:15 -  1.48
+++ gen/fts.c   20 Nov 2014 04:18:31 -
@@ -159,7 +159,8 @@ fts_open(char * const *argv, int options
 * and ".." are all fairly nasty problems.  Note, if we can't get the
 * descriptor we run anyway, just more slowly.
 */
-   if (!ISSET(FTS_NOCHDIR) && (sp->fts_rfd = open(".", O_RDONLY, 0)) < 0)
+   if (!ISSET(FTS_NOCHDIR) &&
+   (sp->fts_rfd = open(".", O_RDONLY | O_CLOEXEC)) < 0)
SET(FTS_NOCHDIR);
 
if (nitems == 0)
@@ -284,7 +285,8 @@ fts_read(FTS *sp)
(p->fts_info == FTS_SL || p->fts_info == FTS_SLNONE)) {
p->fts_info = fts_stat(sp, p, 1, -1);
if (p->fts_info == FTS_D && !ISSET(FTS_NOCHDIR)) {
-   if ((p->fts_symfd = open(".", O_RDONLY, 0)) < 0) {
+   if ((p->fts_symfd =
+   open(".", O_RDONLY | O_CLOEXEC)) < 0) {
p->fts_errno = errno;
p->fts_info = FTS_ERR;
} else
@@ -374,7 +376,7 @@ next:   tmp = p;
p->fts_info = fts_stat(sp, p, 1, -1);
if (p->fts_info == FTS_D && !ISSET(FTS_NOCHDIR)) {
if ((p->fts_symfd =
-   open(".", O_RDONLY, 0)) < 0) {
+   open(".", O_RDONLY | O_CLOEXEC)) < 0) {
p->fts_errno = errno;
p->fts_info = FTS_ERR;
} else
@@ -513,7 +515,7 @@ fts_children(FTS *sp, int instr)
ISSET(FTS_NOCHDIR))
return (sp->fts_child = fts_build(sp, instr));
 
-   if ((fd = open(".", O_RDONLY, 0)) < 0)
+   if ((fd = open(".", O_RDONLY | O_CLOEXEC)) < 0)
return (NULL);
sp->fts_child = fts_build(sp, instr);
if (fchdir(fd)) {
@@ -1026,7 +1028,7 @@ fts_safe_changedir(FTS *sp, FTSENT *p, i
newfd = fd;
if (ISSET(FTS_NOCHDIR))
return (0);
-   if (fd < 0 && (newfd = open(path, O_RDONLY, 0)) < 0)
+   if (fd < 0 && (newfd = open(path, O_RDONLY|O_DIRECTORY|O_CLOEXEC)) < 0)
return (-1);
if (fstat(newfd, &sb)) {
ret = -1;



Re: fts: optimize by using fstatat instead of lstat

2014-11-19 Thread Todd C. Miller
I was considering doing something like this was but was unsure about
interactions with FTS_LOGICAL (and thus FTS_NOCHDIR).  I suppose
since fts_name is effectively d_name this is OK.

I also prefer passing dirp directly so the caller doesn't need to
use dirfd itself but that's not a big deal.

 - todd



Re: fts: optimize by using fstatat instead of lstat

2014-11-18 Thread Philip Guenther
On Tue, 18 Nov 2014, Todd C. Miller wrote:
> On Tue, 18 Nov 2014 15:19:50 +0100, Martin Pieuchot wrote:
> 
> > Here's the diff inline to facilitate review:
> 
> Thank you, I was having trouble navigating the web version to find
> a unified diff.
> 
> The diff looks good to me.  I took a stab at using fstatat() instead
> of the existing lstat() to avoid the extra else clause but it just
> made things uglier.

Hmm, I think I like how FreeBSD did this a bit better, as they optimize to 
fstatat() whenever fts isn't chdir'ing, including in the FTS_LOGICAL case.

(They also have some O_DIRECTORY and O_CLOEXEC diffs to pull in...)

Here's FreeBSD's r260571, tweaked to apply and with a pointless use of the 
comma operator eliminated.  Their commit message:


r260571 | jilles | 2014-01-12 12:30:55 -0800 (Sun, 12 Jan 2014) | 9 lines

fts: Stat things relative to the directory fd, if possible.

As a result, the kernel needs to process shorter pathnames if fts is not
changing directories (if fts follows symlinks (-L option to utilities), fts
cannot open "." or FTS_NOCHDIR was specified).

Side effect: If pathnames exceed PATH_MAX, [ENAMETOOLONG] is not hit at the
stat stage but later (opendir or application fts_accpath) or not at all.



enh, this look as good in the perf measurement cited in that googlesource 
link?


Philip


Index: gen/fts.c
===
RCS file: /cvs/src/lib/libc/gen/fts.c,v
retrieving revision 1.47
diff -u -p -r1.47 fts.c
--- gen/fts.c   8 Oct 2014 04:36:23 -   1.47
+++ gen/fts.c   19 Nov 2014 06:48:56 -
@@ -49,7 +49,7 @@ static size_t  fts_maxarglen(char * cons
 static void fts_padjust(FTS *, FTSENT *);
 static int  fts_palloc(FTS *, size_t);
 static FTSENT  *fts_sort(FTS *, FTSENT *, int);
-static u_short  fts_stat(FTS *, FTSENT *, int);
+static u_short  fts_stat(FTS *, FTSENT *, int, int);
 static int  fts_safe_changedir(FTS *, FTSENT *, int, char *);
 
 #defineISDOT(a)(a[0] == '.' && (!a[1] || (a[1] == '.' && 
!a[2])))
@@ -116,7 +116,7 @@ fts_open(char * const *argv, int options
p->fts_level = FTS_ROOTLEVEL;
p->fts_parent = parent;
p->fts_accpath = p->fts_name;
-   p->fts_info = fts_stat(sp, p, ISSET(FTS_COMFOLLOW));
+   p->fts_info = fts_stat(sp, p, ISSET(FTS_COMFOLLOW), -1);
 
/* Command-line "." and ".." are real directories. */
if (p->fts_info == FTS_DOT)
@@ -270,7 +270,7 @@ fts_read(FTS *sp)
 
/* Any type of file may be re-visited; re-stat and re-turn. */
if (instr == FTS_AGAIN) {
-   p->fts_info = fts_stat(sp, p, 0);
+   p->fts_info = fts_stat(sp, p, 0, -1);
return (p);
}
 
@@ -282,7 +282,7 @@ fts_read(FTS *sp)
 */
if (instr == FTS_FOLLOW &&
(p->fts_info == FTS_SL || p->fts_info == FTS_SLNONE)) {
-   p->fts_info = fts_stat(sp, p, 1);
+   p->fts_info = fts_stat(sp, p, 1, -1);
if (p->fts_info == FTS_D && !ISSET(FTS_NOCHDIR)) {
if ((p->fts_symfd = open(".", O_RDONLY, 0)) < 0) {
p->fts_errno = errno;
@@ -371,7 +371,7 @@ next:   tmp = p;
if (p->fts_instr == FTS_SKIP)
goto next;
if (p->fts_instr == FTS_FOLLOW) {
-   p->fts_info = fts_stat(sp, p, 1);
+   p->fts_info = fts_stat(sp, p, 1, -1);
if (p->fts_info == FTS_D && !ISSET(FTS_NOCHDIR)) {
if ((p->fts_symfd =
open(".", O_RDONLY, 0)) < 0) {
@@ -716,10 +716,11 @@ mem1: saved_errno = errno;
if (ISSET(FTS_NOCHDIR)) {
p->fts_accpath = p->fts_path;
memmove(cp, p->fts_name, p->fts_namelen + 1);
-   } else
+   p->fts_info = fts_stat(sp, p, 0, dirfd(dirp));
+   } else {
p->fts_accpath = p->fts_name;
-   /* Stat it. */
-   p->fts_info = fts_stat(sp, p, 0);
+   p->fts_info = fts_stat(sp, p, 0, -1);
+   }
 
/* Decrement link count if applicable. */
if (nlinks > 0 && (p->fts_info == FTS_D ||
@@ -786,13 +787,20 @@ mem1: saved_errno = errno;
 }
 
 static u_short
-fts_stat(FTS *sp, FTSENT *p, int follow)
+fts_stat(FTS *sp, FTSENT *p, int follow, int dfd)
 {
FTSENT *t;
dev_t dev;
ino_t ino;
struct stat *sbp, sb;
int saved_errno;
+  

Re: fts: optimize by using fstatat instead of lstat

2014-11-18 Thread Todd C. Miller
On Tue, 18 Nov 2014 15:19:50 +0100, Martin Pieuchot wrote:

> Here's the diff inline to facilitate review:

Thank you, I was having trouble navigating the web version to find
a unified diff.

The diff looks good to me.  I took a stab at using fstatat() instead
of the existing lstat() to avoid the extra else clause but it just
made things uglier.

OK millert@

 - todd



Re: fts: optimize by using fstatat instead of lstat

2014-11-18 Thread Martin Pieuchot
On 17/11/14(Mon) 17:00, enh wrote:
> Sony sent us (Android) a patch to use fstatat instead of lstat in fts.
> Sadly our copy of fts.c is already a little diverged from yours
> (because we have to use strlen to get the length of the d_name, not
> having a d_namlen field) but i thought i'd forward this your way
> anyway to reduce the need for further divergence...
> 
> https://android-review.googlesource.com/#/c/113415/

Here's the diff inline to facilitate review:

Index: gen/fts.c
===
RCS file: /home/ncvs/src/lib/libc/gen/fts.c,v
retrieving revision 1.47
diff -u -p -r1.47 fts.c
--- gen/fts.c   8 Oct 2014 04:36:23 -   1.47
+++ gen/fts.c   18 Nov 2014 14:17:07 -
@@ -49,7 +49,7 @@ static size_t  fts_maxarglen(char * cons
 static void fts_padjust(FTS *, FTSENT *);
 static int  fts_palloc(FTS *, size_t);
 static FTSENT  *fts_sort(FTS *, FTSENT *, int);
-static u_short  fts_stat(FTS *, FTSENT *, int);
+static u_short  fts_stat(FTS *, FTSENT *, int, DIR *);
 static int  fts_safe_changedir(FTS *, FTSENT *, int, char *);
 
 #defineISDOT(a)(a[0] == '.' && (!a[1] || (a[1] == '.' && 
!a[2])))
@@ -116,7 +116,7 @@ fts_open(char * const *argv, int options
p->fts_level = FTS_ROOTLEVEL;
p->fts_parent = parent;
p->fts_accpath = p->fts_name;
-   p->fts_info = fts_stat(sp, p, ISSET(FTS_COMFOLLOW));
+   p->fts_info = fts_stat(sp, p, ISSET(FTS_COMFOLLOW), NULL);
 
/* Command-line "." and ".." are real directories. */
if (p->fts_info == FTS_DOT)
@@ -270,7 +270,7 @@ fts_read(FTS *sp)
 
/* Any type of file may be re-visited; re-stat and re-turn. */
if (instr == FTS_AGAIN) {
-   p->fts_info = fts_stat(sp, p, 0);
+   p->fts_info = fts_stat(sp, p, 0, NULL);
return (p);
}
 
@@ -282,7 +282,7 @@ fts_read(FTS *sp)
 */
if (instr == FTS_FOLLOW &&
(p->fts_info == FTS_SL || p->fts_info == FTS_SLNONE)) {
-   p->fts_info = fts_stat(sp, p, 1);
+   p->fts_info = fts_stat(sp, p, 1, NULL);
if (p->fts_info == FTS_D && !ISSET(FTS_NOCHDIR)) {
if ((p->fts_symfd = open(".", O_RDONLY, 0)) < 0) {
p->fts_errno = errno;
@@ -371,7 +371,7 @@ next:   tmp = p;
if (p->fts_instr == FTS_SKIP)
goto next;
if (p->fts_instr == FTS_FOLLOW) {
-   p->fts_info = fts_stat(sp, p, 1);
+   p->fts_info = fts_stat(sp, p, 1, NULL);
if (p->fts_info == FTS_D && !ISSET(FTS_NOCHDIR)) {
if ((p->fts_symfd =
open(".", O_RDONLY, 0)) < 0) {
@@ -719,7 +719,7 @@ mem1:   saved_errno = errno;
} else
p->fts_accpath = p->fts_name;
/* Stat it. */
-   p->fts_info = fts_stat(sp, p, 0);
+   p->fts_info = fts_stat(sp, p, 0, dirp);
 
/* Decrement link count if applicable. */
if (nlinks > 0 && (p->fts_info == FTS_D ||
@@ -786,7 +786,7 @@ mem1:   saved_errno = errno;
 }
 
 static u_short
-fts_stat(FTS *sp, FTSENT *p, int follow)
+fts_stat(FTS *sp, FTSENT *p, int follow, DIR *dirp)
 {
FTSENT *t;
dev_t dev;
@@ -810,6 +810,11 @@ fts_stat(FTS *sp, FTSENT *p, int follow)
return (FTS_SLNONE);
}
p->fts_errno = saved_errno;
+   goto err;
+   }
+   } else if (dirp) {
+   if (fstatat(dirfd(dirp), p->fts_name, sbp, 
AT_SYMLINK_NOFOLLOW)) {
+   p->fts_errno = errno;
goto err;
}
} else if (lstat(p->fts_accpath, sbp)) {