This revision reduces code size in concat_path_file_fast() by 19 bytes. It also 
applies the new get_d_namlen() optimization to a readdir() loop in 
runit/svlogd.c.

If there are any changes that need to be made for inclusion then please let me 
know as soon as possible. I'd like to finish this project up. If the old/slow 
and new/fast code should be chosen with a compile-time config option then I'm 
happy to do that as well.
From 69312a6928c188ac8be3c714db3a53724b85dd09 Mon Sep 17 00:00:00 2001
From: Jody Bruchon <j...@jodybruchon.com>
Date: Wed, 10 Apr 2024 18:08:00 -0400
Subject: [PATCH v2] Huge performance boost for recursion (cp, du, find, ls,
 rm, mv)

This patch uses pre-calculated name lengths to massively speed up various
recursive operations. Three new *_fast variant functions are added along
with get_d_namlen copied from libjodycode. Passing lengths allows use of
memcpy() instead of strcpy()/strcat() and replacement of a particularly
hot xasprintf(). Cachegrind shows CPU instructions on Linux x86_64 drop
by 24% to 67% with similar reductions in data reads and writes.

Anything in BusyBox that uses a while(readdir()) loop or that calls
concat_*path_file() or last_char_is() might benefit from adopting this
optimization framework.

Bloat-O-Meter:

function                                             old     new   delta
concat_path_file_fast                                  -     194    +194
get_d_namlen                                           -      36     +36
concat_subpath_file_fast                               -      31     +31
last_char_is_fast                                      -      26     +26
complete_cmd_dir_file                                992    1002     +10
copy_file                                           1831    1834      +3
remove_file                                          708     707      -1
recursive_action1                                    420     419      -1
du                                                   468     467      -1
scan_and_display_dirs_recur                          675     672      -3
concat_subpath_file                                   39       -     -39
------------------------------------------------------------------------------
(add/remove: 5/1 grow/shrink: 2/4 up/down: 300/-45)           Total: 255 bytes

Cachegrind tests (-original, +improved):

--------------------------------------------------------------------------------
            Ir                  I1mr           ILmr           Dr                 D1mr           DLmr           Dw                 D1mw           DLmw
--------------------------------------------------------------------------------
cg_diff_cp:-1,811,369 (100.0%) 1,544 (100.0%) 1,514 (100.0%) 379,597 (100.0%) 3,151 (100.0%) 2,183 (100.0%) 249,874 (100.0%) 1,218 (100.0%) 1,160 (100.0%)  PROGRAM TOTALS
cg_diff_cp:+1,310,239 (100.0%) 1,550 (100.0%) 1,519 (100.0%) 290,298 (100.0%) 3,152 (100.0%) 2,183 (100.0%) 184,883 (100.0%) 1,218 (100.0%) 1,160 (100.0%)  PROGRAM TOTALS

cg_diff_du:-11,080,026 (100.0%) 1,692 (100.0%) 1,627 (100.0%) 2,345,969 (100.0%) 5,603 (100.0%) 2,524 (100.0%) 1,537,107 (100.0%) 1,838 (100.0%) 1,342 (100.0%)  PROGRAM TOTALS
cg_diff_du:+4,522,979 (100.0%) 1,635 (100.0%) 1,592 (100.0%) 1,189,256 (100.0%) 4,911 (100.0%) 2,513 (100.0%) 784,551 (100.0%) 1,636 (100.0%) 1,287 (100.0%)  PROGRAM TOTALS

cg_diff_find:-10,719,682 (100.0%) 1,638 (100.0%) 1,592 (100.0%) 2,360,985 (100.0%) 4,149 (100.0%) 2,634 (100.0%) 1,493,014 (100.0%) 1,096 (100.0%) 836 (100.0%)  PROGRAM TOTALS
cg_diff_find:+4,212,414 (100.0%) 1,527 (100.0%) 1,498 (100.0%) 1,215,858 (100.0%) 3,748 (100.0%) 2,629 (100.0%) 734,040 (100.0%) 850 (100.0%) 732 (100.0%)  PROGRAM TOTALS

cg_diff_ls:-17,363,363 (100.0%) 1,984 (100.0%) 1,731 (100.0%) 3,751,223 (100.0%) 33,435 (100.0%) 2,439 (100.0%) 2,805,925 (100.0%) 9,422 (100.0%) 2,713 (100.0%)  PROGRAM TOTALS
cg_diff_ls:+11,166,139 (100.0%) 1,774 (100.0%) 1,683 (100.0%) 2,666,248 (100.0%) 31,111 (100.0%) 2,671 (100.0%) 2,100,224 (100.0%) 9,007 (100.0%) 2,474 (100.0%)  PROGRAM TOTALS

cg_diff_rm:-6,176,069 (100.0%) 1,585 (100.0%) 1,537 (100.0%) 1,298,524 (100.0%) 3,536 (100.0%) 2,351 (100.0%) 830,656 (100.0%) 905 (100.0%) 802 (100.0%)  PROGRAM TOTALS
cg_diff_rm:+2,039,241 (100.0%) 1,459 (100.0%) 1,429 (100.0%) 573,877 (100.0%) 3,361 (100.0%) 2,438 (100.0%) 379,660 (100.0%) 724 (100.0%) 663 (100.0%)  PROGRAM TOTALS

svlogd: rmoldest(): use get_d_namlen()
last_char_is_fast: more robust parameter check

concat_path_file_fast: copy null byte instead of adding one later

The file name will always end in a null byte, so copy it, saving
18 bytes of code.

function                                             old     new   delta
concat_path_file_fast                                193     175     -18
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-18)             Total: -18 bytes

Signed-off-by: Jody Bruchon <j...@jodybruchon.com>
---
 coreutils/du.c              |  3 ++-
 coreutils/ls.c              |  3 ++-
 include/libbb.h             |  4 ++++
 libbb/Kbuild.src            |  1 +
 libbb/concat_path_file.c    | 31 +++++++++++++++++++++++++++++++
 libbb/concat_subpath_file.c |  8 ++++++++
 libbb/copy_file.c           |  3 ++-
 libbb/get_d_namlen.c        | 29 +++++++++++++++++++++++++++++
 libbb/last_char_is.c        | 10 ++++++++++
 libbb/lineedit.c            |  3 ++-
 libbb/recursive_action.c    |  4 +++-
 libbb/remove_file.c         |  3 ++-
 runit/svlogd.c              |  3 ++-
 13 files changed, 98 insertions(+), 7 deletions(-)
 create mode 100644 libbb/get_d_namlen.c

diff --git a/coreutils/du.c b/coreutils/du.c
index 4652b6300..7598b15b7 100644
--- a/coreutils/du.c
+++ b/coreutils/du.c
@@ -200,7 +200,8 @@ static unsigned long long du(const char *filename)
 		}
 
 		while ((entry = readdir(dir))) {
-			newfile = concat_subpath_file(filename, entry->d_name);
+//			newfile = concat_subpath_file(filename, entry->d_name);
+			newfile = concat_subpath_file_fast(filename, entry);
 			if (newfile == NULL)
 				continue;
 			++G.du_depth;
diff --git a/coreutils/ls.c b/coreutils/ls.c
index cc809b797..f103713f8 100644
--- a/coreutils/ls.c
+++ b/coreutils/ls.c
@@ -949,7 +949,8 @@ static struct dnode **scan_one_dir(const char *path, unsigned *nfiles_p)
 				continue; /* if only -A, skip . and .. but show other dotfiles */
 			}
 		}
-		fullname = concat_path_file(path, entry->d_name);
+//		fullname = concat_path_file(path, entry->d_name);
+		fullname = concat_path_file_fast(path, entry);
 		cur = my_stat(fullname, bb_basename(fullname), 0);
 		if (!cur) {
 			free(fullname);
diff --git a/include/libbb.h b/include/libbb.h
index ef5d04713..95a827754 100644
--- a/include/libbb.h
+++ b/include/libbb.h
@@ -563,6 +563,7 @@ char *bb_get_last_path_component_nostrip(const char *path) FAST_FUNC;
 const char *bb_basename(const char *name) FAST_FUNC;
 /* NB: can violate const-ness (similarly to strchr) */
 char *last_char_is(const char *s, int c) FAST_FUNC;
+char *last_char_is_fast(const char *s, int c, int len) FAST_FUNC;
 const char* endofname(const char *name) FAST_FUNC;
 char *is_prefixed_with(const char *string, const char *key) FAST_FUNC;
 char *is_suffixed_with(const char *string, const char *key) FAST_FUNC;
@@ -1686,9 +1687,12 @@ void config_close(parser_t *parser) FAST_FUNC;
  * If path is NULL, it is assumed to be "/".
  * filename should not be NULL. */
 char *concat_path_file(const char *path, const char *filename) FAST_FUNC;
+char *concat_path_file_fast(const char *path, const struct dirent *dirp) FAST_FUNC;
 /* Returns NULL on . and .. */
 char *concat_subpath_file(const char *path, const char *filename) FAST_FUNC;
+char *concat_subpath_file_fast(const char *path, const struct dirent *dirp) FAST_FUNC;
 
+size_t get_d_namlen(const struct dirent * const dirent) FAST_FUNC;
 
 int bb_make_directory(char *path, long mode, int flags) FAST_FUNC;
 
diff --git a/libbb/Kbuild.src b/libbb/Kbuild.src
index c3b30003f..154584462 100644
--- a/libbb/Kbuild.src
+++ b/libbb/Kbuild.src
@@ -39,6 +39,7 @@ lib-y += find_pid_by_name.o
 lib-y += find_root_device.o
 lib-y += full_write.o
 lib-y += get_console.o
+lib-y += get_d_namlen.o
 lib-y += get_last_path_component.o
 lib-y += get_line_from_file.o
 lib-y += getpty.o
diff --git a/libbb/concat_path_file.c b/libbb/concat_path_file.c
index 5b4b7f113..0574b7e39 100644
--- a/libbb/concat_path_file.c
+++ b/libbb/concat_path_file.c
@@ -26,3 +26,34 @@ char* FAST_FUNC concat_path_file(const char *path, const char *filename)
 		filename++;
 	return xasprintf("%s%s%s", path, (lc==NULL ? "/" : ""), filename);
 }
+
+
+char* FAST_FUNC concat_path_file_fast(const char *path, const struct dirent *dirp)
+{
+	const char *filename = dirp->d_name;
+	char *buf;
+	int lc_slash = 0;
+	int name_offset, end_offset;
+	int pathlen, namelen;
+
+	if (!path) {
+		path = "";
+		pathlen = 0;
+	} else {
+		pathlen = strlen(path);
+		if (last_char_is_fast(path, '/', pathlen) == NULL) lc_slash = 1;
+	}
+	namelen = get_d_namlen(dirp);
+	while (*filename == '/') {
+		filename++;
+		namelen--;
+	}
+	name_offset = pathlen + lc_slash;
+	end_offset = name_offset + namelen;
+	buf = (char *)malloc(end_offset + 1);
+	if (!buf) return NULL;
+	memcpy(buf, path, pathlen);
+	if (lc_slash == 1) *(buf + pathlen) = '/';
+	memcpy((buf + name_offset), filename, namelen + 1);
+	return buf;
+}
diff --git a/libbb/concat_subpath_file.c b/libbb/concat_subpath_file.c
index bc2ee96ca..dc9c5fe5f 100644
--- a/libbb/concat_subpath_file.c
+++ b/libbb/concat_subpath_file.c
@@ -20,3 +20,11 @@ char* FAST_FUNC concat_subpath_file(const char *path, const char *f)
 		return NULL;
 	return concat_path_file(path, f);
 }
+
+
+char* FAST_FUNC concat_subpath_file_fast(const char *path, const struct dirent *dirp)
+{
+	if (dirp->d_name && DOT_OR_DOTDOT(dirp->d_name))
+		return NULL;
+	return concat_path_file_fast(path, dirp);
+}
diff --git a/libbb/copy_file.c b/libbb/copy_file.c
index 044bc3c20..b39b70ec1 100644
--- a/libbb/copy_file.c
+++ b/libbb/copy_file.c
@@ -197,7 +197,8 @@ int FAST_FUNC copy_file(const char *source, const char *dest, int flags)
 		while ((d = readdir(dp)) != NULL) {
 			char *new_source, *new_dest;
 
-			new_source = concat_subpath_file(source, d->d_name);
+//			new_source = concat_subpath_file(source, d->d_name);
+			new_source = concat_subpath_file_fast(source, d);
 			if (new_source == NULL)
 				continue;
 			new_dest = concat_path_file(dest, d->d_name);
diff --git a/libbb/get_d_namlen.c b/libbb/get_d_namlen.c
new file mode 100644
index 000000000..1a9dbcf17
--- /dev/null
+++ b/libbb/get_d_namlen.c
@@ -0,0 +1,29 @@
+/* vi: set sw=4 ts=4: */
+/*
+ * Fast acquisition of d_namlen
+ *
+ * Copyright (C) 2024 by Jody Bruchon <j...@jodybruchon.com>
+ * Copied from libjodycode for use with BusyBox
+ *
+ * Licensed under GPLv2 or later, see file LICENSE in this source tree.
+ */
+
+#include "libbb.h"
+
+/* Extract d_namlen from struct dirent */
+size_t get_d_namlen(const struct dirent * const dirent)
+{
+#ifdef _DIRENT_HAVE_D_NAMLEN
+	return dirent->d_namlen;
+#elif defined _DIRENT_HAVE_D_RECLEN
+	const size_t base = (sizeof(struct dirent) - sizeof(((struct dirent *)0)->d_name)) - offsetof(struct dirent, d_name) - 1;
+	size_t skip;
+
+	skip = dirent->d_reclen - (sizeof(struct dirent) - sizeof(((struct dirent *)0)->d_name));
+	if (skip > 0) skip -= base;
+	return skip + strlen(dirent->d_name + skip);
+#else
+	return strlen(dirent->d_name);
+#endif
+	return 0;
+}
diff --git a/libbb/last_char_is.c b/libbb/last_char_is.c
index fba05f974..db2ae241d 100644
--- a/libbb/last_char_is.c
+++ b/libbb/last_char_is.c
@@ -17,3 +17,13 @@ char* FAST_FUNC last_char_is(const char *s, int c)
 		s++;
 	return (*s == (char)c) ? (char *) s : NULL;
 }
+
+
+/* Same as last_char_is() but uses a known length to skip the loop */
+char* FAST_FUNC last_char_is_fast(const char *s, int c, int len)
+{
+	if (len < 1)
+		return NULL;
+	s += len - 1;
+	return (*s == (char)c) ? (char *) s : NULL;
+}
diff --git a/libbb/lineedit.c b/libbb/lineedit.c
index 543a3f11c..bc38aaf07 100644
--- a/libbb/lineedit.c
+++ b/libbb/lineedit.c
@@ -921,7 +921,8 @@ static NOINLINE unsigned complete_cmd_dir_file(const char *command, int type)
 			if (strncmp(basecmd, name_found, baselen) != 0)
 				continue; /* no */
 
-			found = concat_path_file(lpath, name_found);
+//			found = concat_path_file(lpath, name_found);
+			found = concat_path_file_fast(lpath, next);
 			/* NB: stat() first so that we see is it a directory;
 			 * but if that fails, use lstat() so that
 			 * we still match dangling links */
diff --git a/libbb/recursive_action.c b/libbb/recursive_action.c
index b1c4bfad7..54697f034 100644
--- a/libbb/recursive_action.c
+++ b/libbb/recursive_action.c
@@ -10,6 +10,7 @@
 
 #undef DEBUG_RECURS_ACTION
 
+
 /*
  * Walk down all the directories under the specified
  * location, and do something (something specified
@@ -126,7 +127,8 @@ static int recursive_action1(recursive_state_t *state, const char *fileName)
 		char *nextFile;
 		int s;
 
-		nextFile = concat_subpath_file(fileName, next->d_name);
+//		nextFile = concat_subpath_file(fileName, next->d_name);
+		nextFile = concat_subpath_file_fast(fileName, next);
 		if (nextFile == NULL)
 			continue;
 
diff --git a/libbb/remove_file.c b/libbb/remove_file.c
index 1505e6218..1cfe839a0 100644
--- a/libbb/remove_file.c
+++ b/libbb/remove_file.c
@@ -53,7 +53,8 @@ int FAST_FUNC remove_file(const char *path, int flags)
 		while ((d = readdir(dp)) != NULL) {
 			char *new_path;
 
-			new_path = concat_subpath_file(path, d->d_name);
+//			new_path = concat_subpath_file(path, d->d_name);
+			new_path = concat_subpath_file_fast(path, d);
 			if (new_path == NULL)
 				continue;
 			if (remove_file(new_path, flags) < 0)
diff --git a/runit/svlogd.c b/runit/svlogd.c
index f7576f0fa..801805ef6 100644
--- a/runit/svlogd.c
+++ b/runit/svlogd.c
@@ -517,7 +517,8 @@ static void rmoldest(struct logdir *ld)
 		pause2cannot("open directory, want rotate", ld->name);
 	errno = 0;
 	while ((f = readdir(d))) {
-		if ((f->d_name[0] == '@') && (strlen(f->d_name) == 27)) {
+		if ((f->d_name[0] == '@') && (get_d_namlen(f) == 27)) {
+//		if ((f->d_name[0] == '@') && (strlen(f->d_name) == 27)) {
 			if (f->d_name[26] == 't') {
 				if (unlink(f->d_name) == -1)
 					warn2("can't unlink processor leftover", f->d_name);
-- 
2.34.1

_______________________________________________
busybox mailing list
busybox@busybox.net
http://lists.busybox.net/mailman/listinfo/busybox

Reply via email to