The attached patch is a cleanup of the recurse_scan_intern() function in
src/core/share.c. I was looking at the code, and noticed redundant calls to
stat(), and by the time I got through eliminating them the structure of the
code had changed noticeably.
The new version is driven by a single while() loop. Directories are added
to a list for later processing, while files are fully processed immediately
(no more intermediate list).
Patch was made against SVN r12353
Summary of changes:
src/core/share.c:
Modified recurse_scan_intern()
Added shared_file_valid_extension()
src/core/share/h:
Added shared_file_valid_extension() prototype
src/lib/file.h
Added S_ISLNK macro (this seemed like the correct file for this)
Testing shows that the new code is slightly faster than the original (about
2% faster for 7659 files processed).
I have NOT replaced the call to "request_sha1()" - I just added a large
comment where it should be added. This change need to be synchronized with
the changes to "src/core/huge.c"
Lloyd Bryant
diff -ru -X diff.exclude /usr/local/src/base.gtkg/gtk-gnutella/src/core/share.c src/core/share.c
--- /usr/local/src/base.gtkg/gtk-gnutella/src/core/share.c 2006-11-18 01:17:55.000000000 -0700
+++ src/core/share.c 2006-11-18 16:45:11.000000000 -0700
@@ -932,15 +932,15 @@
static void
recurse_scan_intern(const gchar * const base_dir, const gchar * const dir)
{
- GSList *exts = NULL;
DIR *directory; /* Dir stream used by opendir, readdir etc.. */
struct dirent *dir_entry;
GSList *files = NULL;
GSList *directories = NULL;
GSList *sl;
- guint i;
- const gchar *entry_end;
gchar *dir_name;
+ tm_t base, current, elapsed;
+
+ tm_now_exact(&base);
g_return_if_fail('\0' != dir[0]);
g_return_if_fail(is_absolute_path(base_dir));
@@ -951,145 +951,156 @@
return;
}
- while ((dir_entry = readdir(directory))) {
- struct stat sb;
- gchar *full;
-
- if (dir_entry->d_name[0] == '.') /* Hidden file, or "." or ".." */
- continue;
-
- full = make_pathname(dir, dir_entry->d_name);
- if (stat(full, &sb)) {
- g_warning("can't stat %s: %s", full, g_strerror(errno));
- break;
- }
- if (S_ISDIR(sb.st_mode)) {
- if (!scan_ignore_symlink_dirs || !is_symlink(full)) {
- directories = g_slist_prepend(directories, full);
- full = NULL;
- }
- } else if (S_ISREG(sb.st_mode)) {
- if (!scan_ignore_symlink_regfiles || !is_symlink(full)) {
- files = g_slist_prepend(files, full);
- full = NULL;
- }
- }
- G_FREE_NULL(full);
- }
-
+ /* Get relative path if required */
if (search_results_expose_relative_paths) {
dir_name = get_relative_path(base_dir, dir);
} else {
dir_name = NULL;
}
+
+ /* Process the entries in this directory */
+ while ((dir_entry = readdir(directory))) {
+ struct stat file_stat;
+ gchar *fullpath;
+ gboolean is_slink = FALSE;
+ gchar *name;
+ struct shared_file *found = NULL;
- for (i = 0, sl = files; sl; i++, sl = g_slist_next(sl)) {
- const gchar *full, *name;
-
- full = sl->data;
-
- /*
- * In the "tmp" directory, don't share files that have a trailer.
- * It's probably a file being downloaded, and which is not complete yet.
- * This check is necessary in case they choose to share their
- * downloading directory...
- */
- name = strrchr(full, G_DIR_SEPARATOR);
- g_assert(name && G_DIR_SEPARATOR == name[0]);
- name++; /* Start of file name */
+ if (dir_entry->d_name[0] == '.') /* Hidden file, or "." or ".." */
+ continue;
- entry_end = strchr(name, '\0');
+ fullpath = make_pathname(dir, dir_entry->d_name);
- for (exts = extensions; exts; exts = exts->next) {
- struct extension *e = exts->data;
- const gchar *start = entry_end - (e->len + 1); /* +1 for "." */
+ /* First, lstat the file */
+ if (lstat(fullpath, &file_stat)) {
+ g_warning("can't lstat %s: %s", fullpath, g_strerror(errno));
+ G_FREE_NULL(fullpath);
+ continue;
+ }
- /*
- * Look for the trailing chars (we're matching an extension).
- * Matching is case-insensitive, and the extension opener is ".".
- *
- * An extension "--all--" matches all files, even if they
- * don't have any extension. [Patch from Zygo Blaxell].
- */
+ /* If it's a symlink, call stat to get info on the linked file */
+ if (S_ISLNK(file_stat.st_mode)) {
+ is_slink = TRUE;
+ if (stat(fullpath, &file_stat)) {
+ g_warning("Broken symlink %s: %s", fullpath, g_strerror(errno));
+ G_FREE_NULL(fullpath);
+ continue;
+ }
+ }
+
+ /*
+ * For symlinks, we check whether we are supposed to process symlinks
+ * for that type of entry, then either proceed or skip the entry.
+ */
+ if (S_ISDIR(file_stat.st_mode)) {
+ if (!is_slink || !scan_ignore_symlink_dirs) {
- if (
- 0 == ascii_strcasecmp("--all--", e->str) || /* All files */
- (start >= name && *start == '.' &&
- 0 == ascii_strcasecmp(start + 1, e->str))
- ) {
- struct shared_file *found = NULL;
- struct stat file_stat;
+ /* If a directory, add to list for later processing */
+ directories = g_slist_prepend(directories, fullpath);
+ fullpath = NULL;
+ }
+ } else if (S_ISREG(file_stat.st_mode)) {
+ if (!is_slink || !scan_ignore_symlink_regfiles) {
- if (share_debug > 5)
- g_message("recurse_scan: full=\"%s\"", full);
+ /*
+ * It's a regular file - validate it, and add it to
+ * shared_files
+ */
+ name = strrchr(fullpath, G_DIR_SEPARATOR); /* find last '/' */
+ g_assert(name && G_DIR_SEPARATOR == name[0]);
+ name++; /* Increment to start of filename */
- if (contains_control_chars(full)) {
+ /* Some sanity checks on the file */
+ if (contains_control_chars(fullpath)) {
g_warning("Not sharing filename with control characters: "
- "\"%s\"", full);
- break;
+ "\"%s\"", fullpath);
+ G_FREE_NULL(fullpath);
+ continue;
}
- if (stat(full, &file_stat) == -1) {
- g_warning("can't stat %s: %s", full, g_strerror(errno));
- break;
+ if (too_big_for_gnutella(file_stat.st_size)) {
+ g_warning("File is too big to be shared: \"%s\"", fullpath);
+ G_FREE_NULL(fullpath);
+ continue;
}
if (0 == file_stat.st_size) {
if (share_debug > 5)
- g_warning("Not sharing empty file: \"%s\"", full);
- break;
+ g_warning("Not sharing empty file: \"%s\"", fullpath);
+ G_FREE_NULL(fullpath);
+ continue;
}
- if (!S_ISREG(file_stat.st_mode)) {
- g_warning("Not sharing non-regular file: \"%s\"", full);
- break;
+ if (!shared_file_valid_extension(name)) {
+ G_FREE_NULL(fullpath);
+ continue;
}
- if (too_big_for_gnutella(file_stat.st_size)) {
- g_warning("File is too big to be shared: \"%s\"", full);
- break;
- }
+ if (share_debug > 5)
+ g_message("recurse_scan: full=\"%s\"", fullpath);
+ /* Checks complete - add the file */
found = shared_file_alloc();
- found->file_path = atom_str_get(full);
+ found->file_path = atom_str_get(fullpath);
found->relative_path = dir_name ? atom_str_get(dir_name) : NULL;
found->file_size = file_stat.st_size;
found->mtime = file_stat.st_mtime;
- found->content_type =
- share_mime_type(SHARE_M_APPLICATION_BINARY);
+ found->content_type = share_mime_type(
+ SHARE_M_APPLICATION_BINARY);
+
if (shared_file_set_names(found, name)) {
shared_file_free(&found);
- break;
- }
-
- if (!sha1_is_cached(found) && file_info_has_trailer(full)) {
- /*
- * It's probably a file being downloaded, and which is
- * not complete yet. This check is necessary in case
- * they choose to share their downloading directory...
- */
+ continue;
+ }
- g_warning("will not share partial file \"%s\"", full);
- shared_file_free(&found);
- break;
+ if (!sha1_is_cached(found)) {
+ if (file_info_has_trailer(found->file_path)) {
+ /*
+ * It's probably a file being downloaded, and which is
+ * not complete yet. This check is necessary in case
+ * they choose to share their downloading directory...
+ */
+
+ g_warning("will not share partial file \"%s\"",
+ fullpath);
+ shared_file_free(&found);
+ continue; /* while ((dir_entry... */
+ }
}
+ /* XXX
+ * An else clause here would be if the file WAS found in the
+ * sha1_cache. Good place to set the SHA1 in "found".
+ */
files_scanned++;
bytes_scanned += found->file_size;
st_insert_item(search_table, found->name_canonic, found);
- shared_files = g_slist_prepend(shared_files,
- shared_file_ref(found));
- break; /* for loop */
+ shared_files = g_slist_prepend(shared_files,
+ shared_file_ref(found));
+ G_FREE_NULL(fullpath);
}
}
- G_FREE_NULL(sl->data);
- if (!(i & 0x3f)) {
+ /*
+ * gcu_gtk_main_flush() processes all pending GUI events.
+ *
+ * I'm setting it to trigger anytime the elapsed exceeds 50ms.
+ * This should keep the GUI responsive, even if we hit a
+ * directory with a large number of files to process.
+ */
+ tm_now_exact(¤t);
+ tm_elapsed(&elapsed, ¤t, &base);
+ if (50 <= tm2ms(&elapsed)) {
+ memcpy(&base, ¤t, sizeof(tm_t));
gcu_gtk_main_flush();
}
}
+
+ /* Execute this at least once per directory processed */
+ gcu_gtk_main_flush();
+
gcu_gui_update_files_scanned(); /* Interim view */
atom_str_free_null(&dir_name);
@@ -2678,4 +2689,43 @@
return files_scanned;
}
+/**
+ * Verify that a file extension is valid for sharing
+ *
+ * @param filename The name of the file to check (without path).
+ */
+gboolean
+shared_file_valid_extension(gchar *filename)
+{
+ GSList *exts;
+ gchar *entry_end;
+ struct extension *e;
+ gchar *start;
+
+ entry_end = strchr(filename, '\0');
+
+ /*
+ * Match the file extension (if any) against the extensions list.
+ * All valid extensions start with '.'. Matching is case-insensitive
+ */
+ for (exts = extensions; exts; exts = exts->next) {
+ e = exts->data;
+ start = entry_end - (e->len + 1); /* +1 for "." */
+
+ /*
+ * An extension "--all--" matches all files, even those that don't
+ * have any extension. [Patch from Zygo Blaxell].
+ */
+ if (0 == ascii_strcasecmp("--all--", e->str) ||
+ (start >= filename && *start == '.' &&
+ 0 == ascii_strcasecmp(start + 1, e->str)))
+ {
+ return TRUE;
+ }
+
+ }
+
+ return FALSE;
+}
+
/* vi: set ts=4 sw=4 cindent: */
diff -ru -X diff.exclude /usr/local/src/base.gtkg/gtk-gnutella/src/core/share.h src/core/share.h
--- /usr/local/src/base.gtkg/gtk-gnutella/src/core/share.h 2006-11-18 01:17:55.000000000 -0700
+++ src/core/share.h 2006-11-18 16:22:00.000000000 -0700
@@ -130,6 +130,7 @@
fileinfo_t *shared_file_fileinfo(const shared_file_t *sf);
const gchar *shared_file_content_type(const shared_file_t *sf);
void shared_file_from_fileinfo(fileinfo_t *fi);
+gboolean shared_file_valid_extension(gchar *filename);
const gchar *map_muid_to_query_string(const gchar muid[GUID_RAW_SIZE]);
diff -ru -X diff.exclude /usr/local/src/base.gtkg/gtk-gnutella/src/lib/file.h src/lib/file.h
--- /usr/local/src/base.gtkg/gtk-gnutella/src/lib/file.h 2006-11-18 01:17:42.000000000 -0700
+++ src/lib/file.h 2006-11-18 15:47:49.000000000 -0700
@@ -70,5 +70,14 @@
FILE *file_fopen(const gchar *path, const gchar *mode);
FILE *file_fopen_missing(const gchar *path, const gchar *mode);
+/*
+ * S_ISLNK is a standard macro on most *nix variants, but the man page notes
+ * that it is not POSIX. This means we can't rely on it being available
+ * everywhere.
+ */
+#ifndef S_ISLNK
+#define S_ISLNK(mode) ((mode & S_IFMT) == S_IFLNK)
+#endif
+
#endif /* _file_ */
/* vi: set ts=4 sw=4 cindent: */
-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys - and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
_______________________________________________
Gtk-gnutella-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gtk-gnutella-devel