On 8/6/07, jamie <[EMAIL PROTECTED]> wrote: > On Mon, 2007-08-06 at 13:36 +0200, Marcus Fritzsch wrote: > > On 8/6/07, jamie <[EMAIL PROTECTED]> wrote: > > > On Mon, 2007-08-06 at 04:03 +0200, Marcus Fritzsch wrote: > > > > This patch prevents tracker from descending into mount points that are > > > > directories other than a watch- or crawl-root. I often have some fs > > > > (sshfs, nfs) mounted under my $HOME and found tracker going down all > > > > these trees. Also, there was a bug report on launchpad about this[1]. > > > > > > > > I am however not sure if this method works, also I haven't found many > > > > references when searching for it with Google or Clusty. If there are > > > > objections to this method of mount-point 'discovery' it would be > > > > possible to read mtab and store it in struct Tracker for processing. > > > > > > > > However, as tracker is not that simple, I am not aware of any side > > > > effects this patch may have on tracker; I tested it - with mount > > > > points in watched dirs and crawled ones, with fuse and nfs mounts. > > > > > > > > Any comments? > > > > > > > > > > yeah it's a good idea > > > > > > but take a look at tracker-utils.c: > > > > > > tracker_file_is_no_watched > > > tracker_file_is_crawled > > > > Thought of this too. The function signature needs a 'current' dir and > > a 'testdir' to check whether testdir is on a different device than current > > - there is no problem with that? That would be faster - however I > > could also implement it by checking against the crawl and watch > > directory roots list. Which one do you prefer? > > I would prefer the latter but can we store the device signature of the > watch and crawl dirs somewhere so we are not constantly stat()ing them? > > (Could use a GSList to store them and simply iterate over them)
Here is an overview of how skipping mount points works: 0a. Fields added: Tracker.root_directory_devices and Tracker.skip_mount_points 0b. Functions added: tracker_add_root_dir (), tracker_add_root_directories () and tracker_file_is_in_root_dir (). All are declared in tracker-utils.h and implemented in tracker-utils.c. 1. The device ID (st_dev) of each root directory (service, watch and crawl roots) is added to the GSList Tracker.root_directory_devices - there will be no duplicates - that is checked with a loop; if it gets too slow I could switch it, e.g., to a tree. 2. The new functions check Tracker.skip_mount_points themselves, so callers do not have to do that check. 3. get_files ignores directories that are not on the same device as one of the registered root directories (this happens only if skip_mount_points is enabled, see above). 4. A config option SkipMountPoints has been added to the Watches section of tracker.cfg. 5. The default is to descend into mounted directories, as it was before. I tested this patch quite a lot, including a --reindex run. Cheers, Marcus
diff --git a/src/trackerd/tracker-utils.c b/src/trackerd/tracker-utils.c index e006676..02d0b4a 100644 --- a/src/trackerd/tracker-utils.c +++ b/src/trackerd/tracker-utils.c @@ -1148,6 +1148,94 @@ tracker_file_is_crawled (const char* uri) } +void +tracker_add_root_dir (const char *uri) +{ + struct stat st; + + if (! tracker->skip_mount_points) { + return; + } + + if (!uri || uri[0] != '/') { + return; + } + + if (g_stat (uri, &st) == 0) { + GSList * cur = NULL; + dev_t * elem = NULL; + + if (! S_ISDIR (st.st_mode)) { + return; + } + + /* FIXME: too costly? */ + for (cur = tracker->root_directory_devices; cur; cur = g_slist_next (cur)) { + if (cur->data && *((dev_t *) cur->data) == st.st_dev) { + return; + } + } + + elem = g_new (dev_t, 1); + * elem = st.st_dev; + tracker->root_directory_devices = g_slist_prepend (tracker->root_directory_devices, elem); + } else { + tracker_log ("Could not stat `%s'", uri); + } +} + + +void +tracker_add_root_directories (GSList * uri_list) +{ + GSList * cur = NULL; + + if (! tracker->skip_mount_points) { + return; + } + + for (cur = uri_list; cur; cur = g_slist_next (cur)) { + tracker_add_root_dir ((const char *) cur->data); + } +} + + +gboolean +tracker_file_is_in_root_dir (const char *uri) +{ + struct stat st; + GSList * cur = NULL; + dev_t uri_dev = 0; + + if (! tracker->skip_mount_points) { + return TRUE; + } + + if (!uri || uri[0] != '/') { + return FALSE; + } + + if (g_stat (uri, &st) == 0) { + uri_dev = st.st_dev; + } else { + tracker_log ("Could not stat `%s'", uri); + return TRUE; /* the caller should take care of skipping this one */ + } + + if (! 
S_ISDIR (st.st_mode)) { /* only directories are mount points and therefore checked */ + return TRUE; + } + + for (cur = tracker->root_directory_devices; cur; cur = g_slist_next (cur)) { + if (cur->data && *((dev_t *) cur->data) == uri_dev) { + return TRUE; + } + } + + return FALSE; +} + + gboolean tracker_file_info_is_valid (FileInfo *info) { @@ -1861,6 +1949,13 @@ get_files (const char *dir, gboolean dir_only, gboolean skip_ignored_files) g_free (str); if (!tracker_file_is_valid (mystr)) { + g_free (mystr); + continue; + } + + if (!tracker_file_is_in_root_dir (mystr)) { + tracker_log ("Skipping mount point %s", mystr); + g_free (mystr); continue; } @@ -2305,7 +2400,9 @@ tracker_load_config_file (void) "# List of directory roots to not index and not watch seperated by semicolons\n", "NoWatchDirectory=\n", "# Set to false to prevent watching of any kind\n", - "EnableWatching=true\n\n", + "EnableWatching=true\n", + "# Set to true prevents tracker from descending into mounted directory trees\n", + "SkipMountPoints=false\n\n", "[Indexing]\n", "# Throttles the indexing process. Allowable values are 0-20. higher values decrease indexing speed\n", "Throttle=0\n", @@ -2416,6 +2513,10 @@ tracker_load_config_file (void) tracker->enable_watching = g_key_file_get_boolean (key_file, "Watches", "EnableWatching", NULL); } + if (g_key_file_has_key (key_file, "Watches", "SkipMountPoints", NULL)) { + tracker->skip_mount_points = g_key_file_get_boolean (key_file, "Watches", "SkipMountPoints", NULL); + } + /* Indexing options */ diff --git a/src/trackerd/tracker-utils.h b/src/trackerd/tracker-utils.h index 76b8c49..9ad9e6b 100644 --- a/src/trackerd/tracker-utils.h +++ b/src/trackerd/tracker-utils.h @@ -248,6 +248,9 @@ typedef struct { gboolean first_flush; gboolean do_optimize; + gboolean skip_mount_points; /* should tracker descend into mounted directories? 
see Tracker.root_directory_devices */ + GSList * root_directory_devices; + IndexStatus index_status; /* battery and ac power status file */ @@ -517,8 +520,12 @@ gboolean tracker_file_is_indexable (const char *uri); gboolean tracker_is_directory (const char *dir); -gboolean tracker_file_is_no_watched (const char* uri); -gboolean tracker_file_is_crawled (const char* uri); +gboolean tracker_file_is_no_watched (const char *uri); +gboolean tracker_file_is_crawled (const char *uri); + +void tracker_add_root_dir (const char *uri); /* add a directory to the list of watch/crawl/service roots */ +void tracker_add_root_directories (GSList *uri_list); /* adds a bunch of directories to the list of watch/crawl/service roots */ +gboolean tracker_file_is_in_root_dir (const char *uri); /* test if a given file resides in the watch/crawl/service roots */ GSList * tracker_get_all_files (const char *dir, gboolean dir_only); GSList * tracker_get_files (const char *dir, gboolean dir_only); diff --git a/src/trackerd/trackerd.c b/src/trackerd/trackerd.c index de34879..7b2a393 100644 --- a/src/trackerd/trackerd.c +++ b/src/trackerd/trackerd.c @@ -1022,6 +1022,8 @@ process_files_thread (void) list = tracker_get_service_dirs ("Applications"); + tracker_add_root_directories (list); + process_directory_list (db_con, list, FALSE); g_slist_free (list); @@ -1046,7 +1048,6 @@ process_files_thread (void) tracker_add_service_path ("GaimConversations", gaim); list = g_slist_prepend (NULL, gaim); - } if (tracker_file_is_valid (purple)) { @@ -1058,6 +1059,8 @@ process_files_thread (void) if (list) { tracker_log ("starting chat log indexing..."); + tracker_add_root_directories (list); + process_directory_list (db_con, list, TRUE); g_slist_free (list); @@ -1078,6 +1081,7 @@ process_files_thread (void) if (tracker->index_evolution_emails) { GSList *list; list = tracker_get_service_dirs ("EvolutionEmails"); + tracker_add_root_directories (list); process_directory_list (db_con, list, TRUE); g_slist_free 
(list); } @@ -1085,6 +1089,7 @@ process_files_thread (void) if (tracker->index_kmail_emails) { GSList *list; list = tracker_get_service_dirs ("KMailEmails"); + tracker_add_root_directories (list); process_directory_list (db_con, list, TRUE); g_slist_free (list); } @@ -1125,6 +1130,8 @@ process_files_thread (void) break; } + tracker_add_root_directories (tracker->watch_directory_roots_list); + /* index watched dirs first */ g_slist_foreach (tracker->watch_directory_roots_list, (GFunc) watch_dir, db_con); @@ -1157,6 +1164,8 @@ process_files_thread (void) break; } + tracker_add_root_directories (tracker->crawl_directory_list); + add_dirs_to_list (tracker->crawl_directory_list, db_con); g_slist_foreach (tracker->dir_list, (GFunc) schedule_dir_check, db_con); @@ -1985,6 +1994,9 @@ set_defaults () tracker->services_dir = g_build_filename (TRACKER_DATADIR, "tracker", "services", NULL); + tracker->skip_mount_points = FALSE; + tracker->root_directory_devices = NULL; + /* battery and ac power checks */ const char *battery_filenames[4] = { "/proc/acpi/ac_adapter/AC/state",
_______________________________________________ tracker-list mailing list tracker-list@gnome.org http://mail.gnome.org/mailman/listinfo/tracker-list