On 8/6/07, jamie <[EMAIL PROTECTED]> wrote:
> On Mon, 2007-08-06 at 13:36 +0200, Marcus Fritzsch wrote:
> > On 8/6/07, jamie <[EMAIL PROTECTED]> wrote:
> > > On Mon, 2007-08-06 at 04:03 +0200, Marcus Fritzsch wrote:
> > > > This patch prevents tracker from descending into mountpoints which is
> > > > a directory other than a watch- or crawl-root. I often have some fs
> > > > (sshfs, nfs) mounted under my $HOME and found tracker going down all
> > > > these trees. Also, there was a bug report on launchpad about this[1].
> > > >
> > > > I am however not sure if this method works, also I haven't found many
> > > > references when searching for it by google or clusty. If there are
> > > > objections to this method of mount-point 'discovery' it would be
> > > > possible to read mtab and store it in struct Tracker for processing.
> > > >
> > > > However as tracker is not that simple I am not aware of any side
> > > > effects this patch may have on tracker, I tested it - with mount
> > > > points in watched dirs and crawled ones, with fuse and nfs mounts.
> > > >
> > > > Any comments?
> > > >
> > >
> > > yeah its a good idea
> > >
> > > but take a look at tracker-utils.c:
> > >
> > > tracker_file_is_no_watched
> > > tracker_file_is_crawled
> >
> > Thought of this too. The function signature needs a 'current' dir and
> > a 'testdir' to check whether testdir is a different device than current
> > - there is no problem with that? That would be faster - however I
> > could also implement it by checking against the crawl and watch
> > directory roots list. Which one do you prefer?
>
> I would prefer the latter but can we store the device signature of the
> watch and crawl dirs somewhere so we are not constantly stating them?
>
> (Could use a GSList to store them and simply iterate over them)

Here is an overview of how skipping mount points works:

0a. Fields added: Tracker.root_directory_devices and Tracker.skip_mount_points

0b. Functions added: tracker_add_root_dir (),
tracker_add_root_directories () and tracker_file_is_in_root_dir ().
All are declared in tracker-utils.h and implemented in tracker-utils.c.

1. The device ID (st_dev) of each service, watch and crawl root
directory is added to the GSList Tracker.root_directory_devices.
There will be no duplicates - that is checked with a loop; if it gets
too slow I could switch it to e.g. a tree.

2. The new functions check Tracker.skip_mount_points themselves - so
callers do not have to do that check before calling them.

3. get_files skips directories whose device is not among the recorded
root directory devices, i.e. mount points of other filesystems (this
only happens if skip_mount_points is enabled, see above).

4. A config option SkipMountPoints has been added to the Watches
section of tracker.cfg.

5. The default is to descend into mounted directories, as it was before.

I tested this patch quite a lot, including a --reindex run.

Cheers, Marcus
diff --git a/src/trackerd/tracker-utils.c b/src/trackerd/tracker-utils.c
index e006676..02d0b4a 100644
--- a/src/trackerd/tracker-utils.c
+++ b/src/trackerd/tracker-utils.c
@@ -1148,6 +1148,94 @@ tracker_file_is_crawled (const char* uri)
 }
 
 
+void
+tracker_add_root_dir (const char *uri)
+{
+	struct stat st;
+
+	if (! tracker->skip_mount_points) {
+		return;
+	}
+
+	if (!uri || uri[0] != '/') {
+		return;
+	}
+
+	if (g_stat (uri, &st) == 0) {
+		GSList * cur = NULL;
+		dev_t * elem = NULL;
+
+		if (! S_ISDIR (st.st_mode)) {
+			return;
+		}
+
+		/* FIXME: too costly? */
+		for (cur = tracker->root_directory_devices; cur; cur = g_slist_next (cur)) {
+			if (cur->data && *((dev_t *) cur->data) == st.st_dev) {
+				return;
+			}
+		}
+
+		elem = g_new (dev_t, 1);
+		* elem = st.st_dev;
+		tracker->root_directory_devices = g_slist_prepend (tracker->root_directory_devices, elem);
+	} else {
+		tracker_log ("Could not stat `%s'", uri);
+	}
+}
+
+
+void
+tracker_add_root_directories (GSList * uri_list)
+{
+	GSList * cur = NULL;
+
+	if (! tracker->skip_mount_points) {
+		return;
+	}
+
+	for (cur = uri_list; cur; cur = g_slist_next (cur)) {
+		tracker_add_root_dir ((const char *) cur->data);
+	}
+}
+
+
+gboolean
+tracker_file_is_in_root_dir (const char *uri)
+{
+	struct stat  st;
+	GSList *     cur = NULL;
+	dev_t        uri_dev = 0;
+
+	if (! tracker->skip_mount_points) {
+		return TRUE;
+	}
+
+	if (!uri || uri[0] != '/') {
+		return FALSE;
+	}
+
+	if (g_stat (uri, &st) == 0) {
+		uri_dev = st.st_dev;
+	} else {
+		tracker_log ("Could not stat `%s'", uri);
+		return TRUE; /* the caller should take care of skipping this one */
+	}
+
+	if (! S_ISDIR (st.st_mode)) { /* only directories are mount points and therefore checked */
+		return TRUE;
+	}
+
+	for (cur = tracker->root_directory_devices; cur; cur = g_slist_next (cur)) {
+		if (cur->data && *((dev_t *) cur->data) == uri_dev) {
+			return TRUE;
+		}
+	}
+
+	return FALSE;
+}
+
+
 gboolean
 tracker_file_info_is_valid (FileInfo *info)
 {
@@ -1861,6 +1949,13 @@ get_files (const char *dir, gboolean dir_only, gboolean skip_ignored_files)
 			g_free (str);
 
  			if (!tracker_file_is_valid (mystr)) {
+				g_free (mystr);
+				continue;
+			}
+
+			if (!tracker_file_is_in_root_dir (mystr)) {
+				tracker_log ("Skipping mount point %s", mystr);
+				g_free (mystr);
 				continue;
 			}
 
@@ -2305,7 +2400,9 @@ tracker_load_config_file (void)
 					 "# List of directory roots to not index and not watch seperated by semicolons\n",
 					 "NoWatchDirectory=\n",
 					 "# Set to false to prevent watching of any kind\n",
-					 "EnableWatching=true\n\n",
+					 "EnableWatching=true\n",
+					 "# Set to true prevents tracker from descending into mounted directory trees\n",
+					 "SkipMountPoints=false\n\n",
 					 "[Indexing]\n",
 					 "# Throttles the indexing process. Allowable values are 0-20. higher values decrease indexing speed\n",
 					 "Throttle=0\n",
@@ -2416,6 +2513,10 @@ tracker_load_config_file (void)
 		tracker->enable_watching = g_key_file_get_boolean (key_file, "Watches", "EnableWatching", NULL);
 	}
 
+	if (g_key_file_has_key (key_file, "Watches", "SkipMountPoints", NULL)) {
+		tracker->skip_mount_points = g_key_file_get_boolean (key_file, "Watches", "SkipMountPoints", NULL);
+	}
+
 
 	/* Indexing options */
 
diff --git a/src/trackerd/tracker-utils.h b/src/trackerd/tracker-utils.h
index 76b8c49..9ad9e6b 100644
--- a/src/trackerd/tracker-utils.h
+++ b/src/trackerd/tracker-utils.h
@@ -248,6 +248,9 @@ typedef struct {
 	gboolean	first_flush;
 	gboolean	do_optimize;
 
+	gboolean	skip_mount_points;	/* should tracker descend into mounted directories? see Tracker.root_directory_devices */
+	GSList *	root_directory_devices;
+
 	IndexStatus	index_status;
 
 	/* battery and ac power status file */
@@ -517,8 +520,12 @@ gboolean	tracker_file_is_indexable 	(const char *uri);
 
 gboolean 	tracker_is_directory 		(const char *dir);
 
-gboolean	tracker_file_is_no_watched 	(const char* uri);
-gboolean	tracker_file_is_crawled 	(const char* uri);
+gboolean	tracker_file_is_no_watched 	(const char *uri);
+gboolean	tracker_file_is_crawled 	(const char *uri);
+
+void		tracker_add_root_dir		(const char *uri);  /* add a directory to the list of watch/crawl/service roots */
+void		tracker_add_root_directories	(GSList *uri_list); /* adds a bunch of directories to the list of watch/crawl/service roots */
+gboolean	tracker_file_is_in_root_dir	(const char *uri);  /* test if a given file resides in the watch/crawl/service roots */
 
 GSList * 	tracker_get_all_files 		(const char *dir, gboolean dir_only);
 GSList * 	tracker_get_files 		(const char *dir, gboolean dir_only);
diff --git a/src/trackerd/trackerd.c b/src/trackerd/trackerd.c
index de34879..7b2a393 100644
--- a/src/trackerd/trackerd.c
+++ b/src/trackerd/trackerd.c
@@ -1022,6 +1022,8 @@ process_files_thread (void)
 
 								list = tracker_get_service_dirs ("Applications");
 
+								tracker_add_root_directories (list);
+
 								process_directory_list (db_con, list, FALSE);
 
 								g_slist_free (list);
@@ -1046,7 +1048,6 @@ process_files_thread (void)
 									tracker_add_service_path ("GaimConversations", gaim);
 
 									list = g_slist_prepend (NULL, gaim);
-
 								}
 
 								if (tracker_file_is_valid (purple)) {
@@ -1058,6 +1059,8 @@ process_files_thread (void)
 								if (list) {
 									tracker_log ("starting chat log indexing...");
 
+									tracker_add_root_directories (list);
+
 									process_directory_list (db_con, list, TRUE);
 
 									g_slist_free (list);
@@ -1078,6 +1081,7 @@ process_files_thread (void)
 									if (tracker->index_evolution_emails) {
 										GSList *list;
 										list = tracker_get_service_dirs ("EvolutionEmails");
+										tracker_add_root_directories (list);
 										process_directory_list (db_con, list, TRUE);
 										g_slist_free (list);
 									}
@@ -1085,6 +1089,7 @@ process_files_thread (void)
 									if (tracker->index_kmail_emails) {
 										GSList *list;
 										list = tracker_get_service_dirs ("KMailEmails");
+										tracker_add_root_directories (list);
 										process_directory_list (db_con, list, TRUE);
 										g_slist_free (list);
 									}
@@ -1125,6 +1130,8 @@ process_files_thread (void)
 									break;
 								}
 
+								tracker_add_root_directories (tracker->watch_directory_roots_list);
+
 								/* index watched dirs first */
 								g_slist_foreach (tracker->watch_directory_roots_list, (GFunc) watch_dir, db_con);
 
@@ -1157,6 +1164,8 @@ process_files_thread (void)
 									break;
 								}
 
+								tracker_add_root_directories (tracker->crawl_directory_list);
+
 								add_dirs_to_list (tracker->crawl_directory_list, db_con);
 
 								g_slist_foreach (tracker->dir_list, (GFunc) schedule_dir_check, db_con);
@@ -1985,6 +1994,9 @@ set_defaults ()
 
 	tracker->services_dir = g_build_filename (TRACKER_DATADIR, "tracker", "services", NULL);
 
+	tracker->skip_mount_points = FALSE;
+	tracker->root_directory_devices = NULL;
+
 	/* battery and ac power checks */
 	const char *battery_filenames[4] = {
 		"/proc/acpi/ac_adapter/AC/state",
_______________________________________________
tracker-list mailing list
tracker-list@gnome.org
http://mail.gnome.org/mailman/listinfo/tracker-list

Reply via email to