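Revised patch that handles recursive symlinks by processing each directory only once. Directories are identified by their inode and device numbers (st_ino, st_dev), so a symlink that leads back to an already-visited directory is not scanned again.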
diff --git a/src/jarabe/journal/model.py b/src/jarabe/journal/model.py index 50e8dc1..0bb571c 100644 --- a/src/jarabe/journal/model.py +++ b/src/jarabe/journal/model.py @@ -1,4 +1,4 @@ -# Copyright (C) 2007-2008, One Laptop Per Child +# Copyright (C) 2007-2010, One Laptop per Child # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -16,10 +16,11 @@ import logging import os +import errno from datetime import datetime import time import shutil -from stat import S_IFMT, S_IFDIR, S_IFREG +from stat import S_IFLNK, S_IFMT, S_IFDIR, S_IFREG import traceback import re @@ -258,7 +259,9 @@ class InplaceResultSet(BaseResultSet): BaseResultSet.__init__(self, query, cache_limit) self._mount_point = mount_point self._file_list = None - self._pending_directories = 0 + self._pending_directories = [] + self._visited_directories = [] + self._pending_files = [] self._stopped = False query_text = query.get('query', '') @@ -283,7 +286,10 @@ class InplaceResultSet(BaseResultSet): def setup(self): self._file_list = [] - self._recurse_dir(self._mount_point) + self._pending_directories = [self._mount_point] + self._visited_directories = [] + self._pending_files = [] + gobject.idle_add(self._scan) def stop(self): self._stopped = True @@ -317,51 +323,99 @@ class InplaceResultSet(BaseResultSet): return entries, total_count - def _recurse_dir(self, dir_path): + def _scan(self): if self._stopped: - return + return False - for entry in os.listdir(dir_path): + self.progress.send(self) + + if len(self._pending_files) > 0: + return self._scan_a_file() + + if len(self._pending_directories) > 0: + return self._scan_a_directory() + + self.setup_ready() + self._visited_directories = [] + return False + + def _scan_a_file(self): + full_path = self._pending_files.pop(0) + + try: + stat = os.lstat(full_path) + except OSError, e: + if e.errno != errno.ENOENT: + logging.exception( + 'Error reading metadata of 
file %r', full_path) + return True + + if S_IFMT(stat.st_mode) == S_IFLNK: + try: + link = os.readlink(full_path) + except OSError, e: + logging.exception( + 'Error reading target of link %r', full_path) + return True + + if link == '.': + return True + if link.startswith('/') and full_path.startswith(link): + return True + + try: + stat = os.stat(full_path) + + except OSError, e: + if e.errno != errno.ENOENT: + logging.exception( + 'Error reading metadata of linked file %r', full_path) + return True + + if S_IFMT(stat.st_mode) == S_IFDIR: + id_tuple = stat.st_ino, stat.st_dev + if not id_tuple in self._visited_directories: + self._visited_directories.append(id_tuple) + self._pending_directories.append(full_path) + return True + + if S_IFMT(stat.st_mode) != S_IFREG: + return True + + if self._regex is not None and \ + not self._regex.match(full_path): + return True + + if None not in [self._date_start, self._date_end] and \ + (stat.st_mtime < self._date_start or + stat.st_mtime > self._date_end): + return True + + if self._mime_types: + mime_type = gio.content_type_guess(filename=full_path) + if mime_type not in self._mime_types: + return True + + file_info = (full_path, stat, int(stat.st_mtime)) + self._file_list.append(file_info) + + return True + + def _scan_a_directory(self): + dir_path = self._pending_directories.pop(0) + + try: + entries = os.listdir(dir_path) + except OSError, e: + if e.errno not in [errno.EACCES, errno.ENOTDIR]: + logging.exception('Error reading directory %r', dir_path) + return True + + for entry in entries: if entry.startswith('.'): continue - full_path = dir_path + '/' + entry - try: - stat = os.stat(full_path) - if S_IFMT(stat.st_mode) == S_IFDIR: - self._pending_directories += 1 - gobject.idle_add(lambda s=full_path: self._recurse_dir(s)) - - elif S_IFMT(stat.st_mode) == S_IFREG: - add_to_list = True - - if self._regex is not None and \ - not self._regex.match(full_path): - add_to_list = False - - if None not in [self._date_start, 
self._date_end] and \ - (stat.st_mtime < self._date_start or - stat.st_mtime > self._date_end): - add_to_list = False - - if self._mime_types: - mime_type = gio.content_type_guess(filename=full_path) - if mime_type not in self._mime_types: - add_to_list = False - - if add_to_list: - file_info = (full_path, stat, int(stat.st_mtime)) - self._file_list.append(file_info) - - self.progress.send(self) - - except Exception: - logging.error('Error reading file %r: %s' % \ - (full_path, traceback.format_exc())) - - if self._pending_directories == 0: - self.setup_ready() - else: - self._pending_directories -= 1 + self._pending_files.append(dir_path + '/' + entry) + return True def _get_file_metadata(path, stat): client = gconf.client_get_default() (made with git diff --patience) -- James Cameron http://quozl.linux.org.au/ _______________________________________________ Sugar-devel mailing list Sugar-devel@lists.sugarlabs.org http://lists.sugarlabs.org/listinfo/sugar-devel