ArielGlenn has submitted this change and it was merged.

Change subject: clean up ignore list code
......................................................................


clean up ignore list code

move duplicate ignores list methods into module
move ignore list methods/classes into module

Change-Id: Ie9b7d995124c5875bf86be590836d963a4e93105
---
M dataretention/data_auditor.py
M dataretention/retention/cli.py
M dataretention/retention/fileutils.py
M dataretention/retention/ignores.py
M dataretention/retention/localfileaudit.py
M dataretention/retention/localhomeaudit.py
M dataretention/retention/remotefileauditor.py
M dataretention/retention/retentionaudit.py
D dataretention/retention/userconfretriever.py
9 files changed, 380 insertions(+), 402 deletions(-)

Approvals:
  ArielGlenn: Verified; Looks good to me, approved



diff --git a/dataretention/data_auditor.py b/dataretention/data_auditor.py
index 9d7c343..37800af 100644
--- a/dataretention/data_auditor.py
+++ b/dataretention/data_auditor.py
@@ -8,7 +8,7 @@
 from retention.remotelogauditor import RemoteLogsAuditor
 from retention.remotehomeauditor import RemoteHomesAuditor
 from retention.examiner import RemoteFileExaminer, RemoteDirExaminer
-from retention.userconfretriever import RemoteUserCfRetriever
+from retention.ignores import RemoteUserCfRetriever
 
 def usage(message=None):
     if message:
diff --git a/dataretention/retention/cli.py b/dataretention/retention/cli.py
index 4541a25..d199e29 100644
--- a/dataretention/retention/cli.py
+++ b/dataretention/retention/cli.py
@@ -10,7 +10,6 @@
 from retention.status import Status
 from retention.rule import RuleStore
 import retention.remotefileauditor
-from retention.localhomeaudit import LocalHomesAuditor
 from retention.locallogaudit import LocalLogsAuditor
 from retention.fileinfo import FileInfo
 import retention.utils
@@ -19,9 +18,9 @@
 from retention.examiner import RemoteDirExaminer, RemoteFileExaminer
 import retention.fileutils
 import retention.ruleutils
-from retention.userconfretriever import RemoteUserCfRetriever
 import retention.cliutils
-from retention.ignores import Ignores
+from retention.ignores import Ignores, RemoteUserCfRetriever
+import retention.ignores
 from retention.completion import Completion
 
 
@@ -279,7 +278,7 @@
             else:
                 local_ign = RemoteUserCfRetriever(host_todo, self.timeout, 
self.audit_type)
                 self.local_ignores = local_ign.run(True)
-                local_ignored_dirs, local_ignored_files = 
LocalHomesAuditor.process_local_ignores(
+                local_ignored_dirs, local_ignored_files = 
retention.ignores.process_local_ignores(
                     self.local_ignores, self.ignored)
                 self.do_one_host(host_todo, report)
 
@@ -365,19 +364,19 @@
             path = LocalLogsAuditor.normalize(path)
 
         if entrytype == 'file':
-            if retention.fileutils.file_is_ignored(path, basedir, 
self.ignored):
+            if retention.ignores.file_is_ignored(path, basedir, self.ignored):
                 return False
 
             # check perhost file
             if self.cenv.host in self.ignores.perhost_ignores:
-                if retention.fileutils.file_is_ignored(
+                if retention.ignores.file_is_ignored(
                         path, basedir,
                         self.ignores.perhost_ignores[self.cenv.host]):
                     return False
 
             # check perhost rules
             if self.cenv.host in self.ignores.perhost_ignores_from_rules:
-                if retention.fileutils.file_is_ignored(
+                if retention.ignores.file_is_ignored(
                         path, basedir,
                         
self.ignores.perhost_ignores_from_rules[self.cenv.host]):
                     return False
diff --git a/dataretention/retention/fileutils.py 
b/dataretention/retention/fileutils.py
index 9c0b537..17531b5 100644
--- a/dataretention/retention/fileutils.py
+++ b/dataretention/retention/fileutils.py
@@ -73,44 +73,6 @@
             return True
     return False
 
-def expand_ignored_dirs(basedir, ignored):
-    '''
-    find dirs to ignore relative to the specified
-    basedir, in Config entry.  Fall back to wildcard spec
-    if there is not entry for the basedir.  Dirs in
-    Config entry may have one * in the path, this
-    will be treated as a wildcard for the purposes
-    of checking directories against the entry.
-
-    args: absolute path of basedir being crawled
-          hash of ignored dirs, file, etc
-    returns: list of absolute paths of dirs to ignore,
-    plus separate list of abslute paths containing '*',
-    also to ignore, or the empty list if there are none
-    '''
-
-    dirs = []
-    wildcard_dirs = []
-
-    to_expand = []
-    if 'dirs' in ignored:
-        if '*' in ignored['dirs']:
-            to_expand.extend(ignored['dirs']['*'])
-
-        if '/' in ignored['dirs']:
-            to_expand.extend(ignored['dirs']['/'])
-
-        if basedir in ignored['dirs']:
-            to_expand.extend(ignored['dirs'][basedir])
-
-        for dname in to_expand:
-            if '*' in dname:
-                wildcard_dirs.append(os.path.join(basedir, dname))
-            else:
-                dirs.append(os.path.join(basedir, dname))
-
-    return dirs, wildcard_dirs
-
 def wildcard_matches(dirname, wildcard_dirs, exact=True):
     '''given a list of absolute paths with exactly one '*'
     in each entry, see if the passed dirname matches
@@ -128,57 +90,6 @@
                 return True
             else:
                 continue
-    return False
-
-def file_is_ignored(fname, basedir, ignored):
-    '''
-    pass normalized name (abs path), basedir (location audited),
-    hash of ignored files, dirs, prefixes, extensions
-    get back True if the file is to be ignored and
-    False otherwise
-    '''
-
-    basename = os.path.basename(fname)
-
-    if 'prefixes' in ignored:
-        if startswith(basename, ignored['prefixes']):
-            return True
-
-    if 'extensions' in ignored:
-        if '*' in ignored['extensions']:
-            if endswith(basename, ignored['extensions']['*']):
-                return True
-        if basedir in ignored['extensions']:
-            if endswith(
-                    basename, ignored['extensions'][basedir]):
-                return True
-
-    if 'files' in ignored:
-        if basename in ignored['files']:
-            return True
-        if '*' in ignored['files']:
-            if endswith(basename, ignored['files']['*']):
-                return True
-
-        if '/' in ignored['files']:
-            if fname in ignored['files']['/']:
-                return True
-            if wildcard_matches(
-                    fname, [w for w in ignored['files']['/'] if '*' in w]):
-                return True
-
-        if basedir in ignored['files']:
-            if endswith(basename, ignored['files'][basedir]):
-                return True
-    return False
-
-def dir_is_ignored(dirname, ignored):
-    expanded_dirs, wildcard_dirs = expand_ignored_dirs(
-        os.path.dirname(dirname), ignored)
-    if dirname in expanded_dirs:
-        return True
-    if wildcard_matches(dirname, wildcard_dirs):
-        return True
     return False
 
 def dir_is_wrong_type(dirname):
diff --git a/dataretention/retention/ignores.py 
b/dataretention/retention/ignores.py
index f324eac..482508b 100644
--- a/dataretention/retention/ignores.py
+++ b/dataretention/retention/ignores.py
@@ -1,15 +1,183 @@
 import os
 import sys
 import runpy
+import json
+import salt.client
 
 sys.path.append('/srv/audits/retention/scripts/')
 
 from retention.status import Status
 import retention.remotefileauditor
 import retention.utils
+from retention.utils import JsonHelper
 import retention.fileutils
 import retention.ruleutils
 import retention.cliutils
+from retention.config import Config
+
+def expand_ignored_dirs(basedir, ignored):
+    '''
+    find dirs to ignore relative to the specified
+    basedir, in Config entry.  Fall back to wildcard spec
+    if there is not entry for the basedir.  Dirs in
+    Config entry may have one * in the path, this
+    will be treated as a wildcard for the purposes
+    of checking directories against the entry.
+
+    args: absolute path of basedir being crawled
+          hash of ignored dirs, file, etc
+    returns: list of absolute paths of dirs to ignore,
+    plus separate list of abslute paths containing '*',
+    also to ignore, or the empty list if there are none
+    '''
+
+    dirs = []
+    wildcard_dirs = []
+
+    to_expand = []
+    if 'dirs' in ignored:
+        if '*' in ignored['dirs']:
+            to_expand.extend(ignored['dirs']['*'])
+
+        if '/' in ignored['dirs']:
+            to_expand.extend(ignored['dirs']['/'])
+
+        if basedir in ignored['dirs']:
+            to_expand.extend(ignored['dirs'][basedir])
+
+        for dname in to_expand:
+            if '*' in dname:
+                wildcard_dirs.append(os.path.join(basedir, dname))
+            else:
+                dirs.append(os.path.join(basedir, dname))
+    return dirs, wildcard_dirs
+
+def dir_is_ignored(dirname, ignored):
+    expanded_dirs, wildcard_dirs = expand_ignored_dirs(
+        os.path.dirname(dirname), ignored)
+    if dirname in expanded_dirs:
+        return True
+    if wildcard_matches(dirname, wildcard_dirs):
+        return True
+    return False
+
+def file_is_ignored(fname, basedir, ignored):
+    '''
+    pass normalized name (abs path), basedir (location audited),
+    hash of ignored files, dirs, prefixes, extensions
+    get back True if the file is to be ignored and
+    False otherwise
+    '''
+
+    basename = os.path.basename(fname)
+
+    if 'prefixes' in ignored:
+        if startswith(basename, ignored['prefixes']):
+            return True
+
+    if 'extensions' in ignored:
+        if '*' in ignored['extensions']:
+            if endswith(basename, ignored['extensions']['*']):
+                return True
+        if basedir in ignored['extensions']:
+            if endswith(
+                    basename, ignored['extensions'][basedir]):
+                return True
+
+    if 'files' in ignored:
+        if basename in ignored['files']:
+            return True
+        if '*' in ignored['files']:
+            if endswith(basename, ignored['files']['*']):
+                return True
+
+        if '/' in ignored['files']:
+            if fname in ignored['files']['/']:
+                return True
+            if wildcard_matches(
+                    fname, [w for w in ignored['files']['/'] if '*' in w]):
+                return True
+
+        if basedir in ignored['files']:
+            if endswith(basename, ignored['files'][basedir]):
+                return True
+    return False
+
+def get_home_dirs(locations):
+    '''
+    get a list of home directories where the root location(s) for home are
+    specified in the Config class (see 'home_locations'), by reading
+    these root location dirs and grabbing all subdirectory names from them
+    '''
+    home_dirs = []
+
+    for location in Config.cf[locations]:
+        if not os.path.isdir(location):
+            continue
+        home_dirs.extend([os.path.join(location, d)
+                          for d in os.listdir(location)
+                          if os.path.isdir(os.path.join(location, d))])
+    return home_dirs
+
+def get_local_ignores(locations):
+    '''
+    read a list of absolute paths from /home/blah/.data_retention
+    for all blah.  Dirs are specified by op sep at the end ('/')
+    and files without.
+    '''
+    local_ignores = {}
+    home_dirs = get_home_dirs(locations)
+    for hdir in home_dirs:
+        local_ignores[hdir] = []
+        if os.path.exists(os.path.join(hdir, ".data_retention")):
+            try:
+                filep = open(os.path.join(hdir, ".data_retention"))
+                entries = filep.read().split("\n")
+                filep.close()
+            except:
+                pass
+            entries = filter(None, [e.strip() for e in entries])
+            # fixme should sanity check these? ???
+            # what happens if people put wildcards in the wrong
+            # component, or put utter garbage in there, or...?
+            local_ignores[hdir].extend(entries)
+        return local_ignores
+
+def process_local_ignores(local_ignores, ignored):
+    '''
+    files or dirs listed in data retention conf in homedir
+    are considered 'good' and added to ignore list
+
+    non-absolute paths will be taken as relative to the
+    home dir of the data retention config they were
+    read from
+    '''
+
+    local_ignored_dirs = []
+    local_ignored_files = []
+    for basedir in local_ignores:
+        for path in local_ignores[basedir]:
+            if not path.startswith('/'):
+                path = os.path.join(basedir, path)
+
+            if path.endswith('/'):
+                if 'dirs' not in ignored:
+                    ignored['dirs'] = {}
+                if '/' not in ignored['dirs']:
+                    ignored['dirs']['/'] = []
+
+                ignored['dirs']['/'].append(path[:-1])
+                local_ignored_dirs.append(path[:-1])
+            else:
+                if 'files' not in ignored:
+                    ignored['files'] = {}
+                if '/' not in ignored['files']:
+                    ignored['files']['/'] = []
+
+                ignored['files']['/'].append(path)
+                local_ignored_files.append(path)
+    return local_ignored_dirs, local_ignored_files
+
 
 class Ignores(object):
     '''
@@ -20,11 +188,39 @@
     def __init__(self, cdb):
         self.cdb = cdb
         self.perhost_rules_from_file = None
-        self.hosts = self.cdb.store_db_list_all_hosts()
+        if cdb is not None:
+            self.hosts = self.cdb.store_db_list_all_hosts()
+        else:
+            self.hosts = None
+
         self.perhost_ignores = {}
         self.perhost_ignores_from_rules = {}
         self.perhost_rules_from_store = {}
         self.get_perhost_cf_from_file()
+        self.ignored = {}
+
+    def set_up_ignored(self, ignore_also):
+        '''
+        collect up initial list of files/dirs to skip during audit
+        '''
+
+        self.ignored['files'] = Config.cf['ignored_files']
+        self.ignored['dirs'] = Config.cf['ignored_dirs']
+        self.ignored['prefixes'] = Config.cf['ignored_prefixes']
+        self.ignored['extensions'] = Config.cf['ignored_extensions']
+
+        if ignore_also is not None:
+            # silently skip paths that are not absolute
+            for path in ignore_also:
+                if path.startswith('/'):
+                    if path.endswith('/'):
+                        if '/' not in self.ignored['dirs']:
+                            self.ignored['dirs']['/'] = []
+                        self.ignored['dirs']['/'].append(path[:-1])
+                    else:
+                        if '/' not in self.ignored['files']:
+                            self.ignored['files']['/'] = []
+                        self.ignored['files']['/'].append(path)
 
     def get_perhost_from_rules(self, hosts=None):
         if hosts == None:
@@ -69,21 +265,154 @@
             except:
                 self.perhost_rules_from_file = None
 
-        if self.perhost_rules_from_file is not None:
-            if 'ignored_dirs' in self.perhost_rules_from_file:
-                for host in self.perhost_rules_from_file['ignored_dirs']:
-                    if host not in self.perhost_ignores:
-                        self.perhost_ignores[host] = {}
-                    self.perhost_ignores[host]['dirs'] = {}
-                    self.perhost_ignores[host]['dirs']['/'] = [
-                        (lambda path: path[:-1] if path[-1] == '/'
-                         else path)(p)
-                        for p in self.perhost_rules_from_file[
+        if self.perhost_rules_from_file is None:
+            return
+
+        if 'ignored_dirs' in self.perhost_rules_from_file:
+            for host in self.perhost_rules_from_file['ignored_dirs']:
+                if host not in self.perhost_ignores:
+                    self.perhost_ignores[host] = {}
+                self.perhost_ignores[host]['dirs'] = {}
+                self.perhost_ignores[host]['dirs']['/'] = [
+                    (lambda path: path[:-1] if path[-1] == '/'
+                     else path)(p)
+                    for p in self.perhost_rules_from_file[
                             'ignored_dirs'][host]]
-            if 'ignored_files' in self.perhost_rules_from_file:
-                for host in self.perhost_rules_from_file['ignored_files']:
-                    if host not in self.perhost_ignores:
-                        self.perhost_ignores[host] = {}
-                    self.perhost_ignores[host]['files'] = {}
-                    self.perhost_ignores[host]['files']['/'] = (
-                        self.perhost_rules_from_file['ignored_files'][host])
+        if 'ignored_files' in self.perhost_rules_from_file:
+            for host in self.perhost_rules_from_file['ignored_files']:
+                if host not in self.perhost_ignores:
+                    self.perhost_ignores[host] = {}
+                self.perhost_ignores[host]['files'] = {}
+                self.perhost_ignores[host]['files']['/'] = (
+                    self.perhost_rules_from_file['ignored_files'][host])
+
+    def add_perhost_rules_to_ignored(self, host):
+        '''
+        add dirs/files to be skipped during audit based
+        on rules in the rule store db
+        '''
+        if '/' not in self.ignored['dirs']:
+            self.ignored['dirs']['/'] = []
+        if '/' not in self.ignored['files']:
+            self.ignored['files']['/'] = []
+        if host not in self.perhost_rules_from_store:
+            return
+
+        for rule in self.perhost_rules_from_store[host]:
+            path = os.path.join(rule['basedir'], rule['name'])
+            if rule['status'] == 'good':
+                if retention.ruleutils.entrytype_to_text(rule['type']) == 
'dir':
+                    if path not in self.ignored['dirs']['/']:
+                        self.ignored['dirs']['/'].append(path)
+                elif retention.ruleutils.entrytype_to_text(rule['type']) == 
'file':
+                    if path not in self.ignored['files']['/']:
+                        self.ignored['files']['/'].append(path)
+                else:
+                    # some other random type, don't care
+                    continue
+
+    def show_ignored(self, basedirs):
+        sys.stderr.write(
+            "INFO: The below does not include per-host rules\n")
+        sys.stderr.write(
+            "INFO: or rules derived from the directory status entries.\n")
+
+        sys.stderr.write("INFO: Ignoring the following directories:\n")
+
+        for basedir in self.ignored['dirs']:
+            if basedir in basedirs or basedir == '*' or basedir == '/':
+                sys.stderr.write(
+                    "INFO: " + ','.join(self.ignored['dirs'][basedir])
+                    + " in " + basedir + '\n')
+
+        sys.stderr.write("INFO: Ignoring the following files:\n")
+        for basedir in self.ignored['files']:
+            if basedir in basedirs or basedir == '*' or basedir == '/':
+                sys.stderr.write(
+                    "INFO: " + ','.join(self.ignored['files'][basedir])
+                    + " in " + basedir + '\n')
+
+        sys.stderr.write(
+            "INFO: Ignoring files starting with the following:\n")
+        sys.stderr.write(
+            "INFO: " + ','.join(self.ignored['prefixes']) + '\n')
+
+        sys.stderr.write(
+            "INFO: Ignoring files ending with the following:\n")
+        for basedir in self.ignored['extensions']:
+            if basedir in basedirs or basedir == '*':
+                sys.stderr.write("INFO: " + ','.join(
+                    self.ignored['extensions'][basedir])
+                                 + " in " + basedir + '\n')
+
+
+class RemoteUserCfRetriever(object):
+    '''
+    retrieval and display dirs / files listed as to
+    be ignored in per-user lists on remote host
+    '''
+    def __init__(self, host, timeout, audit_type):
+        self.host = host
+        self.timeout = timeout
+        self.audit_type = audit_type
+        self.locations = audit_type + "_locations"
+
+    def run(self, quiet=False):
+        '''
+        do all the work
+
+        note that 'quiet' applies only to remotely
+        run, and the same is true for returning the contents.
+        maybe we want to fix that
+        '''
+
+        local_ignores = {}
+
+        client = salt.client.LocalClient()
+        module_args = [self.timeout, self.audit_type]
+
+        result = client.cmd([self.host], "retentionaudit.retrieve_usercfs",
+                            module_args, expr_form='list',
+                            timeout=self.timeout)
+
+        if self.host in result:
+            input = result[self.host]
+            try:
+                local_ignores = json.loads(
+                    input, object_hook=JsonHelper.decode_dict)
+            except:
+                print "WARNING: failed to get local ignores on host",
+                print self.host,
+                print "got this:", input
+                local_ignores = {}
+
+        if not quiet:
+            print local_ignores
+
+        return local_ignores
+
+class LocalUserCfRetriever(object):
+    '''
+    retrieval and display dirs / files listed as to
+    be ignored in per-user lists on local host
+    '''
+    def __init__(self, timeout, audit_type='homes'):
+        self.timeout = timeout
+        self.audit_type = audit_type
+        self.locations = audit_type + "_locations"
+
+    def run(self, quiet=False):
+        '''
+        do all the work
+
+        note that 'quiet' applies only to remotely
+        run, and the same is true for returning the contents.
+        maybe we want to fix that
+        '''
+
+        local_ignores = {}
+
+        local_ignores = get_local_ignores(self.locations)
+        output = json.dumps(local_ignores)
+        print output
+        return output
diff --git a/dataretention/retention/localfileaudit.py 
b/dataretention/retention/localfileaudit.py
index 24b54c2..77538d1 100644
--- a/dataretention/retention/localfileaudit.py
+++ b/dataretention/retention/localfileaudit.py
@@ -15,7 +15,8 @@
 from retention.fileinfo import FileInfo
 import retention.fileutils
 import retention.ruleutils
-
+from retention.ignores import Ignores
+import retention.ignores
 
 class LocalFilesAuditor(object):
     '''
@@ -65,7 +66,8 @@
         self.timeout = timeout
 
         self.ignored = {}
-        self.set_up_ignored()
+        self.ignores = Ignores(None)
+        self.ignores.set_up_ignored()
 
         self.hostname = socket.getfqdn()
 
@@ -120,69 +122,22 @@
             '/srv/audits/retention/configs/allhosts_file.cf')['perhostcf']
 
         if self.perhost_rules_from_store is not None:
-            self.add_perhost_rules_to_ignored()
+            self.ignores.add_perhost_rules_to_ignored(self.hostname)
 
         if (self.perhost_rules_from_file is not None and
                 'ignored_dirs' in self.perhost_rules_from_file):
-            if '/' not in self.ignored['dirs']:
-                self.ignored['dirs']['/'] = []
+            if '/' not in self.ignores.ignored['dirs']:
+                self.ignores.ignored['dirs']['/'] = []
             if self.hostname in self.perhost_rules_from_file['ignored_dirs']:
                 for path in self.perhost_rules_from_file[
                         'ignored_dirs'][self.hostname]:
                     if path.startswith('/'):
-                        self.ignored['dirs']['/'].append(path)
+                        self.ignores.ignored['dirs']['/'].append(path)
             if '*' in self.perhost_rules_from_file['ignored_dirs']:
                 for path in self.perhost_rules_from_file[
                         'ignored_dirs'][self.hostname]:
                     if path.startswith('/'):
-                        self.ignored['dirs']['/'].append(path)
-
-    def set_up_ignored(self):
-        '''
-        collect up initial list of files/dirs to skip during audit
-        '''
-        self.ignored['files'] = Config.cf['ignored_files']
-        self.ignored['dirs'] = Config.cf['ignored_dirs']
-        self.ignored['prefixes'] = Config.cf['ignored_prefixes']
-        self.ignored['extensions'] = Config.cf['ignored_extensions']
-
-        if self.ignore_also is not None:
-            # silently skip paths that are not absolute
-            for path in self.ignore_also:
-                if path.startswith('/'):
-                    if path.endswith('/'):
-                        if '/' not in self.ignored['dirs']:
-                            self.ignored['dirs']['/'] = []
-                        self.ignored['dirs']['/'].append(path[:-1])
-                    else:
-                        if '/' not in self.ignored['files']:
-                            self.ignored['files']['/'] = []
-                        self.ignored['files']['/'].append(path)
-
-    def add_perhost_rules_to_ignored(self):
-        '''
-        add dirs/files to be skipped during audit based
-        on rules in the rule store db
-        '''
-        if '/' not in self.ignored['dirs']:
-            self.ignored['dirs']['/'] = []
-        if '/' not in self.ignored['files']:
-            self.ignored['files']['/'] = []
-        for host in self.perhost_rules_from_store:
-            if host == self.hostname:
-                for rule in self.perhost_rules_from_store[host]:
-                    path = os.path.join(rule['basedir'], rule['name'])
-                    if rule['status'] == 'good':
-                        if retention.ruleutils.entrytype_to_text(rule['type']) 
== 'dir':
-                            if path not in self.ignored['dirs']['/']:
-                                self.ignored['dirs']['/'].append(path)
-                        elif 
retention.ruleutils.entrytype_to_text(rule['type']) == 'file':
-                            if path not in self.ignored['files']['/']:
-                                self.ignored['files']['/'].append(path)
-                        else:
-                            # some other random type, don't care
-                            continue
-                break
+                        self.ignores.ignored['dirs']['/'].append(path)
 
     def normalize(self, fname):
         '''
@@ -202,7 +157,7 @@
         '''
         fname = self.normalize(fname)
 
-        if retention.fileutils.file_is_ignored(fname, basedir, self.ignored):
+        if retention.ignores.file_is_ignored(fname, basedir, 
self.ignores.ignored):
             return False
 
         if (self.filenames_to_check is not None and
@@ -214,7 +169,7 @@
     def get_subdirs_to_do(self, dirname, dirname_depth, todo):
 
         locale.setlocale(locale.LC_ALL, '')
-        if retention.fileutils.dir_is_ignored(dirname, self.ignored):
+        if retention.fileutils.dir_is_ignored(dirname, self.ignores.ignored):
             return todo
         if retention.fileutils.dir_is_wrong_type(dirname):
             return todo
@@ -328,7 +283,7 @@
             if not retention.fileutils.dirtree_check(subdirpath, 
self.dirs_to_check):
                 return
 
-        if retention.fileutils.dir_is_ignored(subdirpath, self.ignored):
+        if retention.fileutils.dir_is_ignored(subdirpath, 
self.ignores.ignored):
             return True
 
         count = 0
@@ -356,8 +311,8 @@
         # cutoff won't be in our list
         temp_results = []
         for base, paths, files in self.walk_nolinks(subdirpath):
-            expanded_dirs, wildcard_dirs = 
retention.fileutils.expand_ignored_dirs(
-                base, self.ignored)
+            expanded_dirs, wildcard_dirs = 
retention.ignores.expand_ignored_dirs(
+                base, self.ignores.ignored)
             if self.dirs_to_check is not None:
                 paths[:] = [p for p in paths
                             if 
retention.fileutils.dirtree_check(os.path.join(base, p),
diff --git a/dataretention/retention/localhomeaudit.py 
b/dataretention/retention/localhomeaudit.py
index 10bdcf2..2936990 100644
--- a/dataretention/retention/localhomeaudit.py
+++ b/dataretention/retention/localhomeaudit.py
@@ -7,6 +7,7 @@
 import retention.magic
 from retention.config import Config
 from retention.localfileaudit import LocalFilesAuditor
+import retention.ignores
 
 class LocalHomesAuditor(LocalFilesAuditor):
     '''
@@ -31,85 +32,6 @@
         self.homes_owners = {}
 
         # FIXME where are these ever used???
-        local_ignores = LocalHomesAuditor.get_local_ignores(self.locations)
-        local_ignored_dirs, local_ignored_files = 
LocalHomesAuditor.process_local_ignores(
-            local_ignores, self.ignored)
-
-    @staticmethod
-    def process_local_ignores(local_ignores, ignored):
-        '''
-        files or dirs listed in data retention conf in homedir
-        are considered 'good' and added to ignore list
-
-        non-absolute paths will be taken as relative to the
-        home dir of the data retention config they were
-        read from
-        '''
-
-        local_ignored_dirs = []
-        local_ignored_files = []
-        for basedir in local_ignores:
-            for path in local_ignores[basedir]:
-                if not path.startswith('/'):
-                    path = os.path.join(basedir, path)
-
-                if path.endswith('/'):
-                    if 'dirs' not in ignored:
-                        ignored['dirs'] = {}
-                    if '/' not in ignored['dirs']:
-                        ignored['dirs']['/'] = []
-
-                    ignored['dirs']['/'].append(path[:-1])
-                    local_ignored_dirs.append(path[:-1])
-                else:
-                    if 'files' not in ignored:
-                        ignored['files'] = {}
-                    if '/' not in ignored['files']:
-                        ignored['files']['/'] = []
-
-                    ignored['files']['/'].append(path)
-                    local_ignored_files.append(path)
-        return local_ignored_dirs, local_ignored_files
-
-    @staticmethod
-    def get_home_dirs(locations):
-        '''
-        get a list of home directories where the root location(s) for home are
-        specified in the Config class (see 'home_locations'), by reading
-        these root location dirs and grabbing all subdirectory names from them
-        '''
-        home_dirs = []
-
-        for location in Config.cf[locations]:
-            if not os.path.isdir(location):
-                continue
-            home_dirs.extend([os.path.join(location, d)
-                              for d in os.listdir(location)
-                              if os.path.isdir(os.path.join(location, d))])
-        return home_dirs
-
-    @staticmethod
-    def get_local_ignores(locations):
-        '''
-        read a list of absolute paths from /home/blah/.data_retention
-        for all blah.  Dirs are specified by op sep at the end ('/')
-        and files without.
-        '''
-        local_ignores = {}
-        home_dirs = LocalHomesAuditor.get_home_dirs(locations)
-        for hdir in home_dirs:
-            local_ignores[hdir] = []
-            if os.path.exists(os.path.join(hdir, ".data_retention")):
-                try:
-                    filep = open(os.path.join(hdir, ".data_retention"))
-                    entries = filep.read().split("\n")
-                    filep.close()
-                except:
-                    pass
-                entries = filter(None, [e.strip() for e in entries])
-                # fixme should sanity check these? ???
-                # what happens if people put wildcards in the wrong
-                # component, or put utter garbage in there, or...?
-                local_ignores[hdir].extend(entries)
-
-        return local_ignores
+        local_ignores = retention.ignores.get_local_ignores(self.locations)
+        local_ignored_dirs, local_ignored_files = 
retention.ignores.process_local_ignores(
+            local_ignores, self.ignores.ignored)
diff --git a/dataretention/retention/remotefileauditor.py 
b/dataretention/retention/remotefileauditor.py
index ed7fbdd..233812e 100644
--- a/dataretention/retention/remotefileauditor.py
+++ b/dataretention/retention/remotefileauditor.py
@@ -18,6 +18,8 @@
 from retention.runner import Runner
 from retention.localfileaudit import LocalFilesAuditor
 import retention.ruleutils
+from retention.ignores import Ignores
+
 
 def get_dirs_toexamine(host_report):
     '''
@@ -121,8 +123,6 @@
         self.store_filepath = store_filepath
         self.verbose = verbose
 
-        self.set_up_ignored()
-
         # need this for locally running jobs
         self.hostname = socket.getfqdn()
 
@@ -151,7 +151,10 @@
         self.cdb.store_db_init(self.expanded_hosts)
         self.set_up_and_export_rule_store()
 
-        self.show_ignored(Config.cf[self.locations])
+        self.ignores = Ignores(self.cdb)
+        self.ignores.set_up_ignored(self.ignore_also)
+        if self.verbose:
+            self.ignores.show_ignored(Config.cf[self.locations])
 
         self.today = time.time()
         self.magic = retention.magic.magic_open(retention.magic.MAGIC_NONE)
@@ -208,29 +211,6 @@
         for host in hosts:
             nicepath = os.path.join(where_to_put, host + ".conf")
             retention.ruleutils.export_rules(self.cdb, nicepath, host)
-
-    def set_up_ignored(self):
-        '''
-        collect up initial list of files/dirs to skip during audit
-        '''
-        self.ignored = {}
-        self.ignored['files'] = Config.cf['ignored_files']
-        self.ignored['dirs'] = Config.cf['ignored_dirs']
-        self.ignored['prefixes'] = Config.cf['ignored_prefixes']
-        self.ignored['extensions'] = Config.cf['ignored_extensions']
-
-        if self.ignore_also is not None:
-            # silently skip paths that are not absolute
-            for path in self.ignore_also:
-                if path.startswith('/'):
-                    if path.endswith('/'):
-                        if '/' not in self.ignored['dirs']:
-                            self.ignored['dirs']['/'] = []
-                        self.ignored['dirs']['/'].append(path[:-1])
-                    else:
-                        if '/' not in self.ignored['files']:
-                            self.ignored['files']['/'] = []
-                        self.ignored['files']['/'].append(path)
 
     def get_perhost_rules_as_json(self):
         '''
@@ -297,41 +277,6 @@
-            with open("/srv/salt/audits/retention/configs/allhosts_file.py", "w+") as fp:
                 fp.write(self.perhost_raw)
                 fp.close()
-
-    def show_ignored(self, basedirs):
-        if self.verbose:
-            sys.stderr.write(
-                "INFO: The below does not include per-host rules\n")
-            sys.stderr.write(
-                "INFO: or rules derived from the directory status entries.\n")
-
-            sys.stderr.write("INFO: Ignoring the following directories:\n")
-
-            for basedir in self.ignored['dirs']:
-                if basedir in basedirs or basedir == '*' or basedir == '/':
-                    sys.stderr.write(
-                        "INFO: " + ','.join(self.ignored['dirs'][basedir])
-                        + " in " + basedir + '\n')
-
-            sys.stderr.write("INFO: Ignoring the following files:\n")
-            for basedir in self.ignored['files']:
-                if basedir in basedirs or basedir == '*' or basedir == '/':
-                    sys.stderr.write(
-                        "INFO: " + ','.join(self.ignored['files'][basedir])
-                        + " in " + basedir + '\n')
-
-            sys.stderr.write(
-                "INFO: Ignoring files starting with the following:\n")
-            sys.stderr.write(
-                "INFO: " + ','.join(self.ignored['prefixes']) + '\n')
-
-            sys.stderr.write(
-                "INFO: Ignoring files ending with the following:\n")
-            for basedir in self.ignored['extensions']:
-                if basedir in basedirs or basedir == '*':
-                    sys.stderr.write("INFO: " + ','.join(
-                        self.ignored['extensions'][basedir])
-                                     + " in " + basedir + '\n')
 
     def normalize(self, fname):
         '''
@@ -472,7 +417,7 @@
                         print "no output from host", host
         # add some results to rule store
         self.update_status_rules_from_report(result)
-        return result, self.ignored
+        return result, self.ignores.ignored
 
     def update_status_rules_from_report(self, report):
         hostlist = report.keys()
diff --git a/dataretention/retention/retentionaudit.py b/dataretention/retention/retentionaudit.py
index 8a30f05..0711ec1 100644
--- a/dataretention/retention/retentionaudit.py
+++ b/dataretention/retention/retentionaudit.py
@@ -8,7 +8,7 @@
 from retention.locallogaudit import LocalLogsAuditor
 from retention.localhomeaudit import LocalHomesAuditor
 from retention.examiner import LocalFileExaminer, LocalDirExaminer
-from retention.userconfretriever import LocalUserCfRetriever
+from retention.ignores import LocalUserCfRetriever
 
 log = logging.getLogger(__name__)
 
diff --git a/dataretention/retention/userconfretriever.py b/dataretention/retention/userconfretriever.py
deleted file mode 100644
index 545f934..0000000
--- a/dataretention/retention/userconfretriever.py
+++ /dev/null
@@ -1,83 +0,0 @@
-import sys
-import json
-import salt.client
-
-sys.path.append('/srv/audits/retention/scripts/')
-
-import retention.remotefileauditor
-from retention.localhomeaudit import LocalHomesAuditor
-import retention.utils
-from retention.utils import JsonHelper
-import retention.fileutils
-import retention.ruleutils
-
-class RemoteUserCfRetriever(object):
-    '''
-    retrieval and display dirs / files listed as to
-    be ignored in per-user lists on remote host
-    '''
-    def __init__(self, host, timeout, audit_type):
-        self.host = host
-        self.timeout = timeout
-        self.audit_type = audit_type
-        self.locations = audit_type + "_locations"
-
-    def run(self, quiet=False):
-        '''
-        do all the work
-
-        note that 'quiet' applies only to remotely
-        run, and the same is true for returning the contents.
-        maybe we want to fix that
-        '''
-
-        local_ignores = {}
-
-        client = salt.client.LocalClient()
-        module_args = [self.timeout, self.audit_type]
-
-        result = client.cmd([self.host], "retentionaudit.retrieve_usercfs",
-                            module_args, expr_form='list',
-                            timeout=self.timeout)
-
-        if self.host in result:
-            input = result[self.host]
-            try:
-                local_ignores = json.loads(
-                    input, object_hook=JsonHelper.decode_dict)
-            except:
-                print "WARNING: failed to get local ignores on host",
-                print self.host,
-                print "got this:", input
-                local_ignores = {}
-
-        if not quiet:
-            print local_ignores
-
-        return local_ignores
-
-class LocalUserCfRetriever(object):
-    '''
-    retrieval and display dirs / files listed as to
-    be ignored in per-user lists on local host
-    '''
-    def __init__(self, timeout, audit_type='homes'):
-        self.timeout = timeout
-        self.audit_type = audit_type
-        self.locations = audit_type + "_locations"
-
-    def run(self, quiet=False):
-        '''
-        do all the work
-
-        note that 'quiet' applies only to remotely
-        run, and the same is true for returning the contents.
-        maybe we want to fix that
-        '''
-
-        local_ignores = {}
-
-        local_ignores = LocalHomesAuditor.get_local_ignores(self.locations)
-        output = json.dumps(local_ignores)
-        print output
-        return output

-- 
To view, visit https://gerrit.wikimedia.org/r/233465
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ie9b7d995124c5875bf86be590836d963a4e93105
Gerrit-PatchSet: 2
Gerrit-Project: operations/software
Gerrit-Branch: master
Gerrit-Owner: ArielGlenn <[email protected]>
Gerrit-Reviewer: ArielGlenn <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to