ArielGlenn has submitted this change and it was merged.
Change subject: clean up ignore list code
......................................................................
clean up ignore list code
move duplicate ignores list methods into module
move ignore list methods/classes into module
Change-Id: Ie9b7d995124c5875bf86be590836d963a4e93105
---
M dataretention/data_auditor.py
M dataretention/retention/cli.py
M dataretention/retention/fileutils.py
M dataretention/retention/ignores.py
M dataretention/retention/localfileaudit.py
M dataretention/retention/localhomeaudit.py
M dataretention/retention/remotefileauditor.py
M dataretention/retention/retentionaudit.py
D dataretention/retention/userconfretriever.py
9 files changed, 380 insertions(+), 402 deletions(-)
Approvals:
ArielGlenn: Verified; Looks good to me, approved
diff --git a/dataretention/data_auditor.py b/dataretention/data_auditor.py
index 9d7c343..37800af 100644
--- a/dataretention/data_auditor.py
+++ b/dataretention/data_auditor.py
@@ -8,7 +8,7 @@
from retention.remotelogauditor import RemoteLogsAuditor
from retention.remotehomeauditor import RemoteHomesAuditor
from retention.examiner import RemoteFileExaminer, RemoteDirExaminer
-from retention.userconfretriever import RemoteUserCfRetriever
+from retention.ignores import RemoteUserCfRetriever
def usage(message=None):
if message:
diff --git a/dataretention/retention/cli.py b/dataretention/retention/cli.py
index 4541a25..d199e29 100644
--- a/dataretention/retention/cli.py
+++ b/dataretention/retention/cli.py
@@ -10,7 +10,6 @@
from retention.status import Status
from retention.rule import RuleStore
import retention.remotefileauditor
-from retention.localhomeaudit import LocalHomesAuditor
from retention.locallogaudit import LocalLogsAuditor
from retention.fileinfo import FileInfo
import retention.utils
@@ -19,9 +18,9 @@
from retention.examiner import RemoteDirExaminer, RemoteFileExaminer
import retention.fileutils
import retention.ruleutils
-from retention.userconfretriever import RemoteUserCfRetriever
import retention.cliutils
-from retention.ignores import Ignores
+from retention.ignores import Ignores, RemoteUserCfRetriever
+import retention.ignores
from retention.completion import Completion
@@ -279,7 +278,7 @@
else:
local_ign = RemoteUserCfRetriever(host_todo, self.timeout,
self.audit_type)
self.local_ignores = local_ign.run(True)
- local_ignored_dirs, local_ignored_files =
LocalHomesAuditor.process_local_ignores(
+ local_ignored_dirs, local_ignored_files =
retention.ignores.process_local_ignores(
self.local_ignores, self.ignored)
self.do_one_host(host_todo, report)
@@ -365,19 +364,19 @@
path = LocalLogsAuditor.normalize(path)
if entrytype == 'file':
- if retention.fileutils.file_is_ignored(path, basedir,
self.ignored):
+ if retention.ignores.file_is_ignored(path, basedir, self.ignored):
return False
# check perhost file
if self.cenv.host in self.ignores.perhost_ignores:
- if retention.fileutils.file_is_ignored(
+ if retention.ignores.file_is_ignored(
path, basedir,
self.ignores.perhost_ignores[self.cenv.host]):
return False
# check perhost rules
if self.cenv.host in self.ignores.perhost_ignores_from_rules:
- if retention.fileutils.file_is_ignored(
+ if retention.ignores.file_is_ignored(
path, basedir,
self.ignores.perhost_ignores_from_rules[self.cenv.host]):
return False
diff --git a/dataretention/retention/fileutils.py
b/dataretention/retention/fileutils.py
index 9c0b537..17531b5 100644
--- a/dataretention/retention/fileutils.py
+++ b/dataretention/retention/fileutils.py
@@ -73,44 +73,6 @@
return True
return False
-def expand_ignored_dirs(basedir, ignored):
- '''
- find dirs to ignore relative to the specified
- basedir, in Config entry. Fall back to wildcard spec
- if there is not entry for the basedir. Dirs in
- Config entry may have one * in the path, this
- will be treated as a wildcard for the purposes
- of checking directories against the entry.
-
- args: absolute path of basedir being crawled
- hash of ignored dirs, file, etc
- returns: list of absolute paths of dirs to ignore,
- plus separate list of abslute paths containing '*',
- also to ignore, or the empty list if there are none
- '''
-
- dirs = []
- wildcard_dirs = []
-
- to_expand = []
- if 'dirs' in ignored:
- if '*' in ignored['dirs']:
- to_expand.extend(ignored['dirs']['*'])
-
- if '/' in ignored['dirs']:
- to_expand.extend(ignored['dirs']['/'])
-
- if basedir in ignored['dirs']:
- to_expand.extend(ignored['dirs'][basedir])
-
- for dname in to_expand:
- if '*' in dname:
- wildcard_dirs.append(os.path.join(basedir, dname))
- else:
- dirs.append(os.path.join(basedir, dname))
-
- return dirs, wildcard_dirs
-
def wildcard_matches(dirname, wildcard_dirs, exact=True):
'''given a list of absolute paths with exactly one '*'
in each entry, see if the passed dirname matches
@@ -128,57 +90,6 @@
return True
else:
continue
- return False
-
-def file_is_ignored(fname, basedir, ignored):
- '''
- pass normalized name (abs path), basedir (location audited),
- hash of ignored files, dirs, prefixes, extensions
- get back True if the file is to be ignored and
- False otherwise
- '''
-
- basename = os.path.basename(fname)
-
- if 'prefixes' in ignored:
- if startswith(basename, ignored['prefixes']):
- return True
-
- if 'extensions' in ignored:
- if '*' in ignored['extensions']:
- if endswith(basename, ignored['extensions']['*']):
- return True
- if basedir in ignored['extensions']:
- if endswith(
- basename, ignored['extensions'][basedir]):
- return True
-
- if 'files' in ignored:
- if basename in ignored['files']:
- return True
- if '*' in ignored['files']:
- if endswith(basename, ignored['files']['*']):
- return True
-
- if '/' in ignored['files']:
- if fname in ignored['files']['/']:
- return True
- if wildcard_matches(
- fname, [w for w in ignored['files']['/'] if '*' in w]):
- return True
-
- if basedir in ignored['files']:
- if endswith(basename, ignored['files'][basedir]):
- return True
- return False
-
-def dir_is_ignored(dirname, ignored):
- expanded_dirs, wildcard_dirs = expand_ignored_dirs(
- os.path.dirname(dirname), ignored)
- if dirname in expanded_dirs:
- return True
- if wildcard_matches(dirname, wildcard_dirs):
- return True
return False
def dir_is_wrong_type(dirname):
diff --git a/dataretention/retention/ignores.py
b/dataretention/retention/ignores.py
index f324eac..482508b 100644
--- a/dataretention/retention/ignores.py
+++ b/dataretention/retention/ignores.py
@@ -1,15 +1,183 @@
import os
import sys
import runpy
+import json
+import salt.client
sys.path.append('/srv/audits/retention/scripts/')
from retention.status import Status
import retention.remotefileauditor
import retention.utils
+from retention.utils import JsonHelper
import retention.fileutils
import retention.ruleutils
import retention.cliutils
+from retention.config import Config
+
+def expand_ignored_dirs(basedir, ignored):
+ '''
+ find dirs to ignore relative to the specified
+ basedir, in Config entry. Fall back to wildcard spec
+ if there is not entry for the basedir. Dirs in
+ Config entry may have one * in the path, this
+ will be treated as a wildcard for the purposes
+ of checking directories against the entry.
+
+ args: absolute path of basedir being crawled
+          hash of ignored dirs, files, etc
+ returns: list of absolute paths of dirs to ignore,
+    plus separate list of absolute paths containing '*',
+ also to ignore, or the empty list if there are none
+ '''
+
+ dirs = []
+ wildcard_dirs = []
+
+ to_expand = []
+ if 'dirs' in ignored:
+ if '*' in ignored['dirs']:
+ to_expand.extend(ignored['dirs']['*'])
+
+ if '/' in ignored['dirs']:
+ to_expand.extend(ignored['dirs']['/'])
+
+ if basedir in ignored['dirs']:
+ to_expand.extend(ignored['dirs'][basedir])
+
+ for dname in to_expand:
+ if '*' in dname:
+ wildcard_dirs.append(os.path.join(basedir, dname))
+ else:
+ dirs.append(os.path.join(basedir, dname))
+ return dirs, wildcard_dirs
+
+def dir_is_ignored(dirname, ignored):
+ expanded_dirs, wildcard_dirs = expand_ignored_dirs(
+ os.path.dirname(dirname), ignored)
+ if dirname in expanded_dirs:
+ return True
+ if wildcard_matches(dirname, wildcard_dirs):
+ return True
+ return False
+
+def file_is_ignored(fname, basedir, ignored):
+ '''
+ pass normalized name (abs path), basedir (location audited),
+ hash of ignored files, dirs, prefixes, extensions
+ get back True if the file is to be ignored and
+ False otherwise
+ '''
+
+ basename = os.path.basename(fname)
+
+ if 'prefixes' in ignored:
+ if startswith(basename, ignored['prefixes']):
+ return True
+
+ if 'extensions' in ignored:
+ if '*' in ignored['extensions']:
+ if endswith(basename, ignored['extensions']['*']):
+ return True
+ if basedir in ignored['extensions']:
+ if endswith(
+ basename, ignored['extensions'][basedir]):
+ return True
+
+ if 'files' in ignored:
+ if basename in ignored['files']:
+ return True
+ if '*' in ignored['files']:
+ if endswith(basename, ignored['files']['*']):
+ return True
+
+ if '/' in ignored['files']:
+ if fname in ignored['files']['/']:
+ return True
+ if wildcard_matches(
+ fname, [w for w in ignored['files']['/'] if '*' in w]):
+ return True
+
+ if basedir in ignored['files']:
+ if endswith(basename, ignored['files'][basedir]):
+ return True
+ return False
+
+def get_home_dirs(locations):
+ '''
+ get a list of home directories where the root location(s) for home are
+ specified in the Config class (see 'home_locations'), by reading
+ these root location dirs and grabbing all subdirectory names from them
+ '''
+ home_dirs = []
+
+ for location in Config.cf[locations]:
+ if not os.path.isdir(location):
+ continue
+ home_dirs.extend([os.path.join(location, d)
+ for d in os.listdir(location)
+ if os.path.isdir(os.path.join(location, d))])
+ return home_dirs
+
+def get_local_ignores(locations):
+ '''
+ read a list of absolute paths from /home/blah/.data_retention
+ for all blah. Dirs are specified by op sep at the end ('/')
+ and files without.
+ '''
+ local_ignores = {}
+ home_dirs = get_home_dirs(locations)
+ for hdir in home_dirs:
+ local_ignores[hdir] = []
+ if os.path.exists(os.path.join(hdir, ".data_retention")):
+ try:
+ filep = open(os.path.join(hdir, ".data_retention"))
+ entries = filep.read().split("\n")
+ filep.close()
+ except:
+ pass
+ entries = filter(None, [e.strip() for e in entries])
+ # fixme should sanity check these? ???
+ # what happens if people put wildcards in the wrong
+ # component, or put utter garbage in there, or...?
+ local_ignores[hdir].extend(entries)
+ return local_ignores
+
+def process_local_ignores(local_ignores, ignored):
+ '''
+ files or dirs listed in data retention conf in homedir
+ are considered 'good' and added to ignore list
+
+ non-absolute paths will be taken as relative to the
+ home dir of the data retention config they were
+ read from
+ '''
+
+ local_ignored_dirs = []
+ local_ignored_files = []
+ for basedir in local_ignores:
+ for path in local_ignores[basedir]:
+ if not path.startswith('/'):
+ path = os.path.join(basedir, path)
+
+ if path.endswith('/'):
+ if 'dirs' not in ignored:
+ ignored['dirs'] = {}
+ if '/' not in ignored['dirs']:
+ ignored['dirs']['/'] = []
+
+ ignored['dirs']['/'].append(path[:-1])
+ local_ignored_dirs.append(path[:-1])
+ else:
+ if 'files' not in ignored:
+ ignored['files'] = {}
+ if '/' not in ignored['files']:
+ ignored['files']['/'] = []
+
+ ignored['files']['/'].append(path)
+ local_ignored_files.append(path)
+ return local_ignored_dirs, local_ignored_files
+
class Ignores(object):
'''
@@ -20,11 +188,39 @@
def __init__(self, cdb):
self.cdb = cdb
self.perhost_rules_from_file = None
- self.hosts = self.cdb.store_db_list_all_hosts()
+ if cdb is not None:
+ self.hosts = self.cdb.store_db_list_all_hosts()
+ else:
+ self.hosts = None
+
self.perhost_ignores = {}
self.perhost_ignores_from_rules = {}
self.perhost_rules_from_store = {}
self.get_perhost_cf_from_file()
+ self.ignored = {}
+
+ def set_up_ignored(self, ignore_also):
+ '''
+ collect up initial list of files/dirs to skip during audit
+ '''
+
+ self.ignored['files'] = Config.cf['ignored_files']
+ self.ignored['dirs'] = Config.cf['ignored_dirs']
+ self.ignored['prefixes'] = Config.cf['ignored_prefixes']
+ self.ignored['extensions'] = Config.cf['ignored_extensions']
+
+ if ignore_also is not None:
+ # silently skip paths that are not absolute
+ for path in ignore_also:
+ if path.startswith('/'):
+ if path.endswith('/'):
+ if '/' not in self.ignored['dirs']:
+ self.ignored['dirs']['/'] = []
+ self.ignored['dirs']['/'].append(path[:-1])
+ else:
+ if '/' not in self.ignored['files']:
+ self.ignored['files']['/'] = []
+ self.ignored['files']['/'].append(path)
def get_perhost_from_rules(self, hosts=None):
if hosts == None:
@@ -69,21 +265,154 @@
except:
self.perhost_rules_from_file = None
- if self.perhost_rules_from_file is not None:
- if 'ignored_dirs' in self.perhost_rules_from_file:
- for host in self.perhost_rules_from_file['ignored_dirs']:
- if host not in self.perhost_ignores:
- self.perhost_ignores[host] = {}
- self.perhost_ignores[host]['dirs'] = {}
- self.perhost_ignores[host]['dirs']['/'] = [
- (lambda path: path[:-1] if path[-1] == '/'
- else path)(p)
- for p in self.perhost_rules_from_file[
+ if self.perhost_rules_from_file is None:
+ return
+
+ if 'ignored_dirs' in self.perhost_rules_from_file:
+ for host in self.perhost_rules_from_file['ignored_dirs']:
+ if host not in self.perhost_ignores:
+ self.perhost_ignores[host] = {}
+ self.perhost_ignores[host]['dirs'] = {}
+ self.perhost_ignores[host]['dirs']['/'] = [
+ (lambda path: path[:-1] if path[-1] == '/'
+ else path)(p)
+ for p in self.perhost_rules_from_file[
'ignored_dirs'][host]]
- if 'ignored_files' in self.perhost_rules_from_file:
- for host in self.perhost_rules_from_file['ignored_files']:
- if host not in self.perhost_ignores:
- self.perhost_ignores[host] = {}
- self.perhost_ignores[host]['files'] = {}
- self.perhost_ignores[host]['files']['/'] = (
- self.perhost_rules_from_file['ignored_files'][host])
+ if 'ignored_files' in self.perhost_rules_from_file:
+ for host in self.perhost_rules_from_file['ignored_files']:
+ if host not in self.perhost_ignores:
+ self.perhost_ignores[host] = {}
+ self.perhost_ignores[host]['files'] = {}
+ self.perhost_ignores[host]['files']['/'] = (
+ self.perhost_rules_from_file['ignored_files'][host])
+
+ def add_perhost_rules_to_ignored(self, host):
+ '''
+ add dirs/files to be skipped during audit based
+ on rules in the rule store db
+ '''
+ if '/' not in self.ignored['dirs']:
+ self.ignored['dirs']['/'] = []
+ if '/' not in self.ignored['files']:
+ self.ignored['files']['/'] = []
+ if host not in self.perhost_rules_from_store:
+ return
+
+ for rule in self.perhost_rules_from_store[host]:
+ path = os.path.join(rule['basedir'], rule['name'])
+ if rule['status'] == 'good':
+ if retention.ruleutils.entrytype_to_text(rule['type']) ==
'dir':
+ if path not in self.ignored['dirs']['/']:
+ self.ignored['dirs']['/'].append(path)
+ elif retention.ruleutils.entrytype_to_text(rule['type']) ==
'file':
+ if path not in self.ignored['files']['/']:
+ self.ignored['files']['/'].append(path)
+ else:
+ # some other random type, don't care
+ continue
+
+ def show_ignored(self, basedirs):
+ sys.stderr.write(
+ "INFO: The below does not include per-host rules\n")
+ sys.stderr.write(
+ "INFO: or rules derived from the directory status entries.\n")
+
+ sys.stderr.write("INFO: Ignoring the following directories:\n")
+
+ for basedir in self.ignored['dirs']:
+ if basedir in basedirs or basedir == '*' or basedir == '/':
+ sys.stderr.write(
+ "INFO: " + ','.join(self.ignored['dirs'][basedir])
+ + " in " + basedir + '\n')
+
+ sys.stderr.write("INFO: Ignoring the following files:\n")
+ for basedir in self.ignored['files']:
+ if basedir in basedirs or basedir == '*' or basedir == '/':
+ sys.stderr.write(
+ "INFO: " + ','.join(self.ignored['files'][basedir])
+ + " in " + basedir + '\n')
+
+ sys.stderr.write(
+ "INFO: Ignoring files starting with the following:\n")
+ sys.stderr.write(
+ "INFO: " + ','.join(self.ignored['prefixes']) + '\n')
+
+ sys.stderr.write(
+ "INFO: Ignoring files ending with the following:\n")
+ for basedir in self.ignored['extensions']:
+ if basedir in basedirs or basedir == '*':
+ sys.stderr.write("INFO: " + ','.join(
+ self.ignored['extensions'][basedir])
+ + " in " + basedir + '\n')
+
+
+class RemoteUserCfRetriever(object):
+ '''
+ retrieval and display dirs / files listed as to
+ be ignored in per-user lists on remote host
+ '''
+ def __init__(self, host, timeout, audit_type):
+ self.host = host
+ self.timeout = timeout
+ self.audit_type = audit_type
+ self.locations = audit_type + "_locations"
+
+ def run(self, quiet=False):
+ '''
+ do all the work
+
+ note that 'quiet' applies only to remotely
+ run, and the same is true for returning the contents.
+ maybe we want to fix that
+ '''
+
+ local_ignores = {}
+
+ client = salt.client.LocalClient()
+ module_args = [self.timeout, self.audit_type]
+
+ result = client.cmd([self.host], "retentionaudit.retrieve_usercfs",
+ module_args, expr_form='list',
+ timeout=self.timeout)
+
+ if self.host in result:
+ input = result[self.host]
+ try:
+ local_ignores = json.loads(
+ input, object_hook=JsonHelper.decode_dict)
+ except:
+ print "WARNING: failed to get local ignores on host",
+ print self.host,
+ print "got this:", input
+ local_ignores = {}
+
+ if not quiet:
+ print local_ignores
+
+ return local_ignores
+
+class LocalUserCfRetriever(object):
+ '''
+ retrieval and display dirs / files listed as to
+ be ignored in per-user lists on local host
+ '''
+ def __init__(self, timeout, audit_type='homes'):
+ self.timeout = timeout
+ self.audit_type = audit_type
+ self.locations = audit_type + "_locations"
+
+ def run(self, quiet=False):
+ '''
+ do all the work
+
+ note that 'quiet' applies only to remotely
+ run, and the same is true for returning the contents.
+ maybe we want to fix that
+ '''
+
+ local_ignores = {}
+
+ local_ignores = get_local_ignores(self.locations)
+ output = json.dumps(local_ignores)
+ print output
+ return output
diff --git a/dataretention/retention/localfileaudit.py
b/dataretention/retention/localfileaudit.py
index 24b54c2..77538d1 100644
--- a/dataretention/retention/localfileaudit.py
+++ b/dataretention/retention/localfileaudit.py
@@ -15,7 +15,8 @@
from retention.fileinfo import FileInfo
import retention.fileutils
import retention.ruleutils
-
+from retention.ignores import Ignores
+import retention.ignores
class LocalFilesAuditor(object):
'''
@@ -65,7 +66,8 @@
self.timeout = timeout
self.ignored = {}
- self.set_up_ignored()
+ self.ignores = Ignores(None)
+ self.ignores.set_up_ignored()
self.hostname = socket.getfqdn()
@@ -120,69 +122,22 @@
'/srv/audits/retention/configs/allhosts_file.cf')['perhostcf']
if self.perhost_rules_from_store is not None:
- self.add_perhost_rules_to_ignored()
+ self.ignores.add_perhost_rules_to_ignored(self.hostname)
if (self.perhost_rules_from_file is not None and
'ignored_dirs' in self.perhost_rules_from_file):
- if '/' not in self.ignored['dirs']:
- self.ignored['dirs']['/'] = []
+ if '/' not in self.ignores.ignored['dirs']:
+ self.ignores.ignored['dirs']['/'] = []
if self.hostname in self.perhost_rules_from_file['ignored_dirs']:
for path in self.perhost_rules_from_file[
'ignored_dirs'][self.hostname]:
if path.startswith('/'):
- self.ignored['dirs']['/'].append(path)
+ self.ignores.ignored['dirs']['/'].append(path)
if '*' in self.perhost_rules_from_file['ignored_dirs']:
for path in self.perhost_rules_from_file[
'ignored_dirs'][self.hostname]:
if path.startswith('/'):
- self.ignored['dirs']['/'].append(path)
-
- def set_up_ignored(self):
- '''
- collect up initial list of files/dirs to skip during audit
- '''
- self.ignored['files'] = Config.cf['ignored_files']
- self.ignored['dirs'] = Config.cf['ignored_dirs']
- self.ignored['prefixes'] = Config.cf['ignored_prefixes']
- self.ignored['extensions'] = Config.cf['ignored_extensions']
-
- if self.ignore_also is not None:
- # silently skip paths that are not absolute
- for path in self.ignore_also:
- if path.startswith('/'):
- if path.endswith('/'):
- if '/' not in self.ignored['dirs']:
- self.ignored['dirs']['/'] = []
- self.ignored['dirs']['/'].append(path[:-1])
- else:
- if '/' not in self.ignored['files']:
- self.ignored['files']['/'] = []
- self.ignored['files']['/'].append(path)
-
- def add_perhost_rules_to_ignored(self):
- '''
- add dirs/files to be skipped during audit based
- on rules in the rule store db
- '''
- if '/' not in self.ignored['dirs']:
- self.ignored['dirs']['/'] = []
- if '/' not in self.ignored['files']:
- self.ignored['files']['/'] = []
- for host in self.perhost_rules_from_store:
- if host == self.hostname:
- for rule in self.perhost_rules_from_store[host]:
- path = os.path.join(rule['basedir'], rule['name'])
- if rule['status'] == 'good':
- if retention.ruleutils.entrytype_to_text(rule['type'])
== 'dir':
- if path not in self.ignored['dirs']['/']:
- self.ignored['dirs']['/'].append(path)
- elif
retention.ruleutils.entrytype_to_text(rule['type']) == 'file':
- if path not in self.ignored['files']['/']:
- self.ignored['files']['/'].append(path)
- else:
- # some other random type, don't care
- continue
- break
+ self.ignores.ignored['dirs']['/'].append(path)
def normalize(self, fname):
'''
@@ -202,7 +157,7 @@
'''
fname = self.normalize(fname)
- if retention.fileutils.file_is_ignored(fname, basedir, self.ignored):
+ if retention.ignores.file_is_ignored(fname, basedir,
self.ignores.ignored):
return False
if (self.filenames_to_check is not None and
@@ -214,7 +169,7 @@
def get_subdirs_to_do(self, dirname, dirname_depth, todo):
locale.setlocale(locale.LC_ALL, '')
- if retention.fileutils.dir_is_ignored(dirname, self.ignored):
+ if retention.fileutils.dir_is_ignored(dirname, self.ignores.ignored):
return todo
if retention.fileutils.dir_is_wrong_type(dirname):
return todo
@@ -328,7 +283,7 @@
if not retention.fileutils.dirtree_check(subdirpath,
self.dirs_to_check):
return
- if retention.fileutils.dir_is_ignored(subdirpath, self.ignored):
+ if retention.fileutils.dir_is_ignored(subdirpath,
self.ignores.ignored):
return True
count = 0
@@ -356,8 +311,8 @@
# cutoff won't be in our list
temp_results = []
for base, paths, files in self.walk_nolinks(subdirpath):
- expanded_dirs, wildcard_dirs =
retention.fileutils.expand_ignored_dirs(
- base, self.ignored)
+ expanded_dirs, wildcard_dirs =
retention.ignores.expand_ignored_dirs(
+ base, self.ignores.ignored)
if self.dirs_to_check is not None:
paths[:] = [p for p in paths
if
retention.fileutils.dirtree_check(os.path.join(base, p),
diff --git a/dataretention/retention/localhomeaudit.py
b/dataretention/retention/localhomeaudit.py
index 10bdcf2..2936990 100644
--- a/dataretention/retention/localhomeaudit.py
+++ b/dataretention/retention/localhomeaudit.py
@@ -7,6 +7,7 @@
import retention.magic
from retention.config import Config
from retention.localfileaudit import LocalFilesAuditor
+import retention.ignores
class LocalHomesAuditor(LocalFilesAuditor):
'''
@@ -31,85 +32,6 @@
self.homes_owners = {}
# FIXME where are these ever used???
- local_ignores = LocalHomesAuditor.get_local_ignores(self.locations)
- local_ignored_dirs, local_ignored_files =
LocalHomesAuditor.process_local_ignores(
- local_ignores, self.ignored)
-
- @staticmethod
- def process_local_ignores(local_ignores, ignored):
- '''
- files or dirs listed in data retention conf in homedir
- are considered 'good' and added to ignore list
-
- non-absolute paths will be taken as relative to the
- home dir of the data retention config they were
- read from
- '''
-
- local_ignored_dirs = []
- local_ignored_files = []
- for basedir in local_ignores:
- for path in local_ignores[basedir]:
- if not path.startswith('/'):
- path = os.path.join(basedir, path)
-
- if path.endswith('/'):
- if 'dirs' not in ignored:
- ignored['dirs'] = {}
- if '/' not in ignored['dirs']:
- ignored['dirs']['/'] = []
-
- ignored['dirs']['/'].append(path[:-1])
- local_ignored_dirs.append(path[:-1])
- else:
- if 'files' not in ignored:
- ignored['files'] = {}
- if '/' not in ignored['files']:
- ignored['files']['/'] = []
-
- ignored['files']['/'].append(path)
- local_ignored_files.append(path)
- return local_ignored_dirs, local_ignored_files
-
- @staticmethod
- def get_home_dirs(locations):
- '''
- get a list of home directories where the root location(s) for home are
- specified in the Config class (see 'home_locations'), by reading
- these root location dirs and grabbing all subdirectory names from them
- '''
- home_dirs = []
-
- for location in Config.cf[locations]:
- if not os.path.isdir(location):
- continue
- home_dirs.extend([os.path.join(location, d)
- for d in os.listdir(location)
- if os.path.isdir(os.path.join(location, d))])
- return home_dirs
-
- @staticmethod
- def get_local_ignores(locations):
- '''
- read a list of absolute paths from /home/blah/.data_retention
- for all blah. Dirs are specified by op sep at the end ('/')
- and files without.
- '''
- local_ignores = {}
- home_dirs = LocalHomesAuditor.get_home_dirs(locations)
- for hdir in home_dirs:
- local_ignores[hdir] = []
- if os.path.exists(os.path.join(hdir, ".data_retention")):
- try:
- filep = open(os.path.join(hdir, ".data_retention"))
- entries = filep.read().split("\n")
- filep.close()
- except:
- pass
- entries = filter(None, [e.strip() for e in entries])
- # fixme should sanity check these? ???
- # what happens if people put wildcards in the wrong
- # component, or put utter garbage in there, or...?
- local_ignores[hdir].extend(entries)
-
- return local_ignores
+ local_ignores = retention.ignores.get_local_ignores(self.locations)
+ local_ignored_dirs, local_ignored_files =
retention.ignores.process_local_ignores(
+ local_ignores, self.ignores.ignored)
diff --git a/dataretention/retention/remotefileauditor.py
b/dataretention/retention/remotefileauditor.py
index ed7fbdd..233812e 100644
--- a/dataretention/retention/remotefileauditor.py
+++ b/dataretention/retention/remotefileauditor.py
@@ -18,6 +18,8 @@
from retention.runner import Runner
from retention.localfileaudit import LocalFilesAuditor
import retention.ruleutils
+from retention.ignores import Ignores
+
def get_dirs_toexamine(host_report):
'''
@@ -121,8 +123,6 @@
self.store_filepath = store_filepath
self.verbose = verbose
- self.set_up_ignored()
-
# need this for locally running jobs
self.hostname = socket.getfqdn()
@@ -151,7 +151,10 @@
self.cdb.store_db_init(self.expanded_hosts)
self.set_up_and_export_rule_store()
- self.show_ignored(Config.cf[self.locations])
+ self.ignores = Ignores(self.cdb)
+ self.ignores.set_up_ignored(self.ignore_also)
+ if self.verbose:
+ self.ignores.show_ignored(Config.cf[self.locations])
self.today = time.time()
self.magic = retention.magic.magic_open(retention.magic.MAGIC_NONE)
@@ -208,29 +211,6 @@
for host in hosts:
nicepath = os.path.join(where_to_put, host + ".conf")
retention.ruleutils.export_rules(self.cdb, nicepath, host)
-
- def set_up_ignored(self):
- '''
- collect up initial list of files/dirs to skip during audit
- '''
- self.ignored = {}
- self.ignored['files'] = Config.cf['ignored_files']
- self.ignored['dirs'] = Config.cf['ignored_dirs']
- self.ignored['prefixes'] = Config.cf['ignored_prefixes']
- self.ignored['extensions'] = Config.cf['ignored_extensions']
-
- if self.ignore_also is not None:
- # silently skip paths that are not absolute
- for path in self.ignore_also:
- if path.startswith('/'):
- if path.endswith('/'):
- if '/' not in self.ignored['dirs']:
- self.ignored['dirs']['/'] = []
- self.ignored['dirs']['/'].append(path[:-1])
- else:
- if '/' not in self.ignored['files']:
- self.ignored['files']['/'] = []
- self.ignored['files']['/'].append(path)
def get_perhost_rules_as_json(self):
'''
@@ -297,41 +277,6 @@
with open("/srv/salt/audits/retention/configs/allhosts_file.py",
"w+") as fp:
fp.write(self.perhost_raw)
fp.close()
-
- def show_ignored(self, basedirs):
- if self.verbose:
- sys.stderr.write(
- "INFO: The below does not include per-host rules\n")
- sys.stderr.write(
- "INFO: or rules derived from the directory status entries.\n")
-
- sys.stderr.write("INFO: Ignoring the following directories:\n")
-
- for basedir in self.ignored['dirs']:
- if basedir in basedirs or basedir == '*' or basedir == '/':
- sys.stderr.write(
- "INFO: " + ','.join(self.ignored['dirs'][basedir])
- + " in " + basedir + '\n')
-
- sys.stderr.write("INFO: Ignoring the following files:\n")
- for basedir in self.ignored['files']:
- if basedir in basedirs or basedir == '*' or basedir == '/':
- sys.stderr.write(
- "INFO: " + ','.join(self.ignored['files'][basedir])
- + " in " + basedir + '\n')
-
- sys.stderr.write(
- "INFO: Ignoring files starting with the following:\n")
- sys.stderr.write(
- "INFO: " + ','.join(self.ignored['prefixes']) + '\n')
-
- sys.stderr.write(
- "INFO: Ignoring files ending with the following:\n")
- for basedir in self.ignored['extensions']:
- if basedir in basedirs or basedir == '*':
- sys.stderr.write("INFO: " + ','.join(
- self.ignored['extensions'][basedir])
- + " in " + basedir + '\n')
def normalize(self, fname):
'''
@@ -472,7 +417,7 @@
print "no output from host", host
# add some results to rule store
self.update_status_rules_from_report(result)
- return result, self.ignored
+ return result, self.ignores.ignored
def update_status_rules_from_report(self, report):
hostlist = report.keys()
diff --git a/dataretention/retention/retentionaudit.py
b/dataretention/retention/retentionaudit.py
index 8a30f05..0711ec1 100644
--- a/dataretention/retention/retentionaudit.py
+++ b/dataretention/retention/retentionaudit.py
@@ -8,7 +8,7 @@
from retention.locallogaudit import LocalLogsAuditor
from retention.localhomeaudit import LocalHomesAuditor
from retention.examiner import LocalFileExaminer, LocalDirExaminer
-from retention.userconfretriever import LocalUserCfRetriever
+from retention.ignores import LocalUserCfRetriever
log = logging.getLogger(__name__)
diff --git a/dataretention/retention/userconfretriever.py
b/dataretention/retention/userconfretriever.py
deleted file mode 100644
index 545f934..0000000
--- a/dataretention/retention/userconfretriever.py
+++ /dev/null
@@ -1,83 +0,0 @@
-import sys
-import json
-import salt.client
-
-sys.path.append('/srv/audits/retention/scripts/')
-
-import retention.remotefileauditor
-from retention.localhomeaudit import LocalHomesAuditor
-import retention.utils
-from retention.utils import JsonHelper
-import retention.fileutils
-import retention.ruleutils
-
-class RemoteUserCfRetriever(object):
- '''
- retrieval and display dirs / files listed as to
- be ignored in per-user lists on remote host
- '''
- def __init__(self, host, timeout, audit_type):
- self.host = host
- self.timeout = timeout
- self.audit_type = audit_type
- self.locations = audit_type + "_locations"
-
- def run(self, quiet=False):
- '''
- do all the work
-
- note that 'quiet' applies only to remotely
- run, and the same is true for returning the contents.
- maybe we want to fix that
- '''
-
- local_ignores = {}
-
- client = salt.client.LocalClient()
- module_args = [self.timeout, self.audit_type]
-
- result = client.cmd([self.host], "retentionaudit.retrieve_usercfs",
- module_args, expr_form='list',
- timeout=self.timeout)
-
- if self.host in result:
- input = result[self.host]
- try:
- local_ignores = json.loads(
- input, object_hook=JsonHelper.decode_dict)
- except:
- print "WARNING: failed to get local ignores on host",
- print self.host,
- print "got this:", input
- local_ignores = {}
-
- if not quiet:
- print local_ignores
-
- return local_ignores
-
-class LocalUserCfRetriever(object):
- '''
- retrieval and display dirs / files listed as to
- be ignored in per-user lists on local host
- '''
- def __init__(self, timeout, audit_type='homes'):
- self.timeout = timeout
- self.audit_type = audit_type
- self.locations = audit_type + "_locations"
-
- def run(self, quiet=False):
- '''
- do all the work
-
- note that 'quiet' applies only to remotely
- run, and the same is true for returning the contents.
- maybe we want to fix that
- '''
-
- local_ignores = {}
-
- local_ignores = LocalHomesAuditor.get_local_ignores(self.locations)
- output = json.dumps(local_ignores)
- print output
- return output
--
To view, visit https://gerrit.wikimedia.org/r/233465
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ie9b7d995124c5875bf86be590836d963a4e93105
Gerrit-PatchSet: 2
Gerrit-Project: operations/software
Gerrit-Branch: master
Gerrit-Owner: ArielGlenn <[email protected]>
Gerrit-Reviewer: ArielGlenn <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits