ArielGlenn has submitted this change and it was merged.
Change subject: homes audit done via salt module
......................................................................
homes audit done via salt module
remote homes audit file and class, local homes audit file and class,
stanza in salt module wrapping the local homes audit
Change-Id: Ib1fe6508f2a8d41afaea828d63741667d329d30f
---
M dataretention/data_auditor.py
A dataretention/retention/localhomeaudit.py
A dataretention/retention/remotehomeauditor.py
M dataretention/retention/retentionaudit.py
4 files changed, 245 insertions(+), 7 deletions(-)
Approvals:
ArielGlenn: Verified; Looks good to me, approved
diff --git a/dataretention/data_auditor.py b/dataretention/data_auditor.py
index 0d85c0f..966a19f 100644
--- a/dataretention/data_auditor.py
+++ b/dataretention/data_auditor.py
@@ -6,6 +6,7 @@
from retention.auditor import HomesAuditor
from retention.remotefileauditor import RemoteFilesAuditor
from retention.remotelogauditor import RemoteLogsAuditor
+from retention.remotehomeauditor import RemoteHomesAuditor
from retention.examiner import FileExaminer, DirExaminer
def usage(message=None):
@@ -269,11 +270,11 @@
cmdline.run(report, ignored)
elif audit_type == 'homes':
- homesaudit = HomesAuditor(hosts_expr, audit_type, prettyprint,
- show_sample_content, dirsizes,
- summary_report,
- depth, files_to_check, ignore_also,
- timeout, maxfiles, store_filepath, verbose)
+ homesaudit = RemoteHomesAuditor(hosts_expr, audit_type, prettyprint,
+ show_sample_content, dirsizes,
+ summary_report,
+ depth, files_to_check, ignore_also,
+ timeout, maxfiles, store_filepath,
verbose)
report, ignored = homesaudit.audit_hosts()
if interactive:
cmdline = CommandLine(store_filepath, timeout, audit_type,
hosts_expr)
diff --git a/dataretention/retention/localhomeaudit.py b/dataretention/retention/localhomeaudit.py
new file mode 100644
index 0000000..c6b92d4
--- /dev/null
+++ b/dataretention/retention/localhomeaudit.py
@@ -0,0 +1,119 @@
+import os
+import sys
+
+sys.path.append('/srv/audits/retention/scripts/')
+
+import retention.utils
+import retention.magic
+from retention.config import Config
+from retention.localfileaudit import LocalFilesAuditor
+
+global_keys = [key for key, value_unused in
+ sys.modules[__name__].__dict__.items()]
+
+
+class LocalHomesAuditor(LocalFilesAuditor):
+ '''
+ auditing of home directories on a set of hosts
+
+ users may have a local '.data_retention' file in their
+ home directories with a list, one entry per line, of files
+ or directories (dirs must end in '/') to skip during the audit
+ '''
+
+ def __init__(self, audit_type,
+ show_content=False, dirsizes=False,
+ depth=2, to_check=None, ignore_also=None, timeout=60,
+ maxfiles=None):
+ '''
+ see FilesAuditor for the arguments to the constructor
+ '''
+ super(LocalHomesAuditor, self).__init__(audit_type,
+ show_content, dirsizes,
+ depth, to_check, ignore_also,
+ timeout, maxfiles)
+ self.homes_owners = {}
+
+ # FIXME where are these ever used???
+ local_ignores = LocalHomesAuditor.get_local_ignores(self.locations)
+ local_ignored_dirs, local_ignored_files =
LocalHomesAuditor.process_local_ignores(
+ local_ignores, self.ignored)
+
+ @staticmethod
+ def process_local_ignores(local_ignores, ignored):
+ '''
+ files or dirs listed in data retention conf in homedir
+ are considered 'good' and added to ignore list
+
+ non-absolute paths will be taken as relative to the
+ home dir of the data retention config they were
+ read from
+ '''
+
+ local_ignored_dirs = []
+ local_ignored_files = []
+ for basedir in local_ignores:
+ for path in local_ignores[basedir]:
+ if not path.startswith('/'):
+ path = os.path.join(basedir, path)
+
+ if path.endswith('/'):
+ if 'dirs' not in ignored:
+ ignored['dirs'] = {}
+ if '/' not in ignored['dirs']:
+ ignored['dirs']['/'] = []
+
+ ignored['dirs']['/'].append(path[:-1])
+ local_ignored_dirs.append(path[:-1])
+ else:
+ if 'files' not in ignored:
+ ignored['files'] = {}
+ if '/' not in ignored['files']:
+ ignored['files']['/'] = []
+
+ ignored['files']['/'].append(path)
+ local_ignored_files.append(path)
+ return local_ignored_dirs, local_ignored_files
+
+ @staticmethod
+ def get_home_dirs(locations):
+ '''
+ get a list of home directories where the root location(s) for home are
+ specified in the Config class (see 'home_locations'), by reading
+ these root location dirs and grabbing all subdirectory names from them
+ '''
+ home_dirs = []
+
+ for location in Config.cf[locations]:
+ if not os.path.isdir(location):
+ continue
+ home_dirs.extend([os.path.join(location, d)
+ for d in os.listdir(location)
+ if os.path.isdir(os.path.join(location, d))])
+ return home_dirs
+
+ @staticmethod
+ def get_local_ignores(locations):
+ '''
+ read a list of absolute paths from /home/blah/.data_retention
+ for all blah. Dirs are specified by os sep at the end ('/')
+ and files without.
+ '''
+ local_ignores = {}
+ home_dirs = LocalHomesAuditor.get_home_dirs(locations)
+ for hdir in home_dirs:
+ local_ignores[hdir] = []
+ if os.path.exists(os.path.join(hdir, ".data_retention")):
+ try:
+ filep = open(os.path.join(hdir, ".data_retention"))
+ entries = filep.read().split("\n")
+ filep.close()
+ except:
+ pass
+ entries = filter(None, [e.strip() for e in entries])
+ # fixme should sanity check these? ???
+ # what happens if people put wildcards in the wrong
+ # component, or put utter garbage in there, or...?
+ local_ignores[hdir].extend(entries)
+
+ return local_ignores
diff --git a/dataretention/retention/remotehomeauditor.py b/dataretention/retention/remotehomeauditor.py
new file mode 100644
index 0000000..7db2702
--- /dev/null
+++ b/dataretention/retention/remotehomeauditor.py
@@ -0,0 +1,106 @@
+import os
+import sys
+
+sys.path.append('/srv/audits/retention/scripts/')
+
+import retention.utils
+import retention.magic
+from retention.remotefileauditor import RemoteFilesAuditor
+
+global_keys = [key for key, value_unused in
+ sys.modules[__name__].__dict__.items()]
+
+
+class RemoteHomesAuditor(RemoteFilesAuditor):
+ '''
+ auditing of home directories on a set of hosts
+
+ users may have a local '.data_retention' file in their
+ home directories with a list, one entry per line, of files
+ or directories (dirs must end in '/') to skip during the audit
+ '''
+
+ def __init__(self, hosts_expr, audit_type, prettyprint=False,
+ show_content=False, dirsizes=False, summary_report=False,
+ depth=2, to_check=None, ignore_also=None, timeout=60,
+ maxfiles=None, store_filepath=None, verbose=False):
+ '''
+ see FilesAuditor for the arguments to the constructor
+ '''
+ super(RemoteHomesAuditor, self).__init__(hosts_expr, audit_type,
prettyprint,
+ show_content, dirsizes,
+ summary_report, depth,
+ to_check, ignore_also,
timeout,
+ maxfiles, store_filepath,
verbose)
+ self.homes_owners = {}
+
+ def get_audit_args(self):
+ audit_args = [self.show_sample_content,
+ self.dirsizes,
+ self.depth - 1,
+ self.to_check,
+ ",".join(self.ignore_also) if self.ignore_also is not
None else None,
+ self.timeout,
+ self.MAX_FILES]
+ return audit_args
+
+ def display_host_summary(self):
+ '''
+ instead of a detailed report with one entry per file
+ that may be problematic, display a summary for each homedir
+ on a host
+ '''
+ if self.summary is not None:
+ paths = sorted(self.summary.keys())
+ for path in paths:
+ for group in self.summary[path]:
+ if (self.summary[path][group]['old'] > 0 or
+ self.summary[path][group]['maybe_old'] > 0 or
+ self.summary[path][group]['odd_owner'] > 0):
+ print ("in directory %s, (%s), %d old,"
+ " %d maybe old, %d with odd owner"
+ % (path, group,
+ self.summary[path][group]['old'],
+ self.summary[path][group]['maybe_old'],
+ self.summary[path][group]['odd_owner']))
+
+ def add_stats(self, item, summary):
+ '''
+ gather stats on how many files/dirs
+ may be problematic; summary is where the results
+ are collected, item is the item to include in
+ the summary if needed
+ '''
+ dirname = os.path.dirname(item['path'])
+
+ if dirname not in summary:
+ summary[dirname] = {
+ 'binary': {'old': 0, 'maybe_old': 0, 'odd_owner': 0},
+ 'text': {'old': 0, 'maybe_old': 0, 'odd_owner': 0}
+ }
+ if item['binary'] is True:
+ group = 'binary'
+ else:
+ group = 'text'
+
+ if item['old'] == 'T':
+ summary[dirname][group]['old'] += 1
+ elif item['old'] == '-':
+ summary[dirname][group]['maybe_old'] += 1
+
+ if not item['path'].startswith('/home/'):
+ return
+
+ empty, home, user, rest = item['path'].split(os.path.sep, 3)
+ home_dir = os.path.join(os.path.sep, home, user)
+ if home_dir not in self.homes_owners:
+ try:
+ dirstat = os.stat(home_dir)
+ except:
+ return
+ self.homes_owners[home_dir] = str(dirstat.st_uid)
+
+ if item['owner'] != self.homes_owners[home_dir]:
+ summary[dirname][group]['odd_owner'] += 1
+
+
diff --git a/dataretention/retention/retentionaudit.py b/dataretention/retention/retentionaudit.py
index 5c3e6c1..8b85771 100644
--- a/dataretention/retention/retentionaudit.py
+++ b/dataretention/retention/retentionaudit.py
@@ -6,6 +6,7 @@
from retention.localfileaudit import LocalFilesAuditor
from retention.locallogaudit import LocalLogsAuditor
+from retention.localhomeaudit import LocalHomesAuditor
log = logging.getLogger(__name__)
@@ -21,8 +22,8 @@
def logaudit_host(oldest, show_content, show_system_logs,
dirsizes, depth,
- to_check, ignore_also, timeout,
- maxfiles):
+ to_check, ignore_also, timeout,
+ maxfiles):
lauditor = LocalLogsAuditor('logs', oldest, show_content,
show_system_logs,
dirsizes, depth, to_check,
@@ -30,3 +31,14 @@
maxfiles)
result = lauditor.do_local_audit()
return result
+
+def homeaudit_host(show_content,
+ dirsizes, depth,
+ to_check, ignore_also, timeout,
+ maxfiles):
+ hauditor = LocalHomesAuditor('homes', show_content,
+ dirsizes, depth, to_check,
+ ignore_also, timeout,
+ maxfiles)
+ result = hauditor.do_local_audit()
+ return result
--
To view, visit https://gerrit.wikimedia.org/r/233455
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ib1fe6508f2a8d41afaea828d63741667d329d30f
Gerrit-PatchSet: 2
Gerrit-Project: operations/software
Gerrit-Branch: master
Gerrit-Owner: ArielGlenn <[email protected]>
Gerrit-Reviewer: ArielGlenn <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits