ArielGlenn has submitted this change and it was merged.

Change subject: homes audit done via salt module
......................................................................


homes audit done via salt module

remote homes audit file and class, local homes audit file and class,
stanza in salt module wrapping the local homes audit

Change-Id: Ib1fe6508f2a8d41afaea828d63741667d329d30f
---
M dataretention/data_auditor.py
A dataretention/retention/localhomeaudit.py
A dataretention/retention/remotehomeauditor.py
M dataretention/retention/retentionaudit.py
4 files changed, 245 insertions(+), 7 deletions(-)

Approvals:
  ArielGlenn: Verified; Looks good to me, approved



diff --git a/dataretention/data_auditor.py b/dataretention/data_auditor.py
index 0d85c0f..966a19f 100644
--- a/dataretention/data_auditor.py
+++ b/dataretention/data_auditor.py
@@ -6,6 +6,7 @@
 from retention.auditor import HomesAuditor
 from retention.remotefileauditor import RemoteFilesAuditor
 from retention.remotelogauditor import RemoteLogsAuditor
+from retention.remotehomeauditor import RemoteHomesAuditor
 from retention.examiner import FileExaminer, DirExaminer
 
 def usage(message=None):
@@ -269,11 +270,11 @@
             cmdline.run(report, ignored)
 
     elif audit_type == 'homes':
-        homesaudit = HomesAuditor(hosts_expr, audit_type, prettyprint,
-                                  show_sample_content, dirsizes,
-                                  summary_report,
-                                  depth, files_to_check, ignore_also,
-                                  timeout, maxfiles, store_filepath, verbose)
+        homesaudit = RemoteHomesAuditor(hosts_expr, audit_type, prettyprint,
+                                        show_sample_content, dirsizes,
+                                        summary_report,
+                                        depth, files_to_check, ignore_also,
+                                        timeout, maxfiles, store_filepath, 
verbose)
         report, ignored = homesaudit.audit_hosts()
         if interactive:
             cmdline = CommandLine(store_filepath, timeout, audit_type, 
hosts_expr)
diff --git a/dataretention/retention/localhomeaudit.py 
b/dataretention/retention/localhomeaudit.py
new file mode 100644
index 0000000..c6b92d4
--- /dev/null
+++ b/dataretention/retention/localhomeaudit.py
@@ -0,0 +1,119 @@
+import os
+import sys
+
+sys.path.append('/srv/audits/retention/scripts/')
+
+import retention.utils
+import retention.magic
+from retention.config import Config
+from retention.localfileaudit import LocalFilesAuditor
+
+global_keys = [key for key, value_unused in
+               sys.modules[__name__].__dict__.items()]
+
+
+class LocalHomesAuditor(LocalFilesAuditor):
+    '''
+    auditing of home directories on a set of hosts
+
+    users may have a local '.data_retention' file in their
+    home directories with a list, on entry per line, of files
+    or directories (dirs must end in '/') to skip during the audit
+    '''
+
+    def __init__(self, audit_type,
+                 show_content=False, dirsizes=False,
+                 depth=2, to_check=None, ignore_also=None, timeout=60,
+                 maxfiles=None):
+        '''
+        see FilesAuditor for the arguments to the constructor
+        '''
+        super(LocalHomesAuditor, self).__init__(audit_type,
+                                                show_content, dirsizes,
+                                                depth, to_check, ignore_also,
+                                                timeout, maxfiles)
+        self.homes_owners = {}
+
+        # FIXME where are these ever used???
+        local_ignores = LocalHomesAuditor.get_local_ignores(self.locations)
+        local_ignored_dirs, local_ignored_files = 
LocalHomesAuditor.process_local_ignores(
+            local_ignores, self.ignored)
+
+    @staticmethod
+    def process_local_ignores(local_ignores, ignored):
+        '''
+        files or dirs listed in data retention conf in homedir
+        are considered 'good' and added to ignore list
+
+        non-absolute paths will be taken as relative to the
+        home dir of the data retention config they were
+        read from
+        '''
+
+        local_ignored_dirs = []
+        local_ignored_files = []
+        for basedir in local_ignores:
+            for path in local_ignores[basedir]:
+                if not path.startswith('/'):
+                    path = os.path.join(basedir, path)
+
+                if path.endswith('/'):
+                    if 'dirs' not in ignored:
+                        ignored['dirs'] = {}
+                    if '/' not in ignored['dirs']:
+                        ignored['dirs']['/'] = []
+
+                    ignored['dirs']['/'].append(path[:-1])
+                    local_ignored_dirs.append(path[:-1])
+                else:
+                    if 'files' not in ignored:
+                        ignored['files'] = {}
+                    if '/' not in ignored['files']:
+                        ignored['files']['/'] = []
+
+                    ignored['files']['/'].append(path)
+                    local_ignored_files.append(path)
+        return local_ignored_dirs, local_ignored_files
+
+    @staticmethod
+    def get_home_dirs(locations):
+        '''
+        get a list of home directories where the root location(s) for home are
+        specified in the Config class (see 'home_locations'), by reading
+        these root location dirs and grabbing all subdirectory names from them
+        '''
+        home_dirs = []
+
+        for location in Config.cf[locations]:
+            if not os.path.isdir(location):
+                continue
+            home_dirs.extend([os.path.join(location, d)
+                              for d in os.listdir(location)
+                              if os.path.isdir(os.path.join(location, d))])
+        return home_dirs
+
+    @staticmethod
+    def get_local_ignores(locations):
+        '''
+        read a list of absolute paths from /home/blah/.data_retention
+        for all blah.  Dirs are specified by op sep at the end ('/')
+        and files without.
+        '''
+        local_ignores = {}
+        home_dirs = LocalHomesAuditor.get_home_dirs(locations)
+        for hdir in home_dirs:
+            local_ignores[hdir] = []
+            if os.path.exists(os.path.join(hdir, ".data_retention")):
+                try:
+                    filep = open(os.path.join(hdir, ".data_retention"))
+                    entries = filep.read().split("\n")
+                    filep.close()
+                except:
+                    pass
+                entries = filter(None, [e.strip() for e in entries])
+                # fixme should sanity check these? ???
+                # what happens if people put wildcards in the wrong
+                # component, or put utter garbage in there, or...?
+                local_ignores[hdir].extend(entries)
+
+        return local_ignores
diff --git a/dataretention/retention/remotehomeauditor.py 
b/dataretention/retention/remotehomeauditor.py
new file mode 100644
index 0000000..7db2702
--- /dev/null
+++ b/dataretention/retention/remotehomeauditor.py
@@ -0,0 +1,106 @@
+import os
+import sys
+
+sys.path.append('/srv/audits/retention/scripts/')
+
+import retention.utils
+import retention.magic
+from retention.remotefileauditor import RemoteFilesAuditor
+
+global_keys = [key for key, value_unused in
+               sys.modules[__name__].__dict__.items()]
+
+
+class RemoteHomesAuditor(RemoteFilesAuditor):
+    '''
+    auditing of home directories on a set of hosts
+
+    users may have a local '.data_retention' file in their
+    home directories with a list, on entry per line, of files
+    or directories (dirs must end in '/') to skip during the audit
+    '''
+
+    def __init__(self, hosts_expr, audit_type, prettyprint=False,
+                 show_content=False, dirsizes=False, summary_report=False,
+                 depth=2, to_check=None, ignore_also=None, timeout=60,
+                 maxfiles=None, store_filepath=None, verbose=False):
+        '''
+        see FilesAuditor for the arguments to the constructor
+        '''
+        super(RemoteHomesAuditor, self).__init__(hosts_expr, audit_type, 
prettyprint,
+                                                 show_content, dirsizes,
+                                                 summary_report, depth,
+                                                 to_check, ignore_also, 
timeout,
+                                                 maxfiles, store_filepath, 
verbose)
+        self.homes_owners = {}
+
+    def get_audit_args(self):
+        audit_args = [self.show_sample_content,
+                      self.dirsizes,
+                      self.depth - 1,
+                      self.to_check,
+                      ",".join(self.ignore_also) if self.ignore_also is not 
None else None,
+                      self.timeout,
+                      self.MAX_FILES]
+        return audit_args
+
+    def display_host_summary(self):
+        '''
+        instead of a detailed report with oe entry per file
+        that may be problematic, display a summary for each homedir
+        on a host
+        '''
+        if self.summary is not None:
+            paths = sorted(self.summary.keys())
+            for path in paths:
+                for group in self.summary[path]:
+                    if (self.summary[path][group]['old'] > 0 or
+                            self.summary[path][group]['maybe_old'] > 0 or
+                            self.summary[path][group]['odd_owner'] > 0):
+                        print ("in directory %s, (%s), %d old,"
+                               " %d maybe old, %d with odd owner"
+                               % (path, group,
+                                  self.summary[path][group]['old'],
+                                  self.summary[path][group]['maybe_old'],
+                                  self.summary[path][group]['odd_owner']))
+
+    def add_stats(self, item, summary):
+        '''
+        gather stats on how many files/dirs
+        may be problematic; summary is where the results
+        are collected, item is the item to include in
+        the summary if needed
+        '''
+        dirname = os.path.dirname(item['path'])
+
+        if dirname not in summary:
+            summary[dirname] = {
+                'binary': {'old': 0, 'maybe_old': 0, 'odd_owner': 0},
+                'text': {'old': 0, 'maybe_old': 0, 'odd_owner': 0}
+            }
+        if item['binary'] is True:
+            group = 'binary'
+        else:
+            group = 'text'
+
+        if item['old'] == 'T':
+            summary[dirname][group]['old'] += 1
+        elif item['old'] == '-':
+            summary[dirname][group]['maybe_old'] += 1
+
+        if not item['path'].startswith('/home/'):
+            return
+
+        empty, home, user, rest = item['path'].split(os.path.sep, 3)
+        home_dir = os.path.join(os.path.sep, home, user)
+        if home_dir not in self.homes_owners:
+            try:
+                dirstat = os.stat(home_dir)
+            except:
+                return
+            self.homes_owners[home_dir] = str(dirstat.st_uid)
+
+        if item['owner'] != self.homes_owners[home_dir]:
+            summary[dirname][group]['odd_owner'] += 1
+
+
diff --git a/dataretention/retention/retentionaudit.py 
b/dataretention/retention/retentionaudit.py
index 5c3e6c1..8b85771 100644
--- a/dataretention/retention/retentionaudit.py
+++ b/dataretention/retention/retentionaudit.py
@@ -6,6 +6,7 @@
 
 from retention.localfileaudit import LocalFilesAuditor
 from retention.locallogaudit import LocalLogsAuditor
+from retention.localhomeaudit import LocalHomesAuditor
 
 log = logging.getLogger(__name__)
 
@@ -21,8 +22,8 @@
 
 def logaudit_host(oldest, show_content, show_system_logs,
                   dirsizes, depth,
-                   to_check, ignore_also, timeout,
-                   maxfiles):
+                  to_check, ignore_also, timeout,
+                  maxfiles):
     lauditor = LocalLogsAuditor('logs', oldest, show_content,
                                 show_system_logs,
                                 dirsizes, depth, to_check,
@@ -30,3 +31,14 @@
                                 maxfiles)
     result = lauditor.do_local_audit()
     return result
+
+def homeaudit_host(show_content,
+                   dirsizes, depth,
+                   to_check, ignore_also, timeout,
+                   maxfiles):
+    hauditor = LocalHomesAuditor('homes', show_content,
+                                 dirsizes, depth, to_check,
+                                 ignore_also, timeout,
+                                 maxfiles)
+    result = hauditor.do_local_audit()
+    return result

-- 
To view, visit https://gerrit.wikimedia.org/r/233455
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ib1fe6508f2a8d41afaea828d63741667d329d30f
Gerrit-PatchSet: 2
Gerrit-Project: operations/software
Gerrit-Branch: master
Gerrit-Owner: ArielGlenn <[email protected]>
Gerrit-Reviewer: ArielGlenn <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to