ArielGlenn has submitted this change and it was merged.

Change subject: more pylint/pep8 cleanup
......................................................................


more pylint/pep8 cleanup

remove some blank lines
clean up some duplicated code
move some cli code into separate methods
now that all ignored objects have the same structure, clean up
  show_ignored accordingly
move log summary code into its own method
move more cli command handling into their own methods
move most static methods out of entryinfo, fileinfo to module methods

Change-Id: I3a359e03925d0f669eff0b216db859efe55d20c2
---
M dataretention/retention/cli.py
M dataretention/retention/cliutils.py
M dataretention/retention/fileinfo.py
M dataretention/retention/ignores.py
M dataretention/retention/localexaminer.py
M dataretention/retention/localfileaudit.py
M dataretention/retention/locallogaudit.py
M dataretention/retention/remotefileauditor.py
M dataretention/retention/remotelogauditor.py
M dataretention/retention/remoteusercfgrabber.py
M dataretention/retention/rule.py
M dataretention/retention/saltclientplus.py
12 files changed, 482 insertions(+), 520 deletions(-)

Approvals:
  ArielGlenn: Verified; Looks good to me, approved



diff --git a/dataretention/retention/cli.py b/dataretention/retention/cli.py
index da6b40b..0ff6c4b 100644
--- a/dataretention/retention/cli.py
+++ b/dataretention/retention/cli.py
@@ -9,6 +9,7 @@
 import retention.remotefileauditor
 from clouseau.retention.locallogaudit import LocalLogsAuditor
 from clouseau.retention.fileinfo import FileInfo
+import clouseau.retention.fileinfo
 from clouseau.retention.utils import JsonHelper
 import clouseau.retention.config
 from retention.remoteexaminer import RemoteDirExaminer, RemoteFileExaminer
@@ -105,6 +106,22 @@
 #                sys.stderr.write(repr(traceback.format_exception(
 #                    exc_type, exc_value, exc_traceback)))
 
+    def filter_items(self, filtertype, check_not_ignored):
+        keys = self.entries_dict.keys()
+        if filtertype == 'file':
+            types = ['file']
+        elif filtertype == 'dir':
+            types = ['dir']
+        else:
+            types = ['dir', 'file']
+        items = []
+        for ftype in types:
+            items = items + (sorted(
+                item for item in keys
+                if (self.entries_dict[item]['type'] == ftype and
+                    check_not_ignored(self.entries_dict[item]['path'],
+                                      self.entries_dict[item]['type'], 
ftype))))
+        return items
 
     def show(self, host, path, batchno, filtertype, check_not_ignored, 
force=False):
         self.get(host, path, batchno, force)
@@ -112,33 +129,7 @@
         # fixme this 50 is pretty arbitrary oh well
         justify = 50
 
-        keys = self.entries_dict.keys()
-        if filtertype == 'file':
-            items = (sorted(
-                item for item in keys
-                if self.entries_dict[item]['type'] == 'file'))
-        elif filtertype == 'dir':
-            items = (sorted(
-                item for item in keys
-                if self.entries_dict[item]['type'] == 'dir'))
-        elif filtertype == 'all':
-            items = sorted(
-                item for item in keys
-                if self.entries_dict[item]['type'] == 'dir')
-            items = items + sorted(
-                item for item in keys
-                if self.entries_dict[item]['type'] == 'file')
-        elif filtertype == 'check':
-            items = sorted(
-                item for item in keys
-                if self.entries_dict[item]['type'] == 'dir'
-                and check_not_ignored(self.entries_dict[item]['path'],
-                                      self.entries_dict[item]['type']))
-            items = items + sorted(
-                item for item in keys
-                if self.entries_dict[item]['type'] == 'file'
-                and check_not_ignored(self.entries_dict[item]['path'],
-                                      self.entries_dict[item]['type']))
+        items = self.filter_items(filtertype, check_not_ignored)
 
         page = 1
         num_per_page = 50  # another arbitrary value
@@ -159,7 +150,7 @@
                     # fixme why do we have an empty item I wonder
                     continue
                 try:
-                    result = FileInfo.format_pretty_output_from_dict(
+                    result = 
clouseau.retention.fileinfo.format_pretty_output_from_dict(
                         self.entries_dict[item], path_justify=justify)
                 except:
                     print "item is", item
@@ -256,8 +247,7 @@
             print "No problem dirs and no skipped dirs on this host"
         else:
             dirs_problem_to_depth = 
[clouseau.retention.cliutils.get_path_prefix(
-                d, self.max_depth_top_level)
-                                     for d in dirs_problem]
+                d, self.max_depth_top_level) for d in dirs_problem]
             dirs_skipped = [s for s in dirs_skipped
                             if s not in dirs_problem_to_depth]
             relevant_dirs = (sorted(list(set(dirs_problem_to_depth)))
@@ -377,7 +367,16 @@
         # fixme is this really the right fallback? check it
         return '/'
 
-    def entry_is_not_ignored(self, path, entrytype):
+    def entry_is_not_ignored(self, path, entrytype, do_check):
+        '''
+        see if the given entry is NOT in the ignored lists and return
+        True if so, False otherwise
+        we only do this check if the do_check argument is set to 'check';
+        otherwise we default to True
+        '''
+        if do_check != 'check':
+            return True
+
         basedir = self.get_basedir_from_path(path)
         if self.audit_type == 'logs' and entrytype == 'file':
             path = LocalLogsAuditor.normalize(path)
@@ -452,135 +451,144 @@
             clouseau.retention.ruleutils.do_add_rule(self.cdb, file_expr, 
filetype, status, self.cenv.host)
         return True
 
+    def do_add_rule(self):
+        # fixme need different completer here I think, that
+        # completes relative to self.cwdir
+        readline.set_completer(None)
+        path = raw_input("path or wildcard expr in rule (empty to quit): ")
+        path = path.strip()
+        if path == '':
+            return True
+        default = Status.text_to_status('good')
+        self.cmpl.set_choices_completion(Status.STATUSES + ['Q'], default)
+        while True:
+            statuses_text = Status.get_statuses_prompt(", ")
+            status = raw_input(statuses_text + " Q(quit)) [%s]: " %
+                               default)
+            status = status.strip()
+            if status == "":
+                status = default
+            if status[0].upper() in Status.STATUSES:
+                status = status[0].upper()
+                break
+            elif status == 'q' or status == 'Q':
+                return None
+            else:
+                print "Unknown status type"
+                continue
+
+        # fixme should check that any wildcard is only one and only
+        # in the last component... someday
+
+        if path[0] != os.path.sep:
+            path = os.path.join(self.cenv.cwdir, path)
+        if path[-1] == os.path.sep:
+            path = path[:-1]
+            filetype = clouseau.retention.ruleutils.text_to_entrytype('dir')
+        else:
+            filetype = clouseau.retention.ruleutils.text_to_entrytype('file')
+
+        clouseau.retention.ruleutils.do_add_rule(self.cdb, path, filetype, 
status, self.cenv.host)
+        # update the ignores list since we have a new rule
+        results = 
clouseau.retention.ignores.get_ignored_from_rulestore(self.cdb, 
[self.cenv.host])
+        if self.cenv.host in results:
+            self.ignored_from_rulestore[self.cenv.host] = 
results[self.cenv.host]
+        return True
+
+    def do_show_rules_with_status(self):
+        default = Status.text_to_status('problem')
+        self.cmpl.set_choices_completion(['A'] + Status.STATUSES + ['Q'], 
default)
+        while True:
+            statuses_text = Status.get_statuses_prompt(", ")
+            status = raw_input("status type A(all), " + statuses_text +
+                               ", Q(quit)) [%s]: " % default)
+            status = status.strip()
+            if status == "":
+                status = default
+
+            if status == 'q' or status == 'Q':
+                return None
+            elif status[0].upper() not in ['A'] + Status.STATUSES:
+                print "Unknown status type"
+                continue
+
+            readline.set_completer(None)
+            prefix = raw_input("starting with prefix? [/]: ")
+            prefix = prefix.strip()
+            if prefix == "":
+                prefix = "/"
+            if status == 'a' or status == 'A':
+                clouseau.retention.ruleutils.show_rules(self.cdb, 
self.cenv.host, prefix=prefix)
+                return True
+            elif status[0].upper() in Status.STATUSES:
+                clouseau.retention.ruleutils.show_rules(self.cdb, 
self.cenv.host, status[0].upper(),
+                                                        prefix=prefix)
+                return True
+
+    def do_remove_rule(self):
+        # fixme need different completer here I think, that
+        # completes relative to self.cwdir
+        readline.set_completer(None)
+        path = raw_input("path or wildcard expr in rule (empty to quit): ")
+        path = path.strip()
+        if path == '':
+            return True
+        elif path[0] != os.path.sep:
+            path = os.path.join(self.cenv.cwdir, path)
+        if path[-1] == os.path.sep:
+            path = path[:-1]
+        clouseau.retention.ruleutils.do_remove_rule(self.cdb, path, 
self.cenv.host)
+        # update the ignores list since we removed a rule
+        results = 
clouseau.retention.ignores.get_ignored_from_rulestore(self.cdb, 
[self.cenv.host])
+        if self.cenv.host in results:
+            self.ignored_from_rulestore[self.cenv.host] = 
results[self.cenv.host]
+        return True
+
+    def get_rules_path(self):
+        readline.set_completer(None)
+        rules_path = raw_input("full path to rules file (empty to quit): ")
+        rules_path = rules_path.strip()
+        if rules_path == '':
+            return rules_path
+        if not clouseau.retention.cliutils.check_rules_path(rules_path):
+            print "bad rules file path specified, aborting"
+            return ''
+        return rules_path
+
     def do_rule(self, command):
         if command == 'A' or command == 'a':
-            # fixme need different completer here I think, that
-            # completes relative to self.cwdir
-            readline.set_completer(None)
-            path = raw_input("path or wildcard expr in rule (empty to quit): ")
-            path = path.strip()
-            if path == '':
-                return True
-            default = Status.text_to_status('good')
-            self.cmpl.set_choices_completion(Status.STATUSES + ['Q'], default)
-            while True:
-                statuses_text = Status.get_statuses_prompt(", ")
-                status = raw_input(statuses_text + " Q(quit)) [%s]: " %
-                                   default)
-                status = status.strip()
-                if status == "":
-                    status = default
-                if status[0].upper() in Status.STATUSES:
-                    status = status[0].upper()
-                    break
-                elif status == 'q' or status == 'Q':
-                    return None
-                else:
-                    print "Unknown status type"
-                    continue
-
-            # fixme should check that any wildcard is only one and only
-            # in the last component... someday
-
-            if path[0] != os.path.sep:
-                path = os.path.join(self.cenv.cwdir, path)
-            if path[-1] == os.path.sep:
-                path = path[:-1]
-                filetype = 
clouseau.retention.ruleutils.text_to_entrytype('dir')
-            else:
-                filetype = 
clouseau.retention.ruleutils.text_to_entrytype('file')
-
-            clouseau.retention.ruleutils.do_add_rule(self.cdb, path, filetype, 
status, self.cenv.host)
-            # update the ignores list since we have a new rule
-            results = 
clouseau.retention.ignores.get_ignored_from_rulestore(self.cdb, 
[self.cenv.host])
-            if self.cenv.host in results:
-                self.ignored_from_rulestore[self.cenv.host] = 
results[self.cenv.host]
-            return True
+            result = self.do_add_rule()
         elif command == 'S' or command == 's':
-            default = Status.text_to_status('problem')
-            self.cmpl.set_choices_completion(['A'] + Status.STATUSES + ['Q'], 
default)
-            while True:
-                statuses_text = Status.get_statuses_prompt(", ")
-                status = raw_input("status type A(all), " + statuses_text +
-                                   ", Q(quit)) [%s]: " % default)
-                status = status.strip()
-                if status == "":
-                    status = default
-
-                if status == 'q' or status == 'Q':
-                    return None
-                elif status[0].upper() not in ['A'] + Status.STATUSES:
-                    print "Unknown status type"
-                    continue
-
-                readline.set_completer(None)
-                prefix = raw_input("starting with prefix? [/]: ")
-                prefix = prefix.strip()
-                if prefix == "":
-                    prefix = "/"
-                if status == 'a' or status == 'A':
-                    clouseau.retention.ruleutils.show_rules(self.cdb, 
self.cenv.host, prefix=prefix)
-                    return True
-                elif status[0].upper() in Status.STATUSES:
-                    clouseau.retention.ruleutils.show_rules(self.cdb, 
self.cenv.host, status[0].upper(),
-                                                            prefix=prefix)
-                    return True
+            result = self.do_show_rules_with_status()
         elif command == 'D' or command == 'd':
             self.dircontents.get(self.cenv.host, self.cenv.cwdir, self.batchno)
             clouseau.retention.ruleutils.get_rules_for_path(self.cdb, 
self.cenv.cwdir, self.cenv.host)
-            return True
+            result = True
         elif command == 'C' or command == 'c':
             self.dircontents.get(self.cenv.host, self.cenv.cwdir, self.batchno)
             clouseau.retention.ruleutils.get_rules_for_entries(self.cdb, 
self.cenv.cwdir,
                                                                
self.dircontents.entries_dict,
                                                                self.cenv.host)
-            return True
+            result = True
         elif command == 'R' or command == 'r':
-            # fixme need different completer here I think, that
-            # completes relative to self.cwdir
-            readline.set_completer(None)
-            path = raw_input("path or wildcard expr in rule (empty to quit): ")
-            path = path.strip()
-            if path == '':
-                return True
-            elif path[0] != os.path.sep:
-                path = os.path.join(self.cenv.cwdir, path)
-            if path[-1] == os.path.sep:
-                path = path[:-1]
-            clouseau.retention.ruleutils.do_remove_rule(self.cdb, path, 
self.cenv.host)
-            # update the ignores list since we removed a rule
-            results = 
clouseau.retention.ignores.get_ignored_from_rulestore(self.cdb, 
[self.cenv.host])
-            if self.cenv.host in results:
-                self.ignored_from_rulestore[self.cenv.host] = 
results[self.cenv.host]
-            return True
+            result = self.do_remove_rule()
         elif command == 'I' or command == 'i':
-            readline.set_completer(None)
-            rules_path = raw_input("full path to rules file (empty to quit): ")
-            rules_path = rules_path.strip()
-            if rules_path == '':
-                return True
-            if not clouseau.retention.cliutils.check_rules_path(rules_path):
-                print "bad rules file path specified, aborting"
-            else:
+            rules_path = self.get_rules_path()
+            if rules_path != '':
                 clouseau.retention.ruleutils.import_rules(self.cdb, 
rules_path, self.cenv.host)
-            return True
+            result = True
         elif command == 'E' or command == 'e':
-            readline.set_completer(None)
-            rules_path = raw_input("full path to rules file (empty to quit): ")
-            rules_path = rules_path.strip()
-            if rules_path == '':
-                return True
-            if not clouseau.retention.cliutils.check_rules_path(rules_path):
-                print "bad rules file path specified, aborting"
-            else:
+            rules_path = self.get_rules_path()
+            if rules_path != '':
                 clouseau.retention.ruleutils.export_rules(self.cdb, 
rules_path, self.cenv.host)
-            return True
+            result = True
         elif command == 'Q' or command == 'q':
             print "quitting this level"
-            return None
+            result = None
         else:
             clouseau.retention.cliutils.show_help('rule')
-            return True
+            result = True
+        return result
 
     def do_file_contents(self):
         # fixme need a different completer here... meh
@@ -626,30 +634,32 @@
                 print "Unknown filter type"
                 continue
 
+    def do_dir_descend(self, command):
+        while True:
+            # prompt user for dir to descend
+            readline.set_completer(self.cmpl.dir_completion)
+            self.cenv.set_prompt()
+            directory = raw_input(self.cenv.prompt + ' ' + "directory name 
(empty to quit): ")
+            directory = directory.strip()
+            if directory == '':
+                return command
+            if directory[-1] == os.path.sep:
+                directory = directory[:-1]
+            if (directory[0] == '/' and
+                    not directory.startswith(self.cenv.cwdir + os.path.sep)):
+                print 'New directory is not a subdirectory of',
+                print self.cenv.cwdir, "skipping"
+            else:
+                self.cenv.cwdir = os.path.join(self.cenv.cwdir,
+                                               directory)
+                self.dircontents.clear()
+                self.cenv.set_prompt()
+                print 'Now at', self.cenv.cwdir
+                return True
+
     def do_examine(self, command):
         if command == 'D' or command == 'd':
-            while True:
-                # prompt user for dir to descend
-                readline.set_completer(self.cmpl.dir_completion)
-                self.cenv.set_prompt()
-                directory = raw_input(self.cenv.prompt + ' ' + "directory name 
(empty to quit): ")
-                directory = directory.strip()
-                if directory == '':
-                    return command
-                if directory[-1] == os.path.sep:
-                    directory = directory[:-1]
-                if (directory[0] == '/' and
-                        not directory.startswith(self.cenv.cwdir +
-                                                 os.path.sep)):
-                    print 'New directory is not a subdirectory of',
-                    print self.cenv.cwdir, "skipping"
-                else:
-                    self.cenv.cwdir = os.path.join(self.cenv.cwdir,
-                                                   directory)
-                    self.dircontents.clear()
-                    self.cenv.set_prompt()
-                    print 'Now at', self.cenv.cwdir
-                    return True
+            return self.do_dir_descend(command)
         elif command == 'U' or command == 'u':
             if self.cenv.cwdir != self.basedir:
                 self.cenv.cwdir = os.path.dirname(self.cenv.cwdir)
@@ -658,38 +668,39 @@
                 print 'Now at', self.cenv.cwdir
             else:
                 print 'Already at top', self.cenv.cwdir
-            return True
+            result = True
         elif command == 'E' or command == 'e':
             self.dircontents.show(self.cenv.host, self.cenv.cwdir, 1, 
self.filtertype, self.entry_is_not_ignored)
-            return True
+            result = True
         elif command == 'C' or command == 'c':
             self.do_file_contents()
-            return True
+            result = True
         elif command == 'F' or command == 'f':
             self.do_filter()
-            return True
+            result = True
         elif command == 'R' or command == 'r':
             continuing = True
             while continuing:
                 command = self.show_menu('rule')
                 continuing = self.do_command(command, 'rule', self.cenv.cwdir)
-            return True
+            result = True
         elif command == 'M' or command == 'm':
-            return self.do_mark()
+            result = self.do_mark()
         elif command == 'Q' or command == 'q' or command == '':
             print "quitting this level"
-            return None
+            result = None
         else:
             clouseau.retention.cliutils.show_help('examine')
-            return True
+            result = True
+        return result
 
     def do_top(self, command, dir_path):
+        result = True
         if command == 'S' or command == 's':
             continuing = True
             while continuing:
                 command = self.show_menu('status')
                 continuing = self.do_command(command, 'status', dir_path)
-            return True
         elif command == 'E' or command == 'e':
             self.dircontents.show(self.cenv.host, self.cenv.cwdir, 1, 
self.filtertype, self.entry_is_not_ignored)
             continuing = True
@@ -699,36 +710,34 @@
                 command = self.show_menu('examine')
                 continuing = self.do_command(command, 'examine',
                                              self.cenv.cwdir)
-            return True
         elif command == 'F' or command == 'f':
             self.do_filter()
-            return True
         elif command == 'I' or command == 'i':
             # do nothing
-            return command
+            result = command
         elif command == 'R' or command == 'r':
             continuing = True
             while continuing:
                 command = self.show_menu('rule')
                 continuing = self.do_command(command, 'rule', self.cenv.cwdir)
-            return True
         elif command == 'Q' or command == 'q':
-            return None
+            result = None
         else:
             clouseau.retention.cliutils.show_help('top')
-            return True
+        return result
 
     def do_command(self, command, level, dir_path):
+        result = None
         if self.basedir is None:
             self.basedir = dir_path
         if self.cenv.cwdir is None:
             self.cenv.cwdir = dir_path
 
         if command is None:
-            return
+            return None
 
         if level == 'top':
-            return self.do_top(command, dir_path)
+            result = self.do_top(command, dir_path)
         elif level == 'status':
             if command in Status.STATUSES:
                 # this option is invoked on a directory so
@@ -741,10 +750,9 @@
                 return None
             else:
                 clouseau.retention.cliutils.show_help(level)
-                return True
+                result = True
         elif level == 'examine':
-            return self.do_examine(command)
+            result = self.do_examine(command)
         elif level == 'rule':
-            return self.do_rule(command)
-        else:
-            return None
+            result = self.do_rule(command)
+        return result
diff --git a/dataretention/retention/cliutils.py 
b/dataretention/retention/cliutils.py
index 3d4aaea..b1133eb 100644
--- a/dataretention/retention/cliutils.py
+++ b/dataretention/retention/cliutils.py
@@ -102,6 +102,9 @@
 
 def show_pager(current_page, num_items, num_per_page):
     readline.set_completer(None)
+    num_pages = num_items / num_per_page
+    if num_items % num_per_page:
+        num_pages += 1
     while True:
         to_show = raw_input("P(prev)/N(next)/F(first)/"
                             "L(last)/<num>(go to page num)/Q(quit) [N]: ")
@@ -111,49 +114,33 @@
 
         if to_show == 'P' or to_show == 'p':
             # prev page
-            if current_page > 1:
-                return current_page - 1
-            else:
-                return current_page
-
+            result = current_page - 1
+            break
         elif to_show == 'N' or to_show == 'n':
             # next page
-            num_pages = num_items / num_per_page
-            if num_items % num_per_page:
-                num_pages += 1
-            if current_page < num_pages:
-                return current_page + 1
-            else:
-                return current_page
-
+            result = current_page + 1
+            break
         elif to_show == 'F' or to_show == 'f':
             # first page
-            return 1
-
-        elif to_show == 'L' or 'to_show' == 'l':
+            result = 1
+            break
+        elif to_show == 'L' or to_show == 'l':
             # last page
-            num_pages = num_items / num_per_page
-            if num_items % num_per_page:
-                num_pages += 1
-            return num_pages
-
+            result = num_pages
+            break
         elif to_show.isdigit():
-            desired_page = int(to_show)
-            num_pages = num_items / num_per_page
-            if num_items % num_per_page:
-                num_pages += 1
-
-            if desired_page < 1:
-                return 1
-            elif desired_page > num_pages:
-                return num_pages
-            else:
-                return desired_page
-
+            result = int(to_show)
+            break
         elif to_show == 'Q' or to_show == 'q':
             return None
         else:
             print "unknown option"
+
+    if result < 1:
+        result = 1
+    elif result > num_pages:
+        result = num_pages
+    return result
 
 def check_rules_path(rules_path):
     # sanity check on the path, let's not read/write
@@ -161,4 +148,3 @@
 
     # fixme write this
     return True
-
diff --git a/dataretention/retention/fileinfo.py 
b/dataretention/retention/fileinfo.py
index 2b85588..122c18c 100644
--- a/dataretention/retention/fileinfo.py
+++ b/dataretention/retention/fileinfo.py
@@ -7,10 +7,174 @@
 import datetime
 import stat
 
+def get_time_formatted(time_raw):
+    '''
+    format unix time to a printable string
+    and return it, or the empty string on error
+    '''
+    if time_raw is None:
+        return ""
+    else:
+        return time.ctime(time_raw)
+
+def check_text_binary(content):
+    textchars = (''.join(map(chr, [7, 8, 9, 10, 12, 13, 27] +
+                             range(0x20, 0x100))))
+    try:
+        is_binary = bool(content.translate(None, textchars))
+    except:
+        return None
+    return is_binary
+
+def get_date_fromtext(text):
+    '''
+    given a text string look for the first date string in there
+    of arbitrary format
+    this is very sketchy, not at all guaranteed to work, and
+    especially not in fancy locales
+    '''
+
+    current_year = str(datetime.datetime.now().year)
+    # formats actually seen in log files:
+
+    datecheck = {
+        # May  5 03:31:02
+        '%b %d %H:%M:%S ?%Y':
+        
[r'[A-Z][a-z]{2}\s+[0-9][0-9]?\s[0-9]{2}:[0-9]{2}:[0-9]{2}(?!\s+[0-9]{4})',
+         ' ?' + current_year],
+        # Jan 15 12:53:26 2014
+        '%b %d %H:%M:%S %Y':
+        [r'[A-Z][a-z]{2}\s+[0-9][0-9]?\s[0-9]{2}:[0-9]{2}:[0-9]{2}\s[0-9]{4}', 
''],
+        # 2013-03-08 04:10:33
+        '%Y-%m-%d %H:%M:%S':
+        [r'[0-9]{4}-[0-9]{2}-[0-9]{2}\s[0-9]{2}:[0-9]{2}:[0-9]{2}', ''],
+        # 130312 09:30:58
+        '%y%m%d %H:%M:%S':
+        [r'[0-9]{6}\s[0-9]{2}:[0-9]{2}:[0-9]{2}', ''],
+        # 10/Feb/2014:03:52:38 +0200
+        '%d/%b/%Y:%H:%M:%S':
+        [r'[0-9]{2}/[A-Z][a-z]{2}/[0-9]{4}:[0-9]{2}:[0-9]{2}:[0-9]{2}', '']
+    }
+    if (text is None or text == 'BINARY' or text == 'EMPTY'
+            or text == 'UNAVAILABLE' or text == ''):
+        return None
+
+    for date_format in datecheck:
+        result = re.search(datecheck[date_format][0], text)
+        if result:
+            try:
+                seconds = calendar.timegm(time.strptime(
+                    result.group(0) + datecheck[date_format][1], date_format))
+            except:
+                continue
+            return seconds
+
+    # Mar  8 03:51:00.928699
+    # [A-Z][a-z]{2}\s+[0-9][0-9]?\s[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]+
+    # 2013-03-11 09:40:24.358+0000
+    # [0-9]{4}-[0-9]{2}-[0-9]{2}\s[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]+\+[0-9]{4}
+    # 2013-07-23 14:18:15,555
+    # [0-9]{4}-[0-9]{2}-[0-9]{2}\s[0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]+
+    # covered by the above
+
+    # failed to find date
+    return None
+
+def bool_to_string(value):
+    '''
+    turn bools into the equivalent string:
+    True becomes 'T', -1 and None are treated as unknown ('-'),
+    and all other values (including the number 1) become 'F'
+    '''
+
+    if value is None or value == -1:
+        return '-'
+    elif value is True:
+        return 'T'
+    else:
+        return 'F'
+
+def string_to_bool(value):
+    '''
+    turn the strings 'T' and 'F' into the bool values True and
+    False, and anything else (such as '-') into None; this is
+    almost the inverse of bool_to_string
+    '''
+    if value == 'T':
+        return True
+    elif value == 'F':
+        return False
+    else:
+        return None
+
+def stat_to_dict(fstat):
+    stat_dict = {
+        'dev': fstat.st_dev,
+        'inode': fstat.st_ino,
+        'mode': fstat.st_mode,
+        'nlink': fstat.st_nlink,
+        'uid': fstat.st_uid,
+        'gid': fstat.st_gid,
+        'dev_spec': fstat.st_rdev,
+        'size': fstat.st_size,
+        'blksize': fstat.st_blksize,
+        'blkcnt': fstat.st_blocks,
+        'atime': fstat.st_atime,
+        'mtime': fstat.st_mtime,
+        'ctime': fstat.st_ctime
+    }
+    return stat_dict
+
+def format_pretty_output_from_dict(item, show_content=False, 
path_justify=None):
+    output = ("file: %s" % item['path'].ljust(path_justify) +
+              ("  owner:%s" % str(item['owner']).ljust(5) if 'owner' in item 
else "") +
+              ("  (creat:%s" % item['ctime'] if 'ctime' in item else "") +
+              ("  mod:%s" % item['mtime'] if 'mtime' in item else "") +
+              ("  open:%s" % item['open'] if 'open' in item else "") +
+              ("  empty:%s" % item['empty'] if 'empty' in item else "") +
+              ("  old:%s" % item['old'] if 'old' in item else "") +
+              ("  type:%s" % item['type'] if 'type' in item else ""))
+    if show_content and 'content' in item:
+        output = output + "\n    content:%s" % item['content']
+    return output
+
+def fileinfo_from_dict(item, fromtime=None):
+    '''
+    this is the inverse of produce_dict, returning a new
+    FileInfo object
+    '''
+    if fromtime is None:
+        fromtime = time.time()
+    # fixme - eh? what's broken?
+    finfo = FileInfo(item['path'], magic=None, statinfo=None)
+    finfo.stat = item['stat']
+    finfo.filetype = item['filetype']
+    if 'content' in item:
+        finfo.start_content = item['content']
+    finfo.is_empty = string_to_bool(item['empty'])
+    finfo.is_binary = string_to_bool(item['binary'])
+    finfo.is_open = string_to_bool(item['open'])
+    finfo.entrydate = item['entrydate']
+
+
 class EntryInfo(object):
     '''
     minimum info about a directory entry
     '''
+    @staticmethod
+    def display_from_dict(item, show_content=False, path_justify=None):
+        if path_justify is None:
+            path_justify = 50  # very arbitrary, whatever
+        print ("file:%s owner:%s size:%s mod:%s type:%s"
+               % (item['path'].ljust(path_justify),
+                  str(item['owner']).ljust(5),
+                  str(item['size']).ljust(10), item['mtime'],
+                  item['type']))
+        if show_content and 'content' in item:
+            print "    content:%s" % item['content'],
+            if item['content'][-1] != '\n':
+                print
+
     def __init__(self, path):
         self.path = path
         self.stat = None
@@ -19,6 +183,7 @@
         self.entry_dict = None
         self.json = None
         self.is_empty = None
+        self.entrydate = None
 
     def get_stats(self):
         if self.stat is None:
@@ -67,7 +232,7 @@
                 filep = open(self.path, "r")
                 lines = ""
                 firstline = None
-                for count in range(0, num_lines):
+                for _ in range(0, num_lines):
                     line = filep.readline(1000)
                     if line == "":
                         break
@@ -86,23 +251,13 @@
             self.is_empty = False
 
         # and for binary then...?
-        if EntryInfo.check_text_binary(lines):
+        if check_text_binary(lines):
             firstline = "BINARY"
 
-        self.entrydate = EntryInfo.get_date_fromtext(firstline)
+        self.entrydate = get_date_fromtext(firstline)
 
         self.start_content = lines
         return self.start_content
-
-    @staticmethod
-    def check_text_binary(content):
-        textchars = (''.join(map(chr, [7, 8, 9, 10, 12, 13, 27] +
-                                 range(0x20, 0x100))))
-        try:
-            is_binary = bool(content.translate(None, textchars))
-        except:
-            return None
-        return is_binary
 
     def get_is_binary(self, num_lines=1):
         '''
@@ -115,7 +270,7 @@
         '''
         if self.is_binary is None:
             self.get_start_content(num_lines)
-        self.is_binary = EntryInfo.check_text_binary(self.start_content)
+        self.is_binary = check_text_binary(self.start_content)
         return self.is_binary
 
     def get_owner(self):
@@ -175,81 +330,21 @@
                 return None
         return self.json
 
-    @staticmethod
-    def get_date_fromtext(text):
-        '''
-        given a text string look for the first date string in there
-        of arbitrary format
-        this is very sketchy, not at all guaranteed to work, and
-        especially not in fancy locales
-        '''
-
-        current_year = str(datetime.datetime.now().year)
-        # formats actually seen in log files:
-
-        datecheck = {
-            # May  5 03:31:02
-            '%b %d %H:%M:%S ?%Y':
-            [r'[A-Z][a-z]{2}\s+[0-9][0-9]?\s[0-9]{2}:[0-9]{2}:[0-9]{2}(?!\s+[0-9]{4})',
-             ' ?' + current_year],
-            # Jan 15 12:53:26 2014
-            '%b %d %H:%M:%S %Y':
-            [r'[A-Z][a-z]{2}\s+[0-9][0-9]?\s[0-9]{2}:[0-9]{2}:[0-9]{2}\s[0-9]{4}', ''],
-            # 2013-03-08 04:10:33
-            '%Y-%m-%d %H:%M:%S':
-            [r'[0-9]{4}-[0-9]{2}-[0-9]{2}\s[0-9]{2}:[0-9]{2}:[0-9]{2}', ''],
-            # 130312 09:30:58
-            '%y%m%d %H:%M:%S':
-            [r'[0-9]{6}\s[0-9]{2}:[0-9]{2}:[0-9]{2}', ''],
-            # 10/Feb/2014:03:52:38 +0200
-            '%d/%b/%Y:%H:%M:%S':
-            [r'[0-9]{2}/[A-Z][a-z]{2}/[0-9]{4}:[0-9]{2}:[0-9]{2}:[0-9]{2}', '']
-        }
-        if (text is None or text == 'BINARY' or text == 'EMPTY'
-                or text == 'UNAVAILABLE' or text == ''):
-            return None
-
-        for date_format in datecheck:
-            result = re.search(datecheck[date_format][0], text)
-            if result:
-                try:
-                    seconds = calendar.timegm(time.strptime(
-                        result.group(0) + datecheck[date_format][1], date_format))
-                except:
-                    continue
-                return seconds
-
-        # Mar  8 03:51:00.928699
-        # [A-Z][a-z]{2}\s+[0-9][0-9]?\s[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]+
-        # 2013-03-11 09:40:24.358+0000
-        # [0-9]{4}-[0-9]{2}-[0-9]{2}\s[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]+\+[0-9]{4}
-        # 2013-07-23 14:18:15,555
-        # [0-9]{4}-[0-9]{2}-[0-9]{2}\s[0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]+
-        # covered by the above
-
-        # failed to find date
-        return None
-
-    @staticmethod
-    def display_from_dict(item, show_content=False, path_justify=None):
-        if path_justify is None:
-            path_justify = 50  # very arbitrary, whatever
-        print ("file:%s owner:%s size:%s mod:%s type:%s"
-               % (item['path'].ljust(path_justify),
-                  str(item['owner']).ljust(5),
-                  str(item['size']).ljust(10), item['mtime'],
-                  item['type']))
-        if show_content and 'content' in item:
-            print "    content:%s" % item['content'],
-            if item['content'][-1] != '\n':
-                print
-
 
 class FileInfo(EntryInfo):
     '''
     maintain and provide information (stat, filetype, other)
     about a file
     '''
+    @staticmethod
+    def display_from_dict(item, show_content=False, path_justify=None):
+        print ("file:%s owner:%s creat:%s mod:%s open:%s empty:%s old:%s type:%s"
+               % (item['path'].ljust(path_justify), item['owner'].ljust(5),
+                  item['ctime'], item['mtime'], item['open'],
+                  item['empty'], item['old'], item['type']))
+        if show_content and 'content' in item:
+            print "    content:%s" % item['content']
+
     def __init__(self, path, magic, statinfo=None):
         '''
         constructor
@@ -257,14 +352,11 @@
         for determining file type
         '''
         super(FileInfo, self).__init__(path)
-        self.name = os.path.basename(self.path)
+#        self.name = os.path.basename(self.path)
         self.stat = statinfo
         self.filetype = None
         self.is_empty = None
         self.is_open = None
-        self.age = None
-        self.is_old = None
-        self.entrydate = None
         self.magic = magic
 
     def get_ctime(self):
@@ -289,30 +381,19 @@
         else:
             return None
 
-    @staticmethod
-    def get_time_formatted(time_raw):
-        '''
-        format unix time to a printable string
-        and return it, or the empty string on error
-        '''
-        if time_raw is None:
-            return ""
-        else:
-            return time.ctime(time_raw)
-
     def get_ctime_formatted(self):
         '''
         get the ctime of the file in a printable string
         and return it, or the empty string on error
         '''
-        return FileInfo.get_time_formatted(self.get_ctime)
+        return get_time_formatted(self.get_ctime)
 
     def get_mtime_formatted(self):
         '''
         get the mtime of the file in a printable string
         and return it, or the empty string on error
         '''
-        return FileInfo.get_time_formatted(self.get_mtime)
+        return get_time_formatted(self.get_mtime)
 
     def get_filetype(self):
         '''
@@ -349,47 +430,41 @@
         return self.is_open
 
     def get_age(self, from_time=None):
-        if self.age is None:
-            if from_time is None:
-                return self.age
+        if from_time is None:
+            return None
 
-            self.get_ctime()
-            self.get_mtime()
-            if self.stat is not None:
-                age = max(from_time - self.stat.st_ctime,
-                          from_time - self.stat.st_mtime)
-            else:
-                age = None
+        self.get_ctime()
+        self.get_mtime()
+        if self.stat is not None:
+            age = max(from_time - self.stat.st_ctime,
+                      from_time - self.stat.st_mtime)
+        else:
+            age = None
 
-            self.get_start_content()
-            if self.entrydate is not None:
-                if age is None:
-                    age = 0
-                age = max(age, from_time - self.entrydate)
-
-            self.age = age
-
-        return self.age
+        self.get_start_content()
+        if self.entrydate is not None:
+            if age is None:
+                age = 0
+            age = max(age, from_time - self.entrydate)
+        return age
 
     def get_is_old(self, from_time=None, cutoff=None):
         '''
         determine as best as possible if the file is older than
         a certain number of seconds from the specified time
         '''
-        if self.is_old is not None:
-            return self.is_old
-
+        is_old = None
         age = self.get_age(from_time)
 
         if age is not None and cutoff is not None:
             if age > cutoff:
-                self.is_old = True
+                is_old = True
             elif (self.entrydate is not None and
                   0 < from_time - self.entrydate < cutoff):
-                self.is_old = False
+                is_old = False
             else:
-                self.is_old = -1
-        return self.is_old
+                is_old = -1
+        return is_old
 
     def load_file_info(self, from_time, cutoff, open_files=None):
         if open_files is None:
@@ -400,69 +475,20 @@
         self.get_filetype()
         self.get_is_open(open_files)
 
-    @staticmethod
-    def bool_to_string(value):
-        '''
-        turn bools into the equivalent string,
-        also treating 1 as True, -1 and None as unknown ('--')
-        and all other numeric etc values as False
-        '''
-
-        if value is None or value == -1:
-            return '-'
-        elif value is True:
-            return 'T'
-        else:
-            return 'F'
-
-    @staticmethod
-    def string_to_bool(value):
-        '''
-        turn strings 'T', 'F', '--' into the bool
-        values True, False, None; this is almost the
-        inverse of bool_to_string
-        '''
-        if value == 'T':
-            return True
-        elif value == 'F':
-            return False
-        else:
-            return None
-
-    @staticmethod
-    def stat_to_dict(fstat):
-        stat_dict = {
-            'dev': fstat.st_dev,
-            'inode': fstat.st_ino,
-            'mode': fstat.st_mode,
-            'nlink': fstat.st_nlink,
-            'uid': fstat.st_uid,
-            'gid': fstat.st_gid,
-            'dev_spec': fstat.st_rdev,
-            'size': fstat.st_size,
-            'blksize': fstat.st_blksize,
-            'blkcnt': fstat.st_blocks,
-            'atime': fstat.st_atime,
-            'mtime': fstat.st_mtime,
-            'ctime': fstat.st_ctime
-        }
-
-        return stat_dict
-
     def produce_dict(self):
         self.entry_dict = {'path': self.path,
                            'owner': str(self.get_owner()),
-                           'ctime': FileInfo.get_time_formatted(
+                           'ctime': get_time_formatted(
                                self.get_ctime()),
-                           'mtime': FileInfo.get_time_formatted(
+                           'mtime': get_time_formatted(
                                self.get_mtime()),
-                           'open': FileInfo.bool_to_string(self.is_open),
-                           'empty': FileInfo.bool_to_string(
+                           'open': bool_to_string(self.is_open),
+                           'empty': bool_to_string(
                                self.get_is_empty()),
-                           'old': FileInfo.bool_to_string(self.get_is_old()),
+                           'old': bool_to_string(self.get_is_old()),
                            'type': self.get_filetype(),
                            'binary': self.get_is_binary(),
-                           'stat': FileInfo.stat_to_dict(self.stat),
+                           'stat': stat_to_dict(self.stat),
                            'entrydate': self.entrydate}
         if ((not self.is_binary and 'data' not in self.filetype and
              'binary' not in self.filetype) and
@@ -470,24 +496,10 @@
             self.entry_dict['content'] = self.start_content
         return self.entry_dict
 
-    @staticmethod
-    def format_pretty_output_from_dict(item, show_content=False, path_justify=None):
-        output = ("file: %s" % item['path'].ljust(path_justify) +
-                  ("  owner:%s" % str(item['owner']).ljust(5) if 'owner' in item else "") +
-                  ("  (creat:%s" % item['ctime'] if 'ctime' in item else "") +
-                  ("  mod:%s" % item['mtime'] if 'mtime' in item else "") +
-                  ("  open:%s" % item['open'] if 'open' in item else "") +
-                  ("  empty:%s" % item['empty'] if 'empty' in item else "") +
-                  ("  old:%s" % item['old'] if 'old' in item else "") +
-                  ("  type:%s" % item['type'] if 'type' in item else ""))
-        if show_content and 'content' in item:
-            output = output + "\n    content:%s" % item['content']
-        return output
-
     def format_output_from_dict(self, item, show_content=False,
                                 prettyprint=False, path_justify=None):
         if prettyprint:
-            output = FileInfo.format_pretty_output_from_dict(
+            output = format_pretty_output_from_dict(
                 item, show_content, path_justify)
         else:
             output = self.produce_json(item)
@@ -503,36 +515,6 @@
         item = self.produce_dict()
         return self.format_output_from_dict(item, show_content,
                                             prettyprint, path_justify)
-
-    @staticmethod
-    def fileinfo_from_dict(item, fromtime=None):
-        '''
-        this is the inverse of produce_dict, returning a new
-        FileInfo object
-        '''
-        if fromtime is None:
-            fromtime = time.time()
-        # fixme - eh? what's broken?
-        finfo = FileInfo(item['path'], magic=None, statinfo=None)
-        finfo.stat = item['stat']
-        finfo.filetype = item['filetype']
-        if 'content' in item:
-            finfo.start_content = item['content']
-        finfo.is_empty = FileInfo.string_to_bool(item['empty'])
-        finfo.is_binary = FileInfo.string_to_bool(item['binary'])
-        finfo.is_open = FileInfo.string_to_bool(item['open'])
-        finfo.age = None  # not perfect but what can we do
-        finfo.is_old = FileInfo.string_to_bool(item['old'])
-        finfo.entrydate = item['entrydate']
-
-    @staticmethod
-    def display_from_dict(item, show_content=False, path_justify=None):
-        print ("file:%s owner:%s creat:%s mod:%s open:%s empty:%s old:%s type:%s"
-               % (item['path'].ljust(path_justify), item['owner'].ljust(5),
-                  item['ctime'], item['mtime'], item['open'],
-                  item['empty'], item['old'], item['type']))
-        if show_content and 'content' in item:
-            print "    content:%s" % item['content']
 
 
 class LogUtils(object):
@@ -568,6 +550,7 @@
         self.normalized_path = os.path.join(os.path.dirname(self.path),
                                             self.normalized)
         self.rotated = None
+        self.notifempty = None
 
     def load_file_info(self, from_time, cutoff, open_files=None, rotated=None):
         if rotated is None:
diff --git a/dataretention/retention/ignores.py b/dataretention/retention/ignores.py
index c2dedcc..abf1028 100644
--- a/dataretention/retention/ignores.py
+++ b/dataretention/retention/ignores.py
@@ -370,37 +370,20 @@
                         result[igntype][item] = list(set(result[igntype][item] + ign[igntype][item]))
         return result
 
-    def show_ignored(self, basedirs, ignored, headertext=None):
+    def show_ignored(self, basedirs, ignored, igntype, text):
+        sys.stderr.write("INFO: Ignoring " + text + " :\n")
+        for basedir in ignored[igntype]:
+            if basedir in basedirs or basedir == '*' or basedir == '/':
+                sys.stderr.write(
+                    "INFO: " + ','.join(ignored[igntype][basedir])
+                    + " in " + basedir + '\n')
+
+    def show_ignoreds(self, basedirs, ignoreds, headertext=None):
         if headertext:
             sys.stderr.write("INFO: " + headertext + '\n')
 
-        if 'dirs' in ignored:
-            sys.stderr.write("INFO: Ignoring the following directories:\n")
-            for basedir in ignored['dirs']:
-                if basedir in basedirs or basedir == '*' or basedir == '/':
-                    sys.stderr.write(
-                        "INFO: " + ','.join(ignored['dirs'][basedir])
-                        + " in " + basedir + '\n')
-
-        if 'files' in ignored:
-            sys.stderr.write("INFO: Ignoring the following files:\n")
-            for basedir in ignored['files']:
-                if basedir in basedirs or basedir == '*' or basedir == '/':
-                    sys.stderr.write(
-                        "INFO: " + ','.join(ignored['files'][basedir])
-                        + " in " + basedir + '\n')
-
-        if 'prefixes' in ignored:
-            sys.stderr.write(
-                "INFO: Ignoring files starting with the following:\n")
-            sys.stderr.write(
-                "INFO: " + ','.join(ignored['prefixes']) + '\n')
-
-        if 'extensions' in ignored:
-            sys.stderr.write(
-                "INFO: Ignoring files ending with the following:\n")
-            for basedir in ignored['extensions']:
-                if basedir in basedirs or basedir == '*':
-                    sys.stderr.write("INFO: " + ','.join(
-                        ignored['extensions'][basedir])
-                                     + " in " + basedir + '\n')
+        for ignored in ignoreds:
+            self.show_ignored(basedirs, ignored, 'dirs', 'the following directories')
+            self.show_ignored(basedirs, ignored, 'files', 'the following files')
+            self.show_ignored(basedirs, ignored, 'prefixes', 'files starting with the following')
+            self.show_ignored(basedirs, ignored, 'extensions', 'files ending with the following')
diff --git a/dataretention/retention/localexaminer.py b/dataretention/retention/localexaminer.py
index 00daf2a..34efce8 100644
--- a/dataretention/retention/localexaminer.py
+++ b/dataretention/retention/localexaminer.py
@@ -4,6 +4,7 @@
 
 from clouseau.retention.utils import JsonHelper
 from clouseau.retention.fileinfo import FileInfo, EntryInfo
+import clouseau.retention.fileinfo
 
 
 class LocalFileExaminer(object):
@@ -134,7 +135,7 @@
         except:
             print json_text
             return json_text
-        output = FileInfo.format_pretty_output_from_dict(item, path_justify=50)
+        output = clouseau.retention.fileinfo.format_pretty_output_from_dict(item, path_justify=50)
         print output
         return output
 
diff --git a/dataretention/retention/localfileaudit.py b/dataretention/retention/localfileaudit.py
index 48359ef..caed1d5 100644
--- a/dataretention/retention/localfileaudit.py
+++ b/dataretention/retention/localfileaudit.py
@@ -97,7 +97,6 @@
             self.dirs_to_check = [d.rstrip(os.path.sep) for d in check_list
                                   if d.startswith(os.sep)]
 
-
     def normalize(self, fname):
         '''
         subclasses may want to do something different, see
diff --git a/dataretention/retention/locallogaudit.py b/dataretention/retention/locallogaudit.py
index 1383e9c..7e64a96 100644
--- a/dataretention/retention/locallogaudit.py
+++ b/dataretention/retention/locallogaudit.py
@@ -216,7 +216,7 @@
                     if not fields[1].isdigit():
                         continue
                     found = True
-                    if int(fields[1]) > clouseau.retention.config.conf['cutoff']/86400:
+                    if int(fields[1]) > clouseau.retention.config.conf['cutoff'] / 86400:
                         if output:
                             output = output + '\n'
                         output = output + ('WARNING: some mysql logs expired after %s days in %s'
@@ -267,6 +267,7 @@
                                    for fname in all_files]) + 2
 
         for fname in all_files_sorted:
+            fage = all_files[fname].get_age()
             if clouseau.retention.fileutils.contains(all_files[fname].filetype,
                                                      clouseau.retention.config.conf['ignored_types']):
                 continue
@@ -274,10 +275,10 @@
             if (self.oldest_only and
                     all_files[fname].normalized == last_log_normalized):
                 # still doing the same group of logs
-                if all_files[fname].age <= age:
+                if fage <= age:
                     continue
                 else:
-                    age = all_files[fname].age
+                    age = fage
                     last_log = fname
             else:
                 if last_log:
@@ -288,7 +289,7 @@
                 # starting new set of logs (maybe first set)
                 last_log_normalized = all_files[fname].normalized
                 last_log = fname
-                age = all_files[fname].age
+                age = fage
 
         if last_log:
             result.append(all_files[last_log].format_output(
diff --git a/dataretention/retention/remotefileauditor.py b/dataretention/retention/remotefileauditor.py
index 203a8fa..9012af0 100644
--- a/dataretention/retention/remotefileauditor.py
+++ b/dataretention/retention/remotefileauditor.py
@@ -1,5 +1,4 @@
 import os
-import time
 import json
 
 import clouseau.retention.magic
@@ -8,10 +7,23 @@
 from clouseau.retention.rule import RuleStore
 import clouseau.retention.config
 from clouseau.retention.fileinfo import FileInfo
+import clouseau.retention.fileinfo
 from clouseau.retention.utils import JsonHelper
 from retention.runner import Runner
 import clouseau.retention.ruleutils
 
+def display_summary_line(line, prompt=None, message=None):
+    if line == "":
+        return True
+    elif (line.startswith("WARNING:") or
+          line.startswith("INFO:")):
+        if prompt is not None:
+            print prompt + ": ",
+        if message is not None:
+            print message
+        print line
+        return True
+    return False
 
 def get_dirs_toexamine(host_report):
     '''
@@ -46,11 +58,11 @@
             print "WARNING: failed to load json for", json_entry
             continue
         if 'empty' in entry:
-            empty = FileInfo.string_to_bool(entry['empty'])
+            empty = clouseau.retention.fileinfo.string_to_bool(entry['empty'])
             if empty:
                 continue
         if 'old' in entry:
-            old = FileInfo.string_to_bool(entry['old'])
+            old = clouseau.retention.fileinfo.string_to_bool(entry['old'])
             if old is None or old:
                 if os.path.dirname(entry['path']) not in dirs_problem:
                     dirs_problem.add(os.path.dirname(entry['path']))
@@ -118,7 +130,6 @@
         self.verbose = verbose
 
         clouseau.retention.config.set_up_conf(confdir)
-        self.cutoff = clouseau.retention.config.conf['cutoff']
 
         client = LocalClientPlus()
         hosts, expr_type = Runner.get_hosts_expr_type(self.hosts_expr)
@@ -132,7 +143,6 @@
         self.cdb.store_db_init(self.expanded_hosts)
         self.set_up_and_export_rule_store()
 
-        self.today = time.time()
         self.magic = clouseau.retention.magic.magic_open(clouseau.retention.magic.MAGIC_NONE)
         self.magic.load()
         self.summary = None
@@ -270,15 +280,10 @@
             print "host:", host
 
             if result[host]:
-                self.summary = {}
                 try:
                     lines = result[host].split('\n')
                     for line in lines:
-                        if line == '':
-                            continue
-                        if (line.startswith("WARNING:") or
-                                line.startswith("INFO:")):
-                            print line
+                        if display_summary_line(line):
                             continue
                         else:
                             try:
diff --git a/dataretention/retention/remotelogauditor.py b/dataretention/retention/remotelogauditor.py
index de5f833..2c45553 100644
--- a/dataretention/retention/remotelogauditor.py
+++ b/dataretention/retention/remotelogauditor.py
@@ -3,6 +3,26 @@
 from clouseau.retention.fileinfo import LogInfo
 from clouseau.retention.utils import JsonHelper
 from retention.remotefileauditor import RemoteFilesAuditor
+import retention.remotefileauditor
+
+def summarize_log_issues(log_items, host, logs):
+    for item in log_items:
+        log_name = item['normalized']
+        if not item['normalized'] in logs:
+            logs[log_name] = {}
+            logs[log_name]['old'] = set()
+            logs[log_name]['maybe_old'] = set()
+            logs[log_name]['unrot'] = set()
+            logs[log_name]['notifempty'] = set()
+        if item['old'] == 'T':
+            logs[log_name]['old'].add(host)
+        elif item['old'] == '-':
+            logs[log_name]['maybe_old'].add(host)
+        if item['rotated'].startswith('F'):
+            logs[log_name]['unrot'].add(host)
+        if item['notifempty'] == 'T':
+            logs[log_name]['notifempty'].add(host)
+    return logs
 
 
 class RemoteLogsAuditor(RemoteFilesAuditor):
@@ -45,16 +65,11 @@
         for host in all_hosts:
             output = None
             if audit_results[host]:
+                output = []
                 try:
                     lines = audit_results[host].split('\n')
-                    output = []
                     for line in lines:
-                        if line == "":
-                            continue
-                        elif (line.startswith("WARNING:") or
-                              line.startswith("INFO:")):
-                            print 'host:', host
-                            print line
+                        if retention.remotefileauditor.display_summary_line(line, 'host', host):
                             continue
                         output.append(json.loads(
                             line, object_hook=JsonHelper.decode_dict))
@@ -67,22 +82,8 @@
                     continue
             if output is None:
                 continue
-            for item in output:
-                log_name = item['normalized']
-                if not item['normalized'] in logs:
-                    logs[log_name] = {}
-                    logs[log_name]['old'] = set()
-                    logs[log_name]['maybe_old'] = set()
-                    logs[log_name]['unrot'] = set()
-                    logs[log_name]['notifempty'] = set()
-                if item['old'] == 'T':
-                    logs[log_name]['old'].add(host)
-                elif item['old'] == '-':
-                    logs[log_name]['maybe_old'].add(host)
-                if item['rotated'].startswith('F'):
-                    logs[log_name]['unrot'].add(host)
-                if item['notifempty'] == 'T':
-                    logs[log_name]['notifempty'].add(host)
+            logs = summarize_log_issues(output, host, logs)
+
         sorted_lognames = sorted(logs.keys())
         for logname in sorted_lognames:
             old_count = len(logs[logname]['old'])
@@ -157,5 +158,3 @@
                                        path_justify, norm_justify)
         except:
             print "WARNING: failed to load json from host:", result
-
-
diff --git a/dataretention/retention/remoteusercfgrabber.py b/dataretention/retention/remoteusercfgrabber.py
index 0785207..5ca24a8 100644
--- a/dataretention/retention/remoteusercfgrabber.py
+++ b/dataretention/retention/remoteusercfgrabber.py
@@ -50,4 +50,3 @@
             print local_ignores
 
         return local_ignores
-
diff --git a/dataretention/retention/rule.py b/dataretention/retention/rule.py
index d5d813e..51ce11c 100644
--- a/dataretention/retention/rule.py
+++ b/dataretention/retention/rule.py
@@ -1,5 +1,4 @@
 import os
-import re
 import sqlite3
 from clouseau.retention.saltclientplus import LocalClientPlus
 from clouseau.retention.status import Status
diff --git a/dataretention/retention/saltclientplus.py b/dataretention/retention/saltclientplus.py
index 98b6a75..0199c7d 100644
--- a/dataretention/retention/saltclientplus.py
+++ b/dataretention/retention/saltclientplus.py
@@ -49,4 +49,3 @@
                 for host in resp:
                     hosts.append(host)
             return list(set(hosts))
-

-- 
To view, visit https://gerrit.wikimedia.org/r/233472
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I3a359e03925d0f669eff0b216db859efe55d20c2
Gerrit-PatchSet: 2
Gerrit-Project: operations/software
Gerrit-Branch: master
Gerrit-Owner: ArielGlenn <[email protected]>
Gerrit-Reviewer: ArielGlenn <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to