ArielGlenn has submitted this change and it was merged.

Change subject: move rule static methods out to a separate utils file
......................................................................


move rule static methods out to a separate utils file

Change-Id: Ie73f7215a8ff4be9c9a079e03f465b393b3de329
---
M dataretention/retention/cli.py
M dataretention/retention/localfileaudit.py
M dataretention/retention/remotefileauditor.py
M dataretention/retention/rule.py
A dataretention/retention/ruleutils.py
M dataretention/rulestore.py
6 files changed, 464 insertions(+), 481 deletions(-)

Approvals:
  ArielGlenn: Verified; Looks good to me, approved



diff --git a/dataretention/retention/cli.py b/dataretention/retention/cli.py
index df474b7..3b09b1f 100644
--- a/dataretention/retention/cli.py
+++ b/dataretention/retention/cli.py
@@ -21,6 +21,7 @@
 from retention.config import Config
 from retention.examiner import DirExaminer, FileExaminer
 import retention.fileutils
+import retention.ruleutils
 
 class LocalIgnores(object):
     '''
@@ -138,7 +139,7 @@
         if hosts is None:
             hosts = self.cdb.store_db_list_all_hosts()
         for host in hosts:
-            self.perhost_rules_from_store = Rule.get_rules(
+            self.perhost_rules_from_store = retention.ruleutils.get_rules(
                 self.cdb, host, Status.text_to_status('good'))
 
             if self.perhost_rules_from_store is not None:
@@ -812,7 +813,7 @@
                 print 'skipping %s, not in current dir listing' % entry
                 print self.current_dir_contents_dict
                 continue
-            filetype = Rule.entrytype_to_text(
+            filetype = retention.ruleutils.entrytype_to_text(
                 self.current_dir_contents_dict[entry]['type'])
             if filetype == 'link':
                 print 'No need to mark', file_expr, 'links are always skipped'
@@ -821,7 +822,7 @@
                 print 'Not a dir or regular file, no need to mark, skipping'
                 continue
             status = Status.text_to_status('good')
-            Rule.do_add_rule(self.cdb, file_expr, filetype, status, self.host)
+            retention.ruleutils.do_add_rule(self.cdb, file_expr, filetype, status, self.host)
         return True
 
     def check_rules_path(self, rules_path):
@@ -866,11 +867,11 @@
                 path = os.path.join(self.current_dir, path)
             if path[-1] == os.path.sep:
                 path = path[:-1]
-                filetype = Rule.text_to_entrytype('dir')
+                filetype = retention.ruleutils.text_to_entrytype('dir')
             else:
-                filetype = Rule.text_to_entrytype('file')
+                filetype = retention.ruleutils.text_to_entrytype('file')
 
-            Rule.do_add_rule(self.cdb, path, filetype, status, self.host)
+            retention.ruleutils.do_add_rule(self.cdb, path, filetype, status, 
self.host)
             # update the ignores list since we have a new rule
             self.perhost_ignores_from_rules = {}
             self.get_perhost_ignores_from_rules([self.host])
@@ -899,23 +900,23 @@
                 if prefix == "":
                     prefix = "/"
                 if status == 'a' or status == 'A':
-                    Rule.show_rules(self.cdb, self.host, prefix=prefix)
+                    retention.ruleutils.show_rules(self.cdb, self.host, prefix=prefix)
                     return True
                 elif status[0].upper() in Status.STATUSES:
-                    Rule.show_rules(self.cdb, self.host, status[0].upper(),
-                                    prefix=prefix)
+                    retention.ruleutils.show_rules(self.cdb, self.host, status[0].upper(),
+                                                   prefix=prefix)
                     return True
         elif command == 'D' or command == 'd':
             if not self.current_dir_contents_list:
                 self.get_dir_contents(self.current_dir, self.batchno)
-            Rule.get_rules_for_path(self.cdb, self.current_dir, self.host)
+            retention.ruleutils.get_rules_for_path(self.cdb, self.current_dir, self.host)
             return True
         elif command == 'C' or command == 'c':
             if not self.current_dir_contents_list:
                 self.get_dir_contents(self.current_dir, self.batchno)
-            Rule.get_rules_for_entries(self.cdb, self.current_dir,
-                                       self.current_dir_contents_dict,
-                                       self.host)
+            retention.ruleutils.get_rules_for_entries(self.cdb, self.current_dir,
+                                                      self.current_dir_contents_dict,
+                                                      self.host)
             return True
         elif command == 'R' or command == 'r':
             # fixme need different completer here I think, that
@@ -929,7 +930,7 @@
                 path = os.path.join(self.current_dir, path)
             if path[-1] == os.path.sep:
                 path = path[:-1]
-            Rule.do_remove_rule(self.cdb, path, self.host)
+            retention.ruleutils.do_remove_rule(self.cdb, path, self.host)
             # update the ignores list since we removed a rule
             self.perhost_ignores_from_rules = {}
             self.get_perhost_ignores_from_rules([self.host])
@@ -943,7 +944,7 @@
             if not self.check_rules_path(rules_path):
                 print "bad rules file path specified, aborting"
             else:
-                Rule.import_rules(self.cdb, rules_path, self.host)
+                retention.ruleutils.import_rules(self.cdb, rules_path, self.host)
             return True
         elif command == 'E' or command == 'e':
             readline.set_completer(None)
@@ -954,7 +955,7 @@
             if not self.check_rules_path(rules_path):
                 print "bad rules file path specified, aborting"
             else:
-                Rule.export_rules(self.cdb, rules_path, self.host)
+                retention.ruleutils.export_rules(self.cdb, rules_path, self.host)
             return True
         elif command == 'Q' or command == 'q':
             print "quitting this level"
@@ -1116,9 +1117,9 @@
             if command in Status.STATUSES:
                 # this option is invoked on a directory so
                 # type is dir every time
-                Rule.do_add_rule(self.cdb, dir_path,
-                                 Rule.text_to_entrytype('dir'),
-                                 command, self.host)
+                retention.ruleutils.do_add_rule(self.cdb, dir_path,
+                                                retention.ruleutils.text_to_entrytype('dir'),
+                                                command, self.host)
                 return None
             elif command == 'Q' or command == 'q':
                 return None
diff --git a/dataretention/retention/localfileaudit.py b/dataretention/retention/localfileaudit.py
index 5a19be9..24b54c2 100644
--- a/dataretention/retention/localfileaudit.py
+++ b/dataretention/retention/localfileaudit.py
@@ -14,6 +14,7 @@
 from retention.config import Config
 from retention.fileinfo import FileInfo
 import retention.fileutils
+import retention.ruleutils
 
 
 class LocalFilesAuditor(object):
@@ -172,10 +173,10 @@
                 for rule in self.perhost_rules_from_store[host]:
                     path = os.path.join(rule['basedir'], rule['name'])
                     if rule['status'] == 'good':
-                        if Rule.entrytype_to_text(rule['type']) == 'dir':
+                        if retention.ruleutils.entrytype_to_text(rule['type']) == 'dir':
                             if path not in self.ignored['dirs']['/']:
                                 self.ignored['dirs']['/'].append(path)
-                        elif Rule.entrytype_to_text(rule['type']) == 'file':
+                        elif retention.ruleutils.entrytype_to_text(rule['type']) == 'file':
                             if path not in self.ignored['files']['/']:
                                 self.ignored['files']['/'].append(path)
                         else:
diff --git a/dataretention/retention/remotefileauditor.py b/dataretention/retention/remotefileauditor.py
index aa45231..040b58b 100644
--- a/dataretention/retention/remotefileauditor.py
+++ b/dataretention/retention/remotefileauditor.py
@@ -17,6 +17,7 @@
 from retention.utils import JsonHelper
 from retention.runner import Runner
 from retention.localfileaudit import LocalFilesAuditor
+import retention.ruleutils
 
 global_keys = [key for key, value_unused in
                sys.modules[__name__].__dict__.items()]
@@ -105,8 +106,6 @@
         store_filepath: full path to rule store (sqlite3 db)
         verbose:      show informative messages during processing
         '''
-
-        global rules
 
         self.hosts_expr = hosts_expr
         self.audit_type = audit_type
@@ -211,7 +210,7 @@
             os.makedirs(where_to_put, 0755)
         for host in hosts:
             nicepath = os.path.join(where_to_put, host + ".conf")
-            Rule.export_rules(self.cdb, nicepath, host)
+            retention.ruleutils.export_rules(self.cdb, nicepath, host)
 
     def set_up_ignored(self):
         '''
@@ -272,10 +271,10 @@
                                     continue
                                 if entry[-1] == os.path.sep:
                                     entry = entry[:-1]
-                                    entry_type = Rule.text_to_entrytype('dir')
+                                    entry_type = retention.ruleutils.text_to_entrytype('dir')
                                 else:
-                                    entry_type = Rule.text_to_entrytype('file')
-                                rule = Rule.get_rule_as_json(
+                                    entry_type = retention.ruleutils.text_to_entrytype('file')
+                                rule = retention.ruleutils.get_rule_as_json(
                                     entry, entry_type, status)
                                 rules[host].append(rule)
         return rules
@@ -287,7 +286,7 @@
             rulescode = "rules = {}\n\n"
             rulescode += "rules['%s'] = [\n" % host
             rulescode += (indent +
-                     (",\n%s" % (indent + indent)).join(rules[host]) + "\n")
+                          (",\n%s" % (indent + indent)).join(rules[host]) + "\n")
             rulescode += "]\n"
 
             with open("/srv/salt/audits/retention/configs/%s_store.py" % host, "w+") as fp:
@@ -335,7 +334,7 @@
                 if basedir in basedirs or basedir == '*':
                     sys.stderr.write("INFO: " + ','.join(
                         self.ignored['extensions'][basedir])
-                        + " in " + basedir + '\n')
+                                     + " in " + basedir + '\n')
 
     def normalize(self, fname):
         '''
@@ -482,7 +481,7 @@
         hostlist = report.keys()
         for host in hostlist:
             try:
-                problem_rules = Rule.get_rules(self.cdb, host, Status.text_to_status('problem'))
+                problem_rules = retention.ruleutils.get_rules(self.cdb, host, Status.text_to_status('problem'))
             except:
                 print 'WARNING: problem retrieving problem rules for host', host
                 problem_rules = None
@@ -495,9 +494,9 @@
             if dirs_problem is not None:
                 dirs_problem = list(set(dirs_problem))
                 for dirname in dirs_problem:
-                    Rule.do_add_rule(self.cdb, dirname,
-                                     Rule.text_to_entrytype('dir'),
-                                     Status.text_to_status('problem'), host)
+                    retention.ruleutils.do_add_rule(self.cdb, dirname,
+                                                    retention.ruleutils.text_to_entrytype('dir'),
+                                                    Status.text_to_status('problem'), host)
 
             if dirs_skipped is not None:
                 dirs_skipped = list(set(dirs_skipped))
@@ -505,8 +504,6 @@
                     if dirname in dirs_problem or dirname in existing_problems:
                         # problem report overrides 'too many to audit'
                         continue
-                    Rule.do_add_rule(self.cdb, dirname,
-                                     Rule.text_to_entrytype('dir'),
-                                     Status.text_to_status('unreviewed'), host)
-
-
+                    retention.ruleutils.do_add_rule(self.cdb, dirname,
+                                                    retention.ruleutils.text_to_entrytype('dir'),
+                                                    Status.text_to_status('unreviewed'), host)
diff --git a/dataretention/retention/rule.py b/dataretention/retention/rule.py
index 7b3149a..c46e670 100644
--- a/dataretention/retention/rule.py
+++ b/dataretention/retention/rule.py
@@ -7,6 +7,34 @@
 from retention.saltclientplus import LocalClientPlus
 from retention.status import Status
 
+def to_unicode(param):
+    '''
+    convert a parameter to unicode if it is not already
+    '''
+    newparam = param
+    if not isinstance(param, unicode):
+        try:
+            newparam = unicode(param, 'utf-8')
+        except:
+            pass
+    if newparam is None:
+        newparam = param
+    return newparam
+
+def from_unicode(param):
+    '''
+    convert a parameter from unicode back to bytes it is not already
+    '''
+    newparam = param
+    if isinstance(param, unicode):
+        try:
+            newparam = param.encode('utf-8', 'replace')
+        except:
+            pass
+        if newparam is None:
+            newparam = param
+    return newparam
+
 class Rule(object):
     '''
     manage rules, i.e. tuples (status, abspath, type)
@@ -25,437 +53,6 @@
     STATE_START = 0
     STATE_EXPECT_STATUS = 1
     STATE_EXPECT_ENTRIES = 2
-
-    @staticmethod
-    def to_unicode(param):
-        '''
-        convert a parameter to unicode if it is not already
-        '''
-        newparam = param
-        if not isinstance(param, unicode):
-            try:
-                newparam = unicode(param, 'utf-8')
-            except:
-                pass
-        if newparam is None:
-            newparam = param
-        return newparam
-
-    @staticmethod
-    def from_unicode(param):
-        '''
-        convert a parameter from unicode back to bytes it is not already
-        '''
-        newparam = param
-        if isinstance(param, unicode):
-            try:
-                newparam = param.encode('utf-8', 'replace')
-            except:
-                pass
-            if newparam is None:
-                newparam = param
-        return newparam
-
-    @staticmethod
-    def get_rules_for_entries(cdb, path, path_entries, host, quiet=False):
-        rules = Rule.get_rules_for_path(cdb, path, host, True)
-        for entry in path_entries:
-            rules.extend(Rule.get_rules_for_path(cdb, entry, host, True))
-
-        paths_kept = []
-        uniq = []
-        for rule in rules:
-            if rule['path'] not in paths_kept:
-                paths_kept.append(rule['path'])
-                uniq.append(rule)
-
-        if not quiet:
-            uniq_sorted = sorted(uniq, key=lambda r: r['path'])
-            for rule in uniq_sorted:
-                print rule
-        return uniq_sorted
-
-    @staticmethod
-    def format_rules_for_export(rules_list, indent_count):
-        if len(rules_list) == 0:
-            return "[]"
-
-        spaces = " " * 4
-        indent = spaces * indent_count
-        return ("[\n" + indent + spaces +
-                (",\n" + indent + spaces).join(
-                    ["'" + rule['path'].replace("'", r"\'") + "'"
-                     for rule in rules_list]
-                )
-                + "\n" + indent + "]")
-
-    @staticmethod
-    def import_rule_list(cdb, entries, status, host):
-        '''
-        import status rules for a list of files or dirs
-        - anything not ending in '/' is considered to be a file
-        - files/dirs must be specified by full path, anything else
-          will be skipped
-        - failures to add to rule store are reported but processing continues
-        '''
-        for entry in entries:
-            if entry[0] != os.path.sep:
-                print "relative path in rule, skipping:", entry
-                continue
-            if entry[-1] == '/':
-                entry_type = Rule.text_to_entrytype('dir')
-                entry = entry[:-1]
-            else:
-                entry_type = Rule.text_to_entrytype('file')
-            try:
-                Rule.do_add_rule(cdb, entry, entry_type,
-                                 status, host)
-            except:
-                exc_type, exc_value, exc_traceback = sys.exc_info()
-                sys.stderr.write(repr(traceback.format_exception(
-                    exc_type, exc_value, exc_traceback)))
-                sys.stderr.write("Couldn't add rule for %s to rule store\n" %
-                                 entry)
-
-    @staticmethod
-    def import_handle_status(line):
-        '''
-        see if the line passed is a status def line
-        returns status found (if any) and next state
-        '''
-        for stat in Status.status_cf:
-            result = Status.status_cf[stat][1].match(line)
-            if result is not None:
-                if "]" in result.group(0):
-                    return None, Rule.STATE_EXPECT_STATUS
-                else:
-                    return stat, Rule.STATE_EXPECT_ENTRIES
-        return None, None
-
-    @staticmethod
-    def import_rules(cdb, rules_path, host):
-        # we don't toss all existing rules, these get merged into
-        # the rules already in the rules store
-
-        # it is possible to bork the list of files by deliberately
-        # including a file/dir with a newline in the name; this will
-        # just mean that your rule doesn't cover the files/dirs you want.
-        try:
-            rules_text = open(rules_path).read()
-        except:
-            exc_type, exc_value, exc_traceback = sys.exc_info()
-            sys.stderr.write(repr(traceback.format_exception(
-                exc_type, exc_value, exc_traceback)))
-            sys.stderr.write("Couldn't read rules from %s.\n" % rules_path)
-            return
-
-        lines = rules_text.split("\n")
-        state = Rule.STATE_START
-        rules = {}
-        active = None
-        for line in lines:
-            if Rule.comment_expr.match(line) or Rule.blank_expr.match(line):
-                continue
-            elif state == Rule.STATE_START:
-                if not Rule.first_line_expected.match(line):
-                    print "unexpected line in rules file, wanted "
-                    print "'dir_rules = ...', aborting:"
-                    print line
-                    return
-                else:
-                    state = Rule.STATE_EXPECT_STATUS
-            elif state == Rule.STATE_EXPECT_STATUS:
-                if Rule.last_line_expected.match(line):
-                    # done parsing file
-                    break
-                active, state = Rule.import_handle_status(line)
-                if state == Rule.STATE_EXPECT_STATUS:
-                    continue
-                elif state == Rule.STATE_EXPECT_ENTRIES:
-                    rules[active] = []
-                elif state is None:
-                    # not a status with empty list, not a status
-                    # expecting entries on following lines, bail
-                    print "unexpected line in rules file, aborting:"
-                    print line
-                    return
-            elif state == Rule.STATE_EXPECT_ENTRIES:
-                if Rule.entry_expr.match(line):
-                    result = Rule.entry_expr.match(line)
-                    rules[active].append(result.group(1))
-                elif Rule.end_entries_expr.match(line):
-                    active = None
-                    state = Rule.STATE_EXPECT_STATUS
-                else:
-                    active, state = Rule.import_handle_status(line)
-                    if state == Rule.STATE_EXPECT_STATUS:
-                        # end of entries with crap syntax, we forgive
-                        continue
-                    elif state == Rule.STATE_EXPECT_ENTRIES:
-                        # found a status line with empty list.
-                        # so end of these entries ayways
-                        state = Rule.STATE_EXPECT_STATUS
-                        continue
-                    elif state is None:
-                        # not an entry, not a status, not end of entries
-                        print "unexpected line in rules file, wanted entry, "
-                        print "status or entry end marker, aborting:"
-                        print line
-                        return
-            else:
-                print "unexpected line in rules file, aborting:"
-                print line
-                return
-
-        for status in Status.status_cf:
-            if status in rules:
-                Rule.import_rule_list(
-                    cdb, rules[status],
-                    Status.status_cf[status][0], host)
-
-    @staticmethod
-    def do_remove_rule(cdb, path, host):
-        cdb.store_db_delete({'basedir': os.path.dirname(path),
-                             'name': os.path.basename(path)},
-                            host)
-
-    @staticmethod
-    def do_remove_rules(cdb, status, host):
-        cdb.store_db_delete({'status': status},
-                            host)
-
-    @staticmethod
-    def do_add_rule(cdb, path, rtype, status, host):
-        cdb.store_db_replace({'basedir': os.path.dirname(path),
-                              'name': os.path.basename(path),
-                              'type': rtype,
-                              'status': status},
-                             host)
-
-    @staticmethod
-    def check_host_table_exists(cdb, host):
-        return cdb.store_db_check_host_table(host)
-        
-    @staticmethod
-    def normalize_path(path, ptype):
-        '''
-        make sure the path ends in '/' if it's dir type, otherwise
-        that it does not, return the normalized path
-        '''
-        if ptype == 'dir':
-            if path[-1] != os.path.sep:
-                path = path + os.path.sep
-        else:
-            if path[-1] == os.path.sep:
-                path = path[:-1]
-        return path
-
-    @staticmethod
-    def export_rules(cdb, rules_path, host, status=None):
-        # would be nice to be able to only export some rules. whatever
-
-        rules = Rule.get_rules(cdb, host, status)
-        sorted_rules = {}
-        for stext in Status.STATUS_TEXTS:
-            sorted_rules[stext] = []
-        for rule in rules:
-            if rule['status'] in Status.STATUS_TEXTS:
-                rule['path'] = Rule.normalize_path(rule['path'], rule['type'])
-                sorted_rules[rule['status']].append(rule)
-            else:
-                continue
-
-        output = "dir_rules = {\n"
-        for status in Status.STATUS_TEXTS:
-            output += "    '%s': %s,\n" % (
-                status, Rule.format_rules_for_export(sorted_rules[status], 2))
-        output += "}\n"
-        try:
-            filep = open(rules_path, "w")
-            filep.write("# -*- coding: utf-8 -*-\n")
-            filep.write(output)
-            filep.close()
-        except:
-            exc_type, exc_value, exc_traceback = sys.exc_info()
-            sys.stderr.write(repr(traceback.format_exception(
-                exc_type, exc_value, exc_traceback)))
-            sys.stderr.write("Couldn't save rules into %s.\n" % rules_path)
-
-    @staticmethod
-    def entrytype_to_text(abbrev):
-        if abbrev in Rule.TYPES:
-            return Rule.TYPES_TO_TEXT[abbrev]
-        else:
-            return None
-
-    @staticmethod
-    def text_to_entrytype(fullname):
-        for key in Rule.TYPES_TO_TEXT:
-            if Rule.TYPES_TO_TEXT[key] == fullname:
-                return key
-        return None
-
-    @staticmethod
-    def row_to_rule(row):
-    # ('/home/ariel/wmf/security', '/home/ariel/wmf/security/openjdk6', 'D', 'G')
-        (basedir, name, entrytype, status) = row
-        basedir = Rule.from_unicode(basedir)
-        name = Rule.from_unicode(name)
-        rule = {'path': os.path.join(basedir, name),
-                'type': Rule.entrytype_to_text(entrytype),
-                'status': Status.status_to_text(status)}
-        return rule
-
-    @staticmethod
-    def get_rules(cdb, host, status=None):
-        if status:
-            crs = cdb.store_db_select(['basedir', 'name', 'type', 'status'],
-                                      {'status': status}, host)
-        else:
-            crs = cdb.store_db_select(['basedir', 'name', 'type', 'status'],
-                                      None, host)
-        rules = []
-        rows = RuleStore.store_db_get_all_rows(crs)
-        for row in rows:
-            rules.append(Rule.row_to_rule(row))
-        return rules
-
-    @staticmethod
-    def show_rules(cdb, host, status=None, prefix=None):
-        rules = Rule.get_rules(cdb, host, status)
-        if rules:
-            rules_sorted = sorted(rules, key=lambda r: r['path'])
-            for rule in rules_sorted:
-                if prefix is None or rule['path'].startswith(prefix):
-                    print rule
-
-    @staticmethod
-    def get_rules_with_prefix(cdb, path, host):
-        '''
-        retrieve all rules where the basedir starts with the specified path
-        '''
-        # prefixes...
-        crs = cdb.store_db_select(['basedir', 'name', 'type', 'status'],
-                                  {'basedir': path}, host)
-        rules = []
-        rows = RuleStore.store_db_get_all_rows(crs)
-        for row in rows:
-            rules.append(Rule.row_to_rule(row))
-        return rules
-
-    @staticmethod
-    def check_rule_prefixes(rows):
-        '''
-        separate out the rules with wildcards in the name field
-        and those without
-        '''
-        text = []
-        wildcards = []
-        if rows is None:
-            return text, wildcards
-
-        for row in rows:
-            if '*' in os.path.basename(row['path']):
-                wildcards.append(row)
-            else:
-                text.append(row)
-        return text, wildcards
-
-    @staticmethod
-    def rule_is_prefix(basedir, name, path, wildcard=False):
-        '''
-        if the dir part of the rule entry plus the basename is
-        a proper path prefix of the specified path (followed by the
-        path separator, or it's the exact path), return True, else False
-
-        wildcard matches are done only for a single wildcard in the name
-        component of the rule entry and does not cross a directory path
-        component i.e. basedir = /a/b and name = c* will not match
-        path /a/b/cow/dog  but will match /a/b/cow
-        '''
-        if not wildcard:
-            if path.startswith(os.path.join(basedir, name) + os.path.sep):
-                return True
-            elif path == os.path.join(basedir, name):
-                return True
-        else:
-            rulepath = os.path.join(basedir, name)
-            if len(rulepath) >= len(path):
-                return False
-
-            left, right = rulepath.split('*', 1)
-            if path.startswith(left):
-                if path.endswith(right):
-                    if os.path.sep not in path[len(left): -1 * len(right)]:
-                        return True
-        return False
-
-    @staticmethod
-    def get_rules_for_path(cdb, path, host, quiet=False):
-        # get all paths starting from / and descending to the specified path
-        prefixes = Rule.get_prefixes(path)
-        rows = []
-        # get all entries where the dir part of the path is a prefix and the
-        # name part of the path will be checked to see if it is the next dir
-        # elt in the path or wildcard matches it
-
-        for pref in prefixes:
-            rows.extend(Rule.get_rules_with_prefix(cdb, pref, host))
-        # split out the rules with wildcards in the basename from the rest
-        regulars, wildcards = Rule.check_rule_prefixes(rows)
-        keep = []
-        paths_kept = []
-        for plain in regulars:
-            if Rule.rule_is_prefix(os.path.dirname(plain['path']),
-                                   os.path.basename(plain['path']), path):
-                if plain['path'] not in paths_kept:
-                    keep.append(plain)
-                    paths_kept.append(plain['path'])
-        for wild in wildcards:
-            if Rule.rule_is_prefix(os.path.dirname(wild['path']),
-                                   os.path.basename(wild['path']),
-                                   path, wildcard=True):
-                if wild['path'] not in paths_kept:
-                    keep.append(wild)
-                    paths_kept.append(wild['path'])
-
-        if len(keep) == 0:
-            keep_sorted = keep
-        else:
-            keep_sorted = sorted(keep, key=lambda r: r['path'])
-        if not quiet:
-            print "No rules for directory"
-        else:
-            for rule in keep_sorted:
-                print rule
-        return keep_sorted
-
-    @staticmethod
-    def get_prefixes(path):
-        '''
-        given an absolute path like /a/b/c, return the list of all paths
-        starting from / and descending to the specified path
-        i.e. if given '/a/b/c', would return ['/', '/a', '/a/b', 'a/b/c']
-        for relative paths or empty paths we return an empty prefix list
-        '''
-        if not path or path[0] != '/':
-            return []
-        fields = path.split(os.path.sep)
-        prefix = "/"
-        prefixes = [prefix]
-        for field in fields:
-            if field:
-                prefix = os.path.join(prefix, field)
-                prefixes.append(prefix)
-        return prefixes
-
-    @staticmethod
-    def get_rule_as_json(path, ptype, status):
-        rule = {'basedir': os.path.dirname(path),
-                'name': os.path.basename(path),
-                'type': ptype,
-                'status': status}
-        return json.dumps(rule)
 
 
 class RuleStore(object):
@@ -633,8 +230,8 @@
 
         self.crs.execute("INSERT INTO %s VALUES (?, ?, ?, ?)"
                          % self.get_tablename(host),
-                         (Rule.to_unicode(params['basedir']),
-                          Rule.to_unicode(params['name']),
+                         (to_unicode(params['basedir']),
+                          to_unicode(params['name']),
                           params['type'],
                           params['status']))
         self.store_db.commit()
@@ -651,8 +248,8 @@
 
         self.crs.execute("INSERT OR REPLACE INTO  %s VALUES (?, ?, ?, ?)"
                          % self.get_tablename(host),
-                         (Rule.to_unicode(params['basedir']),
-                          Rule.to_unicode(params['name']),
+                         (to_unicode(params['basedir']),
+                          to_unicode(params['name']),
                           params['type'],
                           params['status']))
         self.store_db.commit()
diff --git a/dataretention/retention/ruleutils.py 
b/dataretention/retention/ruleutils.py
new file mode 100644
index 0000000..b23086f
--- /dev/null
+++ b/dataretention/retention/ruleutils.py
@@ -0,0 +1,386 @@
+import os
+import sys
+import json
+import traceback
+from retention.status import Status
+import retention.rule
+from retention.rule import Rule, RuleStore
+
+def get_rules_for_entries(cdb, path, path_entries, host, quiet=False):
+    rules = get_rules_for_path(cdb, path, host, True)
+    for entry in path_entries:
+        rules.extend(get_rules_for_path(cdb, entry, host, True))
+
+    paths_kept = []
+    uniq = []
+    for rule in rules:
+        if rule['path'] not in paths_kept:
+            paths_kept.append(rule['path'])
+            uniq.append(rule)
+
+    if not quiet:
+        uniq_sorted = sorted(uniq, key=lambda r: r['path'])
+        for rule in uniq_sorted:
+            print rule
+    return uniq_sorted
+
def format_rules_for_export(rules_list, indent_count):
    '''
    render the paths of the given rules as a python-style list
    literal, one single-quoted path per line, indented indent_count
    levels of four spaces each; an empty list renders as "[]"
    '''
    if not rules_list:
        return "[]"

    indent = " " * 4 * indent_count
    item_indent = indent + " " * 4
    # escape embedded single quotes so the output stays a valid literal
    items = [item_indent + "'" + rule['path'].replace("'", r"\'") + "'"
             for rule in rules_list]
    return "[\n" + ",\n".join(items) + "\n" + indent + "]"
+
+def import_rule_list(cdb, entries, status, host):
+    '''
+    import status rules for a list of files or dirs
+    - anything not ending in '/' is considered to be a file
+    - files/dirs must be specified by full path, anything else
+    will be skipped
+    - failures to add to rule store are reported but processing continues
+    '''
+    for entry in entries:
+        if entry[0] != os.path.sep:
+            print "relative path in rule, skipping:", entry
+            continue
+        if entry[-1] == '/':
+            entry_type = text_to_entrytype('dir')
+            entry = entry[:-1]
+        else:
+            entry_type = text_to_entrytype('file')
+        try:
+            do_add_rule(cdb, entry, entry_type,
+                        status, host)
+        except:
+            exc_type, exc_value, exc_traceback = sys.exc_info()
+            sys.stderr.write(repr(traceback.format_exception(
+                exc_type, exc_value, exc_traceback)))
+            sys.stderr.write("Couldn't add rule for %s to rule store\n" %
+                             entry)
+
def import_handle_status(line):
    '''
    see if the line passed is a status def line
    returns status found (if any) and next state
    '''
    for stat in Status.status_cf:
        match = Status.status_cf[stat][1].match(line)
        if match is None:
            continue
        if "]" in match.group(0):
            # status with an empty inline list; stay at status level
            return None, Rule.STATE_EXPECT_STATUS
        # status opening a multi-line list; entries follow
        return stat, Rule.STATE_EXPECT_ENTRIES
    return None, None
+
+def import_rules(cdb, rules_path, host):
+    # we don't toss all existing rules, these get merged into
+    # the rules already in the rules store
+
+    # it is possible to bork the list of files by deliberately
+    # including a file/dir with a newline in the name; this will
+    # just mean that your rule doesn't cover the files/dirs you want.
+    try:
+        rules_text = open(rules_path).read()
+    except:
+        exc_type, exc_value, exc_traceback = sys.exc_info()
+        sys.stderr.write(repr(traceback.format_exception(
+            exc_type, exc_value, exc_traceback)))
+        sys.stderr.write("Couldn't read rules from %s.\n" % rules_path)
+        return
+
+    lines = rules_text.split("\n")
+    state = Rule.STATE_START
+    rules = {}
+    active = None
+    for line in lines:
+        if Rule.comment_expr.match(line) or Rule.blank_expr.match(line):
+            continue
+        elif state == Rule.STATE_START:
+            if not Rule.first_line_expected.match(line):
+                print "unexpected line in rules file, wanted "
+                print "'dir_rules = ...', aborting:"
+                print line
+                return
+            else:
+                state = Rule.STATE_EXPECT_STATUS
+        elif state == Rule.STATE_EXPECT_STATUS:
+            if Rule.last_line_expected.match(line):
+                # done parsing file
+                break
+            active, state = import_handle_status(line)
+            if state == Rule.STATE_EXPECT_STATUS:
+                continue
+            elif state == Rule.STATE_EXPECT_ENTRIES:
+                rules[active] = []
+            elif state is None:
+                # not a status with empty list, not a status
+                # expecting entries on following lines, bail
+                print "unexpected line in rules file, aborting:"
+                print line
+                return
+        elif state == Rule.STATE_EXPECT_ENTRIES:
+            if Rule.entry_expr.match(line):
+                result = Rule.entry_expr.match(line)
+                rules[active].append(result.group(1))
+            elif Rule.end_entries_expr.match(line):
+                active = None
+                state = Rule.STATE_EXPECT_STATUS
+            else:
+                active, state = import_handle_status(line)
+                if state == Rule.STATE_EXPECT_STATUS:
+                    # end of entries with crap syntax, we forgive
+                    continue
+                elif state == Rule.STATE_EXPECT_ENTRIES:
+                    # found a status line with empty list.
+                    # so end of these entries ayways
+                    state = Rule.STATE_EXPECT_STATUS
+                    continue
+                elif state is None:
+                    # not an entry, not a status, not end of entries
+                    print "unexpected line in rules file, wanted entry, "
+                    print "status or entry end marker, aborting:"
+                    print line
+                    return
+        else:
+            print "unexpected line in rules file, aborting:"
+            print line
+            return
+
+    for status in Status.status_cf:
+        if status in rules:
+            import_rule_list(
+                cdb, rules[status],
+                Status.status_cf[status][0], host)
+
def do_remove_rule(cdb, path, host):
    '''delete the rule for the given absolute path from the host's table'''
    params = {'basedir': os.path.dirname(path),
              'name': os.path.basename(path)}
    cdb.store_db_delete(params, host)
+
def do_remove_rules(cdb, status, host):
    '''delete every rule with the given status from the host's table'''
    cdb.store_db_delete({'status': status}, host)
+
def do_add_rule(cdb, path, rtype, status, host):
    '''insert or replace the rule for the given absolute path'''
    params = {'basedir': os.path.dirname(path),
              'name': os.path.basename(path),
              'type': rtype,
              'status': status}
    cdb.store_db_replace(params, host)
+
def check_host_table_exists(cdb, host):
    '''return whether the rule store already has a table for this host'''
    return cdb.store_db_check_host_table(host)
+
def normalize_path(path, ptype):
    '''
    make sure the path ends in '/' if it's dir type, otherwise
    that it does not, return the normalized path
    '''
    ends_with_sep = (path[-1] == os.path.sep)
    if ptype == 'dir':
        return path if ends_with_sep else path + os.path.sep
    return path[:-1] if ends_with_sep else path
+
def export_rules(cdb, rules_path, host, status=None):
    '''
    write the host's rules (optionally restricted to one status) to
    rules_path in the python-literal format that import_rules reads

    write failures are reported to stderr, not raised
    '''
    # would be nice to be able to only export some rules. whatever

    # bucket the rules by status text, normalizing dir paths to end in '/'
    sorted_rules = {}
    for stext in Status.STATUS_TEXTS:
        sorted_rules[stext] = []
    for rule in get_rules(cdb, host, status):
        if rule['status'] not in Status.STATUS_TEXTS:
            continue
        rule['path'] = normalize_path(rule['path'], rule['type'])
        sorted_rules[rule['status']].append(rule)

    chunks = ["dir_rules = {\n"]
    for stext in Status.STATUS_TEXTS:
        chunks.append("    '%s': %s,\n" % (
            stext, format_rules_for_export(sorted_rules[stext], 2)))
    chunks.append("}\n")
    try:
        filep = open(rules_path, "w")
        filep.write("# -*- coding: utf-8 -*-\n")
        filep.write("".join(chunks))
        filep.close()
    except:
        exc_type, exc_value, exc_traceback = sys.exc_info()
        sys.stderr.write(repr(traceback.format_exception(
            exc_type, exc_value, exc_traceback)))
        sys.stderr.write("Couldn't save rules into %s.\n" % rules_path)
+
def entrytype_to_text(abbrev):
    '''map an entry type abbreviation to its full name, or None if unknown'''
    if abbrev not in Rule.TYPES:
        return None
    return Rule.TYPES_TO_TEXT[abbrev]
+
def text_to_entrytype(fullname):
    '''map a full entry type name back to its abbreviation, or None'''
    for abbrev in Rule.TYPES_TO_TEXT:
        if fullname == Rule.TYPES_TO_TEXT[abbrev]:
            return abbrev
    return None
+
def row_to_rule(row):
    '''
    convert a raw rule store row (basedir, name, type, status) into
    a rule dict with a joined path and human-readable type and status
    '''
    # e.g. ('/home/ariel/wmf/security',
    #       '/home/ariel/wmf/security/openjdk6', 'D', 'G')
    (basedir, name, entrytype, status) = row
    basedir = retention.rule.from_unicode(basedir)
    name = retention.rule.from_unicode(name)
    return {'path': os.path.join(basedir, name),
            'type': entrytype_to_text(entrytype),
            'status': Status.status_to_text(status)}
+
def get_rules(cdb, host, status=None):
    '''
    fetch all rules for a host from the store, optionally filtered
    by status, as a list of rule dicts
    '''
    fields = ['basedir', 'name', 'type', 'status']
    clause = {'status': status} if status else None
    crs = cdb.store_db_select(fields, clause, host)
    return [row_to_rule(row)
            for row in RuleStore.store_db_get_all_rows(crs)]
+
+def show_rules(cdb, host, status=None, prefix=None):
+    rules = get_rules(cdb, host, status)
+    if rules:
+        rules_sorted = sorted(rules, key=lambda r: r['path'])
+        for rule in rules_sorted:
+            if prefix is None or rule['path'].startswith(prefix):
+                print rule
+
def get_rules_with_prefix(cdb, path, host):
    '''
    retrieve all rules where the basedir starts with the specified path
    '''
    # prefixes...
    crs = cdb.store_db_select(['basedir', 'name', 'type', 'status'],
                              {'basedir': path}, host)
    return [row_to_rule(row)
            for row in RuleStore.store_db_get_all_rows(crs)]
+
def check_rule_prefixes(rows):
    '''
    separate out the rules with wildcards in the name field
    and those without

    returns (plain_rules, wildcard_rules); both empty if rows is None
    '''
    text = []
    wildcards = []
    if rows is None:
        return text, wildcards

    for row in rows:
        # only a '*' in the basename counts as a wildcard rule
        bucket = wildcards if '*' in os.path.basename(row['path']) else text
        bucket.append(row)
    return text, wildcards
+
def rule_is_prefix(basedir, name, path, wildcard=False):
    '''
    if the dir part of the rule entry plus the basename is
    a proper path prefix of the specified path (followed by the
    path separator, or it's the exact path), return True, else False

    wildcard matches are done only for a single wildcard in the name
    component of the rule entry and does not cross a directory path
    component i.e. basedir = /a/b and name = c* will not match
    path /a/b/cow/dog  but will match /a/b/cow
    '''
    rulepath = os.path.join(basedir, name)
    if not wildcard:
        # exact entry, or a proper ancestor directory of path
        if path == rulepath:
            return True
        return path.startswith(rulepath + os.path.sep)

    if len(rulepath) >= len(path):
        return False

    left, right = rulepath.split('*', 1)
    if path.startswith(left) and path.endswith(right):
        # the text matched by '*' must not span a directory separator.
        # use len(path) - len(right) as the slice end: the original's
        # -1 * len(right) yields 0 when right is empty, producing an
        # empty slice so e.g. 'c*' wrongly matched '/a/b/cow/dog'
        if os.path.sep not in path[len(left):len(path) - len(right)]:
            return True
    return False
+
+def get_rules_for_path(cdb, path, host, quiet=False):
+    # get all paths starting from / and descending to the specified path
+    prefixes = get_prefixes(path)
+    rows = []
+    # get all entries where the dir part of the path is a prefix and the
+    # name part of the path will be checked to see if it is the next dir
+    # elt in the path or wildcard matches it
+
+    for pref in prefixes:
+        rows.extend(get_rules_with_prefix(cdb, pref, host))
+    # split out the rules with wildcards in the basename from the rest
+    regulars, wildcards = check_rule_prefixes(rows)
+    keep = []
+    paths_kept = []
+    for plain in regulars:
+        if rule_is_prefix(os.path.dirname(plain['path']),
+                          os.path.basename(plain['path']), path):
+            if plain['path'] not in paths_kept:
+                keep.append(plain)
+                paths_kept.append(plain['path'])
+    for wild in wildcards:
+        if rule_is_prefix(os.path.dirname(wild['path']),
+                          os.path.basename(wild['path']),
+                          path, wildcard=True):
+            if wild['path'] not in paths_kept:
+                keep.append(wild)
+                paths_kept.append(wild['path'])
+
+    if len(keep) == 0:
+        keep_sorted = keep
+    else:
+        keep_sorted = sorted(keep, key=lambda r: r['path'])
+    if not quiet:
+        print "No rules for directory"
+    else:
+        for rule in keep_sorted:
+            print rule
+    return keep_sorted
+
def get_prefixes(path):
    '''
    given an absolute path like /a/b/c, return the list of all paths
    starting from / and descending to the specified path
    i.e. if given '/a/b/c', would return ['/', '/a', '/a/b', '/a/b/c']
    for relative paths or empty paths we return an empty prefix list
    '''
    if not path or not path.startswith('/'):
        return []
    prefixes = ['/']
    current = '/'
    for component in path.split(os.path.sep):
        if not component:
            continue
        current = os.path.join(current, component)
        prefixes.append(current)
    return prefixes
+
def get_rule_as_json(path, ptype, status):
    '''serialize a rule for the given path as a json object string'''
    return json.dumps({'basedir': os.path.dirname(path),
                       'name': os.path.basename(path),
                       'type': ptype,
                       'status': status})
diff --git a/dataretention/rulestore.py b/dataretention/rulestore.py
index 9490f02..976590d 100644
--- a/dataretention/rulestore.py
+++ b/dataretention/rulestore.py
@@ -10,6 +10,7 @@
 
 from retention.saltclientplus import LocalClientPlus
 import retention.utils
+import retention.ruleutils
 from retention.rule import Rule, RuleStore
 from retention.status import Status
 
@@ -65,7 +66,7 @@
         if path and path[-1] == os.path.sep:
             path = path[:-1]
         for host in hosts:
-            Rule.show_rules(cdb, host, status, prefix=path)
+            retention.ruleutils.show_rules(cdb, host, status, prefix=path)
 
     elif action == 'delete':
         if path and path[-1] == os.path.sep:
@@ -76,13 +77,13 @@
                 print "would remove rule for %s in %s" % (path, hosts)
             else:
                 for host in hosts:
-                    Rule.do_remove_rule(cdb, path, host)
+                    retention.ruleutils.do_remove_rule(cdb, path, host)
         elif status:
             if dryrun:
                 print "would remove rules for status %s in %s" % (status, 
hosts)
             else:
                 for host in hosts:
-                    Rule.do_remove_rules(cdb, status, host)
+                    retention.ruleutils.do_remove_rules(cdb, status, host)
 
     elif action == 'add':
         if status is None:
@@ -91,17 +92,17 @@
             usage('path must be specified to add a rule')
 
         if path[-1] == os.path.sep:
-            rtype = Rule.text_to_entrytype('dir')
+            rtype = retention.ruleutils.text_to_entrytype('dir')
             path = path[:-1]
         else:
-            rtype = Rule.text_to_entrytype('file')
+            rtype = retention.ruleutils.text_to_entrytype('file')
 
         if dryrun:
             print "would add rule for %s in %s with status %s of type %s" % (
                 hosts, path, status, rtype)
 
         for host in hosts:
-            Rule.do_add_rule(cdb, path, rtype, status, host)
+            retention.ruleutils.do_add_rule(cdb, path, rtype, status, host)
 
 def main():
     host = None
@@ -150,10 +151,10 @@
     cdb.store_db_init(None)
 
     hosts, htype = retention.utils.get_hosts_expr_type(host)
-    
+
     # if we are given one host, check that the host has a table or whine
     if htype == 'glob' and '*' not in hosts:
-        if not Rule.check_host_table_exists(cdb, host):
+        if not retention.ruleutils.check_host_table_exists(cdb, host):
             usage('no such host in rule store, %s' % host)
     elif htype == 'grain':
         client = LocalClientPlus()

-- 
To view, visit https://gerrit.wikimedia.org/r/233457
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ie73f7215a8ff4be9c9a079e03f465b393b3de329
Gerrit-PatchSet: 2
Gerrit-Project: operations/software
Gerrit-Branch: master
Gerrit-Owner: ArielGlenn <[email protected]>
Gerrit-Reviewer: ArielGlenn <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to