Yuvipanda has uploaded a new change for review.
https://gerrit.wikimedia.org/r/182848
Change subject: Add report that verifies view definitions
..
Add report that verifies view definitions
- Fix greylisted.yaml to match reality - mostly human errors
that were made when transcribing from the perl file
- Fully paranthesize all SQL expressions in greylisted.yaml
since that is how MySQL seems to give them back when asking
for view definitions
- Use a combination of regex (to clean up) and pyparsing to
parse the SQL used to define a view and convert that into
Table and Column objects
- Diff the Table and Column objects from greylisted/whitelisted
files vs reality and report on things that are missing
Bug: T85473
Change-Id: I2e8896fd55a75fa7f6ae1a63bdbb6c04e47bff5c
---
M auditor/audit.py
A auditor/reports/viewdiffs.py
M greylisted.yaml
M requirements.txt
4 files changed, 212 insertions(+), 93 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/operations/software/labsdb-auditor
refs/changes/48/182848/1
diff --git a/auditor/audit.py b/auditor/audit.py
index 958dda5..10b5371 100644
--- a/auditor/audit.py
+++ b/auditor/audit.py
@@ -33,6 +33,7 @@
from reports.databases import databases_report
from reports.tables import extra_tables_report
+from reports.viewdiffs import views_schema_diff_report
argparser = argparse.ArgumentParser()
@@ -55,8 +56,9 @@
rr = ReportRunner(config)
-rr.register_report(databases_report)
-rr.register_report(extra_tables_report)
+# rr.register_report(databases_report)
+# rr.register_report(extra_tables_report)
+rr.register_report(views_schema_diff_report)
logging.info('Starting report generation')
reports = rr.run()
diff --git a/auditor/reports/viewdiffs.py b/auditor/reports/viewdiffs.py
new file mode 100644
index 000..69415a5
--- /dev/null
+++ b/auditor/reports/viewdiffs.py
@@ -0,0 +1,119 @@
+# Copyright 2015 Yuvi Panda
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import re
+from pyparsing import OneOrMore, Optional, Word, SkipTo, StringEnd, alphanums
+
+from ..models import Column, Table
+from ..dbutils import get_tables
+
+
+def _diff(expected, actual, fields):
+diff = {}
+for field in fields:
+a = expected.__getattribute__(field)
+b = actual.__getattribute__(field)
+if a != b:
+diff[field] = {
+'expected': a,
+'found': b
+}
+return diff
+
+def diff_columns(expected, actual):
+return _diff(expected, actual, ('name', 'whitelisted', 'condition',
'replacewith'))
+
+def diff_tables(expected, actual):
+table_diff = _diff(expected, actual, ('name', 'where', 'table_name'))
+missing_cols = list(set(expected.columns.keys()) -
set(actual.columns.keys()))
+extra_cols = list(set(actual.columns.keys()) -
set(expected.columns.keys()))
+common_columns =
set(actual.columns.keys()).intersection(expected.columns.keys())
+column_diffs = {}
+for name in common_columns:
+col_diff = diff_columns(expected.columns[name], actual.columns[name])
+if col_diff:
+column_diffs[name] = col_diff
+
+column_info = {}
+if missing_cols:
+column_info['missing'] = missing_cols
+if extra_cols:
+column_info['extra'] = extra_cols
+if column_diffs:
+column_info['diff'] = column_diffs
+if column_info:
+table_diff['columns'] = column_info
+return table_diff if table_diff else None
+
+EXTRACT_VIEWDEF_RE = re.compile(r"DEFINER VIEW `([^`]+)` AS (.*)")
+def _cleanup_viewdefiner(full_sql, db, table):
+definer_sql = EXTRACT_VIEWDEF_RE.search(full_sql).groups()[1]
+private_db_name = db.replace('_p', '')
+sql = definer_sql.replace('`', '') \
+.replace('%s.%s.' % (private_db_name, table), '')
+
+return sql
+
+# We have to parse a specific subset of SQL, that is used to define views
+# The Grammar is:
+#
+# sql = "SELECT" { column-defintions } "FROM" wikiname.tablename [ "WHERE"
cond ]
+# column-definition = column-name "AS" column-name
+# | "NULL" "AS" column-name
+# | "IF(" cond ", NULL, " column-name ")" "AS" column-name
+#
+# Where cond is a valid SQL relational expression, column-name, wikiname,
tablename are strings
+
+identifier = alphanums + "_"
+if_definition = "if(" + SkipTo(",")("condition") + "," + Word(identifier) +
"," + Word(identifier)