Yuvipanda has submitted this change and it was merged.
Change subject: Memoize parsing View definitions into Table objects
..
Memoize parsing View definitions into Table objects
This patch drops CPU time by about 20x, since profiling
showed that about 50% of time is spent in parsing SQL. We
could also make the parser itself faster, but this seemed to be
a simpler solution.
Also modifies the regex cleaning routine to strip the database
name from the 'from' clause as well, to increase the hit rate
of the memoization. The hit rate would be 0 otherwise.
Also stops setting .table on column, to make the data structure
acyclic.
Change-Id: Ibf4f91b07ea45029561ab399b6aea6f00db82aaa
---
M auditor/models.py
M auditor/reports/viewdiffs.py
2 files changed, 11 insertions(+), 3 deletions(-)
Approvals:
Yuvipanda: Verified
Merlijn van Deen: Looks good to me, approved
diff --git a/auditor/models.py b/auditor/models.py
index 5d5cd92..0424f25 100644
--- a/auditor/models.py
+++ b/auditor/models.py
@@ -52,7 +52,6 @@
def add_column(self, column):
self.columns[column.name] = column
-column.table = self
def to_dict(self):
"""
diff --git a/auditor/reports/viewdiffs.py b/auditor/reports/viewdiffs.py
index b0e57a1..1672085 100644
--- a/auditor/reports/viewdiffs.py
+++ b/auditor/reports/viewdiffs.py
@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import re
+import copy
import logging
from pyparsing import OneOrMore, Optional, Word, SkipTo, StringEnd, alphanums
@@ -72,7 +73,8 @@
definer_sql = EXTRACT_VIEWDEF_RE.search(full_sql).groups()[1]
private_db_name = db.replace('_p', '')
sql = definer_sql.replace('`', '') \
-.replace('%s.%s.' % (private_db_name, table), '')
+.replace(private_db_name + '.', '') \
+.replace(table + '.', '')
return sql
@@ -90,14 +92,20 @@
if_definition = "if(" + SkipTo(",")("null_if") + "," + Word(identifier) + ","
+ Word(identifier) + ")"
column_definition = (if_definition ^ Word(identifier)('expression')) + "AS" +
Word(identifier)("name") + Optional(",")
sql_definition = "select" + OneOrMore(column_definition)("columns") + \
- "from" + Word(identifier) + "." +
Word(identifier)("tablename") + \
+ "from" + Word(identifier)("tablename") + \
Optional("where" + SkipTo(StringEnd())("include_row_if")) +
StringEnd()
+
+# Caches cleaned definer sql -> table instances
+# Since a lot of definers are the same, this memoization should speed things up
+cache = {}
def _table_from_definer(sql, viewname):
"""
Build a Table object given a cleaned up SQL statement that defines the view
"""
+if sql in cache:
+return copy.deepcopy(cache[sql])
res = sql_definition.parseString(sql)
table = Table(viewname, {}, res.include_row_if if res.include_row_if else
None, res.tablename)
for tokens, start, end in column_definition.scanString(sql):
@@ -105,6 +113,7 @@
whitelisted=tokens.null_if == '' and
tokens.expression != 'NULL',
null_if=tokens.null_if if tokens.null_if else
None))
+cache[sql] = table
return table
--
To view, visit https://gerrit.wikimedia.org/r/184143
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ibf4f91b07ea45029561ab399b6aea6f00db82aaa
Gerrit-PatchSet: 5
Gerrit-Project: operations/software/labsdb-auditor
Gerrit-Branch: master
Gerrit-Owner: Yuvipanda
Gerrit-Reviewer: Merlijn van Deen
Gerrit-Reviewer: Yuvipanda
___
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits