Markus Korn has proposed merging lp:~thekorn/zeitgeist/db_schema_3 into lp:zeitgeist.
Requested reviews: Zeitgeist Framework Team (zeitgeist) Related bugs: #673394 Queries for subj_uri and subj_origin are using no index https://bugs.launchpad.net/bugs/673394 #673452 Using the subj_origin column of event_view is slower than it should be https://bugs.launchpad.net/bugs/673452 This branch introduces a new db schema (version 3) and bundles a few performance related fixes, [0] compares the performance of this branch lp:zeitgeist (each query is run ~10 times, the plot shows the average). Changes in detail: * renamed event_view.subj_origin to event_view.subj_origin_uri and event_view.subj_origin_id to event_view.subj_origin, this makes "subj_origin" point to an id in the uri table, and "subj_origin_uri" the resolved value. Also we are not using the redundant "SELECT id FROM uri WHERE uri.id=event.subj_origin" query anymore. (LP: #673452) * removed event_view.subj_uri_id and added event.subj_id, both values are in fact the same, but we again sting on another "SELECT" query in the event_view. * Optimization in FindEvent queries: we are not ordering and grouping by resolved values anymore, we are doing it on integer ids, which is much faster (LP: #673394) [0] https://dl.dropbox.com/u/174479/compare_queries.svg -- https://code.launchpad.net/~thekorn/zeitgeist/db_schema_3/+merge/40877 Your team Zeitgeist Framework Team is requested to review the proposed merge of lp:~thekorn/zeitgeist/db_schema_3 into lp:zeitgeist.
=== modified file '_zeitgeist/engine/__init__.py' --- _zeitgeist/engine/__init__.py 2010-10-19 13:54:12 +0000 +++ _zeitgeist/engine/__init__.py 2010-11-15 15:11:50 +0000 @@ -55,7 +55,7 @@ # Required version of DB schema CORE_SCHEMA="core" - CORE_SCHEMA_VERSION = 2 + CORE_SCHEMA_VERSION = 3 USER_EXTENSION_PATH = os.path.join(DATA_PATH, "extensions") === modified file '_zeitgeist/engine/main.py' --- _zeitgeist/engine/main.py 2010-11-12 17:39:28 +0000 +++ _zeitgeist/engine/main.py 2010-11-15 15:11:50 +0000 @@ -148,8 +148,9 @@ def _get_subject_from_row(self, row): subject = Subject() - for field in ("uri", "origin", "text", "storage"): + for field in ("uri", "text", "storage"): setattr(subject, field, row["subj_" + field]) + setattr(subject, "origin", row["subj_origin_uri"]) for field in ("interpretation", "manifestation", "mimetype"): setattr(subject, field, getattr(self, "_" + field).value(row["subj_" + field])) @@ -353,10 +354,13 @@ sql += (" ORDER BY timestamp DESC", " ORDER BY timestamp ASC", - " GROUP BY subj_uri ORDER BY timestamp DESC", - " GROUP BY subj_uri ORDER BY timestamp ASC", - " GROUP BY subj_uri ORDER BY COUNT(subj_uri) DESC, timestamp DESC", - " GROUP BY subj_uri ORDER BY COUNT(subj_uri) ASC, timestamp ASC", + # thekorn: please note, event.subj_id == uri.id, as in + # the subj_id points directly to an entry in the uri table, + # so we are in fact grouping by subj_uris here + " GROUP BY subj_id ORDER BY timestamp DESC", + " GROUP BY subj_id ORDER BY timestamp ASC", + " GROUP BY subj_id ORDER BY COUNT(subj_id) DESC, timestamp DESC", + " GROUP BY subj_id ORDER BY COUNT(subj_id) ASC, timestamp ASC", " GROUP BY actor ORDER BY COUNT(actor) DESC, timestamp DESC", " GROUP BY actor ORDER BY COUNT(actor) ASC, timestamp ASC", " GROUP BY actor ORDER BY timestamp DESC", === modified file '_zeitgeist/engine/sql.py' --- _zeitgeist/engine/sql.py 2010-10-25 20:26:03 +0000 +++ _zeitgeist/engine/sql.py 2010-11-15 15:11:50 +0000 @@ -369,14 +369,12 @@ AS payload, (SELECT value FROM uri WHERE uri.id=event.subj_id) AS subj_uri, - (SELECT id FROM uri WHERE uri.id=event.subj_id) - AS subj_uri_id, + event.subj_id, -- #this directly points to an uri event.subj_interpretation, event.subj_manifestation, + event.subj_origin, (SELECT value FROM uri WHERE uri.id=event.subj_origin) - AS subj_origin, - (SELECT id FROM uri WHERE uri.id=event.subj_origin) - AS subj_origin_id, + AS subj_origin_uri, event.subj_mimetype, (SELECT value FROM text WHERE text.id = event.subj_text) AS subj_text, @@ -528,14 +526,17 @@ def add_text_condition(self, column, value, like=False, negation=False, cache=None): if like: assert column in ("subj_uri", "subj_origin", "actor", "subj_mimetype"), \ - "prefix search on the %r column is not supported by zeitgeist" - if column in ("subj_uri", "subj_origin"): - view_column = "%s_id" %column + "prefix search on the %r column is not supported by zeitgeist" %column + if column == "subj_uri": + # subj_id directly points to the id of an uri entry + view_column = "subj_id" else: view_column = column optimized_glob, value = self.optimize_glob("id", TABLE_MAP.get(column, column), value) sql = "%s %sIN (%s)" %(view_column, self.NOT if negation else "", optimized_glob) else: + if column == "subj_origin": + column = "subj_origin_uri" sql = "%s %s= ?" %(column, "!" if negation else "") if cache is not None: value = cache[value] === added file '_zeitgeist/engine/upgrades/core_2_3.py' --- _zeitgeist/engine/upgrades/core_2_3.py 1970-01-01 00:00:00 +0000 +++ _zeitgeist/engine/upgrades/core_2_3.py 2010-11-15 15:11:50 +0000 @@ -0,0 +1,11 @@ +# upgrading from db version 2 to 1 +# this requires no update to the actual data in the database +# it is only a schema change of event_view. This change is done +# in sql.create_db() + +# the schema change is renaming 'subj_uri_id' column to 'subj_id', as +# both values are the same. Also 'subj_origin' gets renamed to +# 'subj_origin_uri' and 'subj_origin_id' to 'subj_origin'. + +def run(cursor): + pass === modified file 'test/sql-test.py' --- test/sql-test.py 2010-09-22 17:15:03 +0000 +++ test/sql-test.py 2010-11-15 15:11:50 +0000 @@ -97,12 +97,12 @@ where = WhereClause(WhereClause.AND) where.add_text_condition("subj_uri", "bar", like=True) self.assertEquals(where.sql.replace("?", "%s") % tuple(where.arguments), - "(subj_uri_id IN (SELECT id FROM uri WHERE (value >= bar AND value < bas)))") + "(subj_id IN (SELECT id FROM uri WHERE (value >= bar AND value < bas)))") where = WhereClause(WhereClause.AND) where.add_text_condition("subj_origin", "bar", like=True) self.assertEquals(where.sql.replace("?", "%s") % tuple(where.arguments), - "(subj_origin_id IN (SELECT id FROM uri WHERE (value >= bar AND value < bas)))") + "(subj_origin IN (SELECT id FROM uri WHERE (value >= bar AND value < bas)))") where = WhereClause(WhereClause.AND) where.add_text_condition("actor", "bar", like=True, negation=True)
_______________________________________________ Mailing list: https://launchpad.net/~zeitgeist Post to : zeitgeist@lists.launchpad.net Unsubscribe : https://launchpad.net/~zeitgeist More help : https://help.launchpad.net/ListHelp