Markus Korn has proposed merging lp:~thekorn/zeitgeist/wildcard_support into lp:zeitgeist.
Requested reviews: Mikkel Kamstrup Erlandsen (kamstrup), Zeitgeist Framework Team (zeitgeist). This branch adds wildcard support to some template fields and completes the fix for bug 485966. -- https://code.launchpad.net/~thekorn/zeitgeist/wildcard_support/+merge/25345 Your team, Zeitgeist Framework Team, is requested to review the proposed merge of lp:~thekorn/zeitgeist/wildcard_support into lp:zeitgeist.
=== modified file '_zeitgeist/engine/main.py' --- _zeitgeist/engine/main.py 2010-05-14 11:54:52 +0000 +++ _zeitgeist/engine/main.py 2010-05-14 17:20:42 +0000 @@ -32,7 +32,7 @@ from collections import defaultdict from zeitgeist.datamodel import Event as OrigEvent, StorageState, TimeRange, \ - ResultType, get_timestamp_for_now, Interpretation, Symbol, NEGATION_OPERATOR + ResultType, get_timestamp_for_now, Interpretation, Symbol, NEGATION_OPERATOR, WILDCARD from _zeitgeist.engine.datamodel import Event, Subject from _zeitgeist.engine.extension import ExtensionsCollection, load_class from _zeitgeist.engine import constants @@ -44,6 +44,12 @@ logging.basicConfig(level=logging.DEBUG) log = logging.getLogger("zeitgeist.engine") +class NegationNotSupported(ValueError): + pass + +class WildcardNotSupported(ValueError): + pass + def parse_negation(kind, field, value, parse_negation=True): """checks if value starts with the negation operator, if value starts with the negation operator but the field does @@ -55,8 +61,42 @@ negation = True value = value[len(NEGATION_OPERATOR):] if negation and field not in kind.SUPPORTS_NEGATION: - raise ValueError("This field does not support negation") + raise NegationNotSupported("This field does not support negation") return value, negation + +def parse_wildcard(kind, field, value): + """checks if value ends with the a wildcard, + if value ends with a wildcard but the field does not support wildcards + a ValueError is raised. + This function returns a (value_without_wildcard, wildcard)-tuple + """ + wildcard = False + if value.endswith(WILDCARD): + wildcard = True + value = value[:-len(WILDCARD)] + if wildcard and field not in kind.SUPPORTS_WILDCARDS: + raise WildcardNotSupported("This field does not support wildcards") + return value, wildcard + +def parse_operators(kind, field, value): + """runs both (parse_negation and parse_wildcard) parser functions + on query values, and handles the special case of Subject.Text correctly. 
+ returns a (value_without_negation_and_wildcard, negation, wildcard)-tuple + """ + try: + value, negation = parse_negation(kind, field, value) + except ValueError: + if kind is Subject and field == Subject.Text: + # we do not support negation of the text field, + # the text field starts with the NEGATION_OPERATOR + # so we handle this string as the content instead + # of an operator + negation = False + else: + raise + value, wildcard = parse_wildcard(kind, field, value) + return value, negation, wildcard + class ZeitgeistEngine: @@ -186,58 +226,57 @@ subwhere.add("id = ?", event_template.id) try: - value, negation = parse_negation(Event, Event.Interpretation, event_template.interpretation) + value, negation, wildcard = parse_operators(Event, Event.Interpretation, event_template.interpretation) # Expand event interpretation children event_interp_where = WhereClause(WhereClause.OR, negation) for child_interp in (Symbol.find_child_uris_extended(value)): if child_interp: - event_interp_where.add("interpretation = ?", - self._interpretation[child_interp]) + event_interp_where.add_text_condition("interpretation", + child_interp, like=wildcard, cache=self._interpretation) if event_interp_where: subwhere.extend(event_interp_where) - value, negation = parse_negation(Event, Event.Manifestation, event_template.manifestation) + value, negation, wildcard = parse_operators(Event, Event.Manifestation, event_template.manifestation) # Expand event manifestation children event_manif_where = WhereClause(WhereClause.OR, negation) for child_manif in (Symbol.find_child_uris_extended(value)): if child_manif: - event_manif_where.add("manifestation = ?", - self._manifestation[child_manif]) + event_manif_where.add_text_condition("manifestation", + child_manif, like=wildcard, cache=self._manifestation) if event_manif_where: subwhere.extend(event_manif_where) - value, negation = parse_negation(Subject, Subject.Interpretation, subject_template.interpretation) + value, negation, wildcard = 
parse_operators(Subject, Subject.Interpretation, subject_template.interpretation) # Expand subject interpretation children su_interp_where = WhereClause(WhereClause.OR, negation) for child_interp in (Symbol.find_child_uris_extended(value)): if child_interp: - su_interp_where.add("subj_interpretation = ?", - self._interpretation[child_interp]) + su_interp_where.add_text_condition("subj_interpretation", + child_interp, like=wildcard, cache=self._interpretation) if su_interp_where: subwhere.extend(su_interp_where) - value, negation = parse_negation(Subject, Subject.Manifestation, subject_template.manifestation) + value, negation, wildcard = parse_operators(Subject, Subject.Manifestation, subject_template.manifestation) # Expand subject manifestation children su_manif_where = WhereClause(WhereClause.OR, negation) for child_manif in (Symbol.find_child_uris_extended(value)): if child_manif: - su_manif_where.add("subj_manifestation = ?", - self._manifestation[child_manif]) + su_manif_where.add_text_condition("subj_manifestation", + child_manif, like=wildcard, cache=self._manifestation) if su_manif_where: subwhere.extend(su_manif_where) # FIXME: Expand mime children as well. # Right now we only do exact matching for mimetypes # thekorn: this will be fixed when wildcards are supported - value, negation = parse_negation(Subject, Subject.Mimetype, subject_template.mimetype) + value, negation, wildcard = parse_operators(Subject, Subject.Mimetype, subject_template.mimetype) if value: - subwhere.add("subj_mimetype %s= ?" %(NEGATION_OPERATOR if negation else ""), - self._mimetype[value]) + subwhere.add_text_condition("subj_mimetype", + value, wildcard, negation, cache=self._mimetype) - value, negation = parse_negation(Event, Event.Actor, event_template.actor) + value, negation, wildcard = parse_operators(Event, Event.Actor, event_template.actor) if value: - subwhere.add("actor %s= ?" 
%(NEGATION_OPERATOR if negation else ""), - self._actor[value]) + subwhere.add_text_condition("actor", value, wildcard, negation, cache=self._actor) except KeyError, e: # Value not in DB log.debug("Unknown entity in query: %s" % e) @@ -247,18 +286,8 @@ for key in ("uri", "origin", "text"): value = getattr(subject_template, key) if value: - try: - value, negation = parse_negation(Subject, getattr(Subject, key.title()), value) - except ValueError: - if key == "text": - # we do not support negation of the text field, - # the text field starts with the NEGATION_OPERATOR - # so we handle this string as the content instead - # of an operator - negation = False - else: - raise - subwhere.add("subj_%s %s= ?" %(key, NEGATION_OPERATOR if negation else ""), value) + value, negation, wildcard = parse_operators(Subject, getattr(Subject, key.title()), value) + subwhere.add_text_condition("subj_%s" %key, value, wildcard, negation) where_or.extend(subwhere) return where_or === modified file '_zeitgeist/engine/sql.py' --- _zeitgeist/engine/sql.py 2010-05-13 11:46:31 +0000 +++ _zeitgeist/engine/sql.py 2010-05-14 17:20:42 +0000 @@ -28,6 +28,12 @@ logging.basicConfig(level=logging.DEBUG) log = logging.getLogger("zeitgeist.sql") +TABLE_MAP = { + "subj_mimetype": "mimetype", + "subj_origin": "uri", + "subj_uri": "uri", +} + class UnicodeCursor(sqlite3.Cursor): @staticmethod @@ -366,6 +372,31 @@ self.arguments.append(arguments) else: self.arguments.extend(arguments) + + def add_text_condition(self, column, value, like=False, negation=False, cache=None): + if like: + # thekorn: unfortunatly the data in event_view is a bit inconsistent + # e.g.: + # subj_uri and subj_origin are presented as string-values + # actor and subj_mimetype are ids + # (LP: #580601) + if column in ("subj_uri", "subj_origin"): + value_type = "value" + elif column in ("actor", "subj_mimetype"): + value_type = "id" + else: + raise AssertionError("We don't know how to handle this type of data") + # thekorn: this is a 
first (unoptimized version) + # see http://www.sqlite.org/optoverview.html '4.0 The LIKE optimization' + # for how this will look in the future + sql = "%s %sIN (SELECT %s FROM %s WHERE value GLOB ?)" \ + %(column, self.NOT if negation else "", value_type, TABLE_MAP.get(column, column)) + value += "*" + else: + sql = "%s %s= ?" %(column, "!" if negation else "") + if cache is not None: + value = cache[value] + self.add(sql, value) def extend(self, where): self.add(where.sql, where.arguments) === modified file 'test/datamodel-test.py' --- test/datamodel-test.py 2010-05-14 11:54:52 +0000 +++ test/datamodel-test.py 2010-05-14 17:20:42 +0000 @@ -304,6 +304,42 @@ event = Event.new_for_values(timestamp=1000, subject_storage="sometext") template = Event.new_for_values(subject_storage="xxxx") self.assertRaises(ValueError, template.matches_event, event) + + def testWildcardTemplateMatching(self): + event = Event.new_for_values(actor="boo bar") + + template = Event.new_for_values(actor="boo*") + self.assertTrue(event.matches_template(template)) + + # wildcards are not supported in interpretation, + # so they are handled as content + event = Event.new_for_values(interpretation="boo bar") + + template = Event.new_for_values(interpretation="boo*") + self.assertFalse(event.matches_template(template)) + + event = Event.new_for_values(subject_uri="boo bar") + + template = Event.new_for_values(subject_uri="boo*") + self.assertTrue(event.matches_template(template)) + + event = Event.new_for_values(subject_origin="boo bar") + + template = Event.new_for_values(subject_origin="boo*") + self.assertTrue(event.matches_template(template)) + + event = Event.new_for_values(subject_mimetype="boo bar") + + template = Event.new_for_values(subject_mimetype="boo*") + self.assertTrue(event.matches_template(template)) + + def testNegationWildcardTemplateMatching(self): + event = Event.new_for_values(actor="boo bar") + + template = Event.new_for_values(actor="!boo*") + 
self.assertFalse(event.matches_template(template)) + template = Event.new_for_values(actor="!test*") + self.assertTrue(event.matches_template(template)) class TimeRangeTest (unittest.TestCase): === modified file 'test/engine-test.py' --- test/engine-test.py 2010-05-14 11:54:52 +0000 +++ test/engine-test.py 2010-05-14 17:20:42 +0000 @@ -763,6 +763,51 @@ TimeRange.always(), [template], StorageState.Any, 10, ResultType.MostRecentEvents ) + + def testWildcard(self): + import_events("test/data/five_events.js", self.engine) + + template = Event.new_for_values( + actor = "ge*" + ) + ids = self.engine.find_eventids(TimeRange.always(), + [template,], StorageState.Any, 10, ResultType.MostRecentEvents + ) + self.assertEquals(2, len(ids)) + + template = Event.new_for_values( + actor = "!ge*" + ) + ids = self.engine.find_eventids(TimeRange.always(), + [template,], StorageState.Any, 10, ResultType.MostRecentEvents + ) + self.assertEquals(3, len(ids)) + + template = Event.new_for_values( + subject_mimetype = "text/*" + ) + ids = self.engine.find_eventids(TimeRange.always(), + [template,], StorageState.Any, 10, ResultType.MostRecentEvents + ) + self.assertEquals(5, len(ids)) + + template = Event.new_for_values( + subject_uri = "http://*" + ) + + ids = self.engine.find_eventids(TimeRange.always(), + [template,], StorageState.Any, 10, ResultType.MostRecentEvents + ) + self.assertEquals(1, len(ids)) + + template = Event.new_for_values( + subject_origin = "file://*" + ) + + ids = self.engine.find_eventids(TimeRange.always(), + [template,], StorageState.Any, 10, ResultType.MostRecentEvents + ) + self.assertEquals(5, len(ids)) if __name__ == "__main__": unittest.main() === modified file 'test/test-sql.py' --- test/test-sql.py 2010-05-13 13:12:12 +0000 +++ test/test-sql.py 2010-05-14 17:20:42 +0000 @@ -66,6 +66,27 @@ self.assertEquals(where.sql % tuple(where.arguments), "(foo = 10 AND NOT (subfoo = 68 OR subbar = 69) AND bar = 11)") + + def testAddTextCondition(self): + where = 
WhereClause(WhereClause.AND) + where.add_text_condition("boo", "bar") + self.assertEquals(where.sql.replace("?", "%s") % tuple(where.arguments), + "(boo = bar)") + + where = WhereClause(WhereClause.AND) + where.add_text_condition("boo", "bar", negation=True) + self.assertEquals(where.sql.replace("?", "%s") % tuple(where.arguments), + "(boo != bar)") + + where = WhereClause(WhereClause.AND) + where.add_text_condition("boo", "bar", like=True) + self.assertEquals(where.sql.replace("?", "%s") % tuple(where.arguments), + "(boo IN (SELECT id FROM boo WHERE value GLOB bar*))") + + where = WhereClause(WhereClause.AND) + where.add_text_condition("boo", "bar", like=True, negation=True) + self.assertEquals(where.sql.replace("?", "%s") % tuple(where.arguments), + "(boo NOT IN (SELECT id FROM boo WHERE value GLOB bar*))") if __name__ == "__main__": === modified file 'zeitgeist/datamodel.py' --- zeitgeist/datamodel.py 2010-05-14 11:54:52 +0000 +++ zeitgeist/datamodel.py 2010-05-14 17:20:42 +0000 @@ -40,10 +40,15 @@ ] NEGATION_OPERATOR = "!" 
+WILDCARD = "*" def EQUAL(x, y): """checks if both given arguments are equal""" return x == y + +def STARTSWITH(x, y): + """checks if 'x' startswith 'y'""" + return x.startswith(y) # next() function is python >= 2.6 try: @@ -436,6 +441,7 @@ Storage) = range(7) SUPPORTS_NEGATION = (Uri, Interpretation, Manifestation, Origin, Mimetype) + SUPPORTS_WILDCARDS = (Uri, Origin, Mimetype) def __init__(self, data=None): super(Subject, self).__init__([""]*len(Subject.Fields)) @@ -560,6 +566,10 @@ if field_id in self.SUPPORTS_NEGATION \ and expression.startswith(NEGATION_OPERATOR): return not self._check_field_match(field_id, expression[len(NEGATION_OPERATOR):], comp) + elif field_id in self.SUPPORTS_WILDCARDS \ + and expression.endswith(WILDCARD): + assert comp == EQUAL, "wildcards only work for pure text fields" + return self._check_field_match(field_id, expression[:-len(WILDCARD)], STARTSWITH) else: return comp(self[field_id], expression) @@ -585,6 +595,7 @@ Actor) = range(5) SUPPORTS_NEGATION = (Interpretation, Manifestation, Actor) + SUPPORTS_WILDCARDS = (Actor,) def __init__(self, struct = None): """ @@ -833,6 +844,10 @@ if field_id in self.SUPPORTS_NEGATION \ and expression.startswith(NEGATION_OPERATOR): return not self._check_field_match(field_id, expression[len(NEGATION_OPERATOR):], comp) + elif field_id in self.SUPPORTS_WILDCARDS \ + and expression.endswith(WILDCARD): + assert comp == EQUAL, "wildcards only work for pure text fields" + return self._check_field_match(field_id, expression[:-len(WILDCARD)], STARTSWITH) else: return comp(self[0][field_id], expression)
_______________________________________________ Mailing list: https://launchpad.net/~zeitgeist Post to : zeitgeist@lists.launchpad.net Unsubscribe : https://launchpad.net/~zeitgeist More help : https://help.launchpad.net/ListHelp