Conor wrote: > > Hello: > > I am trying to build a complicated relation, but am having trouble due > to the following: > * there are multiple intermediate tables involved > * the relation requires EXISTS clauses as the join condition between > several of these tables > > My model is shown below. The relation I am trying to build is > RunFilter.matching_runs, which is effectively a many-to-many > relationship between RunFilter and Run. Currently, I'm using raw SQL > to model this relationship, as shown at the end of the example. > > Ideally, I would like to build this relationship on top of other > relationships, so that RunFilter.matching_runs would use > RunFilter.var_clauses.any() and VarClause.matching_run_items.any() in > its join condition, and VarClause.matching_run_items would use > VarClause.values.any(). I realize that SQLAlchemy does not support > this because mapper properties are not compiled in any predictable > order, but it would be nice.
have you considered the advice at http://www.sqlalchemy.org/docs/05/mappers.html#building-query-enabled-properties ? an embedded EXISTS subquery in a relation() doesn't seem like something that should be emitted implicitly - hand-constructing the appropriate Query on a case by case basis would lead to better results. > > import sqlalchemy as sa > from sqlalchemy.ext.associationproxy import association_proxy > from sqlalchemy.ext.declarative import declarative_base > import sqlalchemy.orm as orm > from sqlalchemy.orm.collections import attribute_mapped_collection > > Base = declarative_base() > > # Association tables > var_clause_to_value = sa.Table("var_clause_to_value", Base.metadata, > sa.Column("value_id", sa.Integer, sa.ForeignKey("value.id"), > primary_key=True), > sa.Column("var_clause_id", sa.Integer, sa.ForeignKey > ("var_clause.id"), primary_key=True)) > > # Entities > class Variable(Base): > __tablename__ = "variable" > > # Fields > id = sa.Column(sa.Integer, primary_key=True) > # In the real world, we would have a `name` attribute as well, but > it's not > # important for this example. > > # Relations > values = orm.relation("Value") > > class Value(Base): > """ > Each variable can be attached to zero or more values. > """ > __tablename__ = "value" > > # Fields > id = sa.Column(sa.Integer, primary_key=True) > variable_id = sa.Column(sa.Integer, sa.ForeignKey("variable.id"), > nullable=False) > # In the real world, we would have a `name` attribute as well, but > it's not > # important for this example. > > # Relations > variable = orm.relation("Variable") > > class Run(Base): > """ > A run consists of a combination of inputs (variable=value pairs) > and an > output (result). > """ > __tablename__ = "run" > > def __str__(self): > return "run%d" % self.id > > # Fields > id = sa.Column(sa.Integer, primary_key=True) > result = sa.Column(sa.Unicode, nullable=False) > > # Relations > var2item = orm.relation("RunItem", > > collection_class=attribute_mapped_collection("variable")) > var2value = association_proxy( > "var2item", > "value", > creator=lambda var, value: RunItem(value=value, variable=var)) > > class RunItem(Base): > """ > A single variable=value pair for a given run. > """ > __tablename__ = "run_item" > > # Fields > run_id = sa.Column(sa.Integer, sa.ForeignKey("run.id"), > primary_key=True) > value_id = sa.Column(sa.Integer, sa.ForeignKey("value.id"), > primary_key=True) > variable_id = sa.Column(sa.Integer, sa.ForeignKey("variable.id"), > primary_key=True) > > # Relations > run = orm.relation("Run") > value = orm.relation("Value") > variable = orm.relation("Variable") > > class RunFilter(Base): > """ > A user-defined query for runs. For example, if you want to find > all runs > where var1=value1 and var2=value2, you could use:: > > >>> filter = RunFilter(var_clauses=[VarClause(variable=var1, > values=[value1]), > ... VarClause(variable=var2, > values=[value2])]) > > Note that for a run filter to match a run, *all* of its var > clauses have > to be satisfied. > """ > __tablename__ = "run_filter" > > def __str__(self): > return "run_filter%d" % self.id > > # Fields > id = sa.Column(sa.Integer, primary_key=True) > > # Relations > var_clauses = orm.relation("VarClause") > > # Desired SQL for `matching_runs`: > # (assume `run` and `run_filter` are correlated to an outer query) > # NOT EXISTS (SELECT 1 FROM var_clause > # WHERE var_clause.run_filter_id = run_filter.id > # AND NOT EXISTS (SELECT 1 FROM run_item > # WHERE run_item.variable_id = > var_clause.variable_id > # AND EXISTS (SELECT 1 FROM > var_clause_to_value > # WHERE > var_clause_to_value.var_clause_id = var_clause.id > # AND > var_clause_to_value.value_id = run_item.value_id))) > # > # I would like to define this relation as (using `if False` to > effectively > # comment it out but retain syntax highlighting): > if False: > def _primaryjoin(): > return ~RunFilter.var_clauses.any( > ~VarClause.matching_run_items.any(RunItem.run_id == > Run.id)) > matching_runs = orm.relation("Value", > primaryjoin=_primaryjoin) > del _primaryjoin > > class VarClause(Base): > """ > A VarClause matches a RunItem iff: > * RunItem.variable_id == VarClause.variable_id > * AND RunItem.value in VarClause.values > > Essentially it is saying "Run.var2value[var1] = value1 > OR Run.var2value[var1] = value2 > OR ...". > """ > __tablename__ = "var_clause" > > # Fields > id = sa.Column(sa.Integer, primary_key=True) > run_filter_id = sa.Column(sa.Integer, sa.ForeignKey > ("run_filter.id"), > nullable=False) > variable_id = sa.Column(sa.Integer, sa.ForeignKey("variable.id"), > nullable=False) > > # Relations > run_filter = orm.relation("RunFilter") > values = orm.relation("Value", secondary=var_clause_to_value) > variable = orm.relation("Variable") > > # Desired SQL for `matching_run_items`: > # (assume `var_clause` and `run_item` are correlated to an outer > query) > # run_item.variable_id = var_clause.variable_id > # AND EXISTS (SELECT 1 FROM var_clause_to_value > # WHERE var_clause_to_value.var_clause_id = > var_clause.id > # AND var_clause_to_value.value_id = > run_item.value_id) > # > # I would like to define this relation as: > if False: > def _primaryjoin(): > return ((RunItem.variable_id == VarClause.variable_id) > & (VarClause.values.any(Value.id == > RunItem.value_id))) > matching_run_items = orm.relation("RunItem", > primaryjoin=_primaryjoin) > del _primaryjoin > > # Test Case > engine = sa.create_engine("sqlite:///:memory:") > Base.metadata.create_all(engine) > session = orm.create_session(engine, autocommit=False, autoflush=True) > > var1 = Variable() > value1_1 = Value(variable=var1) > value1_2 = Value(variable=var1) > session.add_all([var1, value1_1, value1_2]) > > var2 = Variable() > value2_1 = Value(variable=var2) > value2_2 = Value(variable=var2) > session.add_all([var2, value2_1, value2_2]) > > run1 = Run(id=1, result=u"good") > run1.var2value[var1] = value1_1 > run1.var2value[var2] = value2_1 > run2 = Run(id=2, result=u"bad") > run2.var2value[var1] = value1_2 > run2.var2value[var2] = value2_2 > session.add_all([run1, run2]) > > # run_filter1 should match run1. > run_filter1 = RunFilter(id=1) > run_filter1.var_clauses.append(VarClause(variable=var1, values= > [value1_1, value1_2])) > run_filter1.var_clauses.append(VarClause(variable=var2, values= > [value2_1])) > session.add(run_filter1) > > # run_filter2 should match run1 and run2. > run_filter2 = RunFilter(id=2) > run_filter2.var_clauses.append(VarClause(variable=var1, values= > [value1_1, value1_2])) > run_filter2.var_clauses.append(VarClause(variable=var2, values= > [value2_1, value2_2])) > session.add(run_filter2) > > # run_filter3 should match no runs. > run_filter3 = RunFilter(id=3) > run_filter3.var_clauses.append(VarClause(variable=var1, values= > [value1_2])) > run_filter3.var_clauses.append(VarClause(variable=var2, values= > [value2_1])) > session.add(run_filter3) > > session.flush() > > engine.echo = True > > if False: > # Query all run filters and eager-load their matching runs. > q = session.query(RunFilter) > q = q.options(orm.eagerload("matching_runs")) > run_filter2run = dict((x, x.runs) for x in q) > else: > # Do it manually, sort of. > q = session.query(RunFilter, Run) > q = q.from_statement( > """SELECT run_filter.id AS run_filter_id, > run.id AS run_id > FROM run_filter, run > WHERE NOT EXISTS (SELECT 1 FROM var_clause > WHERE var_clause.run_filter_id = run_filter.id > AND NOT EXISTS (SELECT 1 FROM run_item > WHERE run_item.run_id = run.id > AND run_item.variable_id = > var_clause.variable_id > AND EXISTS (SELECT 1 FROM > var_clause_to_value > WHERE > var_clause_to_value.var_clause_id = var_clause.id > AND > var_clause_to_value.value_id = run_item.value_id))) > """) > run_filter2run = {} > for (run_filter, run) in q: > runs = run_filter2run.setdefault(run_filter, []) > runs.append(run) > > for (run_filter, runs) in run_filter2run.iteritems(): > print "%s matching runs: %s" % (run_filter, ", ".join(str(run) for > run in runs)) > > > > --~--~---------~--~----~------------~-------~--~----~ You received this message because you are subscribed to the Google Groups "sqlalchemy" group. To post to this group, send email to sqlalchemy@googlegroups.com To unsubscribe from this group, send email to sqlalchemy+unsubscr...@googlegroups.com For more options, visit this group at http://groups.google.com/group/sqlalchemy?hl=en -~----------~----~----~----~------~----~------~--~---