Hello: I am trying to build a complicated relation, but am having trouble due to the following: * there are multiple intermediate tables involved * the relation requires EXISTS clauses as the join condition between several of these tables
My model is shown below. The relation I am trying to build is RunFilter.matching_runs, which is effectively a many-to-many relationship between RunFilter and Run. Currently, I'm using raw SQL to model this relationship, as shown at the end of the example. Ideally, I would like to build this relationship on top of other relationships, so that RunFilter.matching_runs would use RunFilter.var_clauses.any() and VarClause.matching_run_items.any() in its join condition, and VarClause.matching_run_items would use VarClause.values.any(). I realize that SQLAlchemy does not support this because mapper properties are not compiled in any predictable order, but it would be nice. import sqlalchemy as sa from sqlalchemy.ext.associationproxy import association_proxy from sqlalchemy.ext.declarative import declarative_base import sqlalchemy.orm as orm from sqlalchemy.orm.collections import attribute_mapped_collection Base = declarative_base() # Association tables var_clause_to_value = sa.Table("var_clause_to_value", Base.metadata, sa.Column("value_id", sa.Integer, sa.ForeignKey("value.id"), primary_key=True), sa.Column("var_clause_id", sa.Integer, sa.ForeignKey ("var_clause.id"), primary_key=True)) # Entities class Variable(Base): __tablename__ = "variable" # Fields id = sa.Column(sa.Integer, primary_key=True) # In the real world, we would have a `name` attribute as well, but it's not # important for this example. # Relations values = orm.relation("Value") class Value(Base): """ Each variable can be attached to zero or more values. """ __tablename__ = "value" # Fields id = sa.Column(sa.Integer, primary_key=True) variable_id = sa.Column(sa.Integer, sa.ForeignKey("variable.id"), nullable=False) # In the real world, we would have a `name` attribute as well, but it's not # important for this example. # Relations variable = orm.relation("Variable") class Run(Base): """ A run consists of a combination of inputs (variable=value pairs) and an output (result). """ __tablename__ = "run" def __str__(self): return "run%d" % self.id # Fields id = sa.Column(sa.Integer, primary_key=True) result = sa.Column(sa.Unicode, nullable=False) # Relations var2item = orm.relation("RunItem", collection_class=attribute_mapped_collection("variable")) var2value = association_proxy( "var2item", "value", creator=lambda var, value: RunItem(value=value, variable=var)) class RunItem(Base): """ A single variable=value pair for a given run. """ __tablename__ = "run_item" # Fields run_id = sa.Column(sa.Integer, sa.ForeignKey("run.id"), primary_key=True) value_id = sa.Column(sa.Integer, sa.ForeignKey("value.id"), primary_key=True) variable_id = sa.Column(sa.Integer, sa.ForeignKey("variable.id"), primary_key=True) # Relations run = orm.relation("Run") value = orm.relation("Value") variable = orm.relation("Variable") class RunFilter(Base): """ A user-defined query for runs. For example, if you want to find all runs where var1=value1 and var2=value2, you could use:: >>> filter = RunFilter(var_clauses=[VarClause(variable=var1, values=[value1]), ... VarClause(variable=var2, values=[value2])]) Note that for a run filter to match a run, *all* of its var clauses have to be satisfied. """ __tablename__ = "run_filter" def __str__(self): return "run_filter%d" % self.id # Fields id = sa.Column(sa.Integer, primary_key=True) # Relations var_clauses = orm.relation("VarClause") # Desired SQL for `matching_runs`: # (assume `run` and `run_filter` are correlated to an outer query) # NOT EXISTS (SELECT 1 FROM var_clause # WHERE var_clause.run_filter_id = run_filter.id # AND NOT EXISTS (SELECT 1 FROM run_item # WHERE run_item.variable_id = var_clause.variable_id # AND EXISTS (SELECT 1 FROM var_clause_to_value # WHERE var_clause_to_value.var_clause_id = var_clause.id # AND var_clause_to_value.value_id = run_item.value_id))) # # I would like to define this relation as (using `if False` to effectively # comment it out but retain syntax highlighting): if False: def _primaryjoin(): return ~RunFilter.var_clauses.any( ~VarClause.matching_run_items.any(RunItem.run_id == Run.id)) matching_runs = orm.relation("Value", primaryjoin=_primaryjoin) del _primaryjoin class VarClause(Base): """ A VarClause matches a RunItem iff: * RunItem.variable_id == VarClause.variable_id * AND RunItem.value in VarClause.values Essentially it is saying "Run.var2value[var1] = value1 OR Run.var2value[var1] = value2 OR ...". """ __tablename__ = "var_clause" # Fields id = sa.Column(sa.Integer, primary_key=True) run_filter_id = sa.Column(sa.Integer, sa.ForeignKey ("run_filter.id"), nullable=False) variable_id = sa.Column(sa.Integer, sa.ForeignKey("variable.id"), nullable=False) # Relations run_filter = orm.relation("RunFilter") values = orm.relation("Value", secondary=var_clause_to_value) variable = orm.relation("Variable") # Desired SQL for `matching_run_items`: # (assume `var_clause` and `run_item` are correlated to an outer query) # run_item.variable_id = var_clause.variable_id # AND EXISTS (SELECT 1 FROM var_clause_to_value # WHERE var_clause_to_value.var_clause_id = var_clause.id # AND var_clause_to_value.value_id = run_item.value_id) # # I would like to define this relation as: if False: def _primaryjoin(): return ((RunItem.variable_id == VarClause.variable_id) & (VarClause.values.any(Value.id == RunItem.value_id))) matching_run_items = orm.relation("RunItem", primaryjoin=_primaryjoin) del _primaryjoin # Test Case engine = sa.create_engine("sqlite:///:memory:") Base.metadata.create_all(engine) session = orm.create_session(engine, autocommit=False, autoflush=True) var1 = Variable() value1_1 = Value(variable=var1) value1_2 = Value(variable=var1) session.add_all([var1, value1_1, value1_2]) var2 = Variable() value2_1 = Value(variable=var2) value2_2 = Value(variable=var2) session.add_all([var2, value2_1, value2_2]) run1 = Run(id=1, result=u"good") run1.var2value[var1] = value1_1 run1.var2value[var2] = value2_1 run2 = Run(id=2, result=u"bad") run2.var2value[var1] = value1_2 run2.var2value[var2] = value2_2 session.add_all([run1, run2]) # run_filter1 should match run1. run_filter1 = RunFilter(id=1) run_filter1.var_clauses.append(VarClause(variable=var1, values= [value1_1, value1_2])) run_filter1.var_clauses.append(VarClause(variable=var2, values= [value2_1])) session.add(run_filter1) # run_filter2 should match run1 and run2. run_filter2 = RunFilter(id=2) run_filter2.var_clauses.append(VarClause(variable=var1, values= [value1_1, value1_2])) run_filter2.var_clauses.append(VarClause(variable=var2, values= [value2_1, value2_2])) session.add(run_filter2) # run_filter3 should match no runs. run_filter3 = RunFilter(id=3) run_filter3.var_clauses.append(VarClause(variable=var1, values= [value1_2])) run_filter3.var_clauses.append(VarClause(variable=var2, values= [value2_1])) session.add(run_filter3) session.flush() engine.echo = True if False: # Query all run filters and eager-load their matching runs. q = session.query(RunFilter) q = q.options(orm.eagerload("matching_runs")) run_filter2run = dict((x, x.runs) for x in q) else: # Do it manually, sort of. q = session.query(RunFilter, Run) q = q.from_statement( """SELECT run_filter.id AS run_filter_id, run.id AS run_id FROM run_filter, run WHERE NOT EXISTS (SELECT 1 FROM var_clause WHERE var_clause.run_filter_id = run_filter.id AND NOT EXISTS (SELECT 1 FROM run_item WHERE run_item.run_id = run.id AND run_item.variable_id = var_clause.variable_id AND EXISTS (SELECT 1 FROM var_clause_to_value WHERE var_clause_to_value.var_clause_id = var_clause.id AND var_clause_to_value.value_id = run_item.value_id))) """) run_filter2run = {} for (run_filter, run) in q: runs = run_filter2run.setdefault(run_filter, []) runs.append(run) for (run_filter, runs) in run_filter2run.iteritems(): print "%s matching runs: %s" % (run_filter, ", ".join(str(run) for run in runs)) --~--~---------~--~----~------------~-------~--~----~ You received this message because you are subscribed to the Google Groups "sqlalchemy" group. To post to this group, send email to sqlalchemy@googlegroups.com To unsubscribe from this group, send email to sqlalchemy+unsubscr...@googlegroups.com For more options, visit this group at http://groups.google.com/group/sqlalchemy?hl=en -~----------~----~----~----~------~----~------~--~---