Hello:

I am trying to build a complicated relation, but am having trouble due
to the following:
* there are multiple intermediate tables involved
* the relation requires EXISTS clauses as the join condition between
several of these tables

My model is shown below. The relation I am trying to build is
RunFilter.matching_runs, which is effectively a many-to-many
relationship between RunFilter and Run. Currently, I'm using raw SQL
to model this relationship, as shown at the end of the example.

Ideally, I would like to build this relationship on top of other
relationships, so that RunFilter.matching_runs would use
RunFilter.var_clauses.any() and VarClause.matching_run_items.any() in
its join condition, and VarClause.matching_run_items would use
VarClause.values.any(). I realize that SQLAlchemy does not support
this because mapper properties are not compiled in any predictable
order, but it would be nice.

import sqlalchemy as sa
from sqlalchemy.ext.associationproxy import association_proxy
from sqlalchemy.ext.declarative import declarative_base
import sqlalchemy.orm as orm
from sqlalchemy.orm.collections import attribute_mapped_collection

Base = declarative_base()

# Association tables
var_clause_to_value = sa.Table("var_clause_to_value", Base.metadata,
    sa.Column("value_id", sa.Integer, sa.ForeignKey("value.id"),
primary_key=True),
    sa.Column("var_clause_id", sa.Integer, sa.ForeignKey
("var_clause.id"), primary_key=True))

# Entities
class Variable(Base):
    __tablename__ = "variable"

    # Fields
    id = sa.Column(sa.Integer, primary_key=True)
    # In the real world, we would have a `name` attribute as well, but
it's not
    # important for this example.

    # Relations
    values = orm.relation("Value")

class Value(Base):
    """
    Each variable can be attached to zero or more values.
    """
    __tablename__ = "value"

    # Fields
    id = sa.Column(sa.Integer, primary_key=True)
    variable_id = sa.Column(sa.Integer, sa.ForeignKey("variable.id"),
                            nullable=False)
    # In the real world, we would have a `name` attribute as well, but
it's not
    # important for this example.

    # Relations
    variable = orm.relation("Variable")

class Run(Base):
    """
    A run consists of a combination of inputs (variable=value pairs)
and an
    output (result).
    """
    __tablename__ = "run"

    def __str__(self):
        return "run%d" % self.id

    # Fields
    id = sa.Column(sa.Integer, primary_key=True)
    result = sa.Column(sa.Unicode, nullable=False)

    # Relations
    var2item = orm.relation("RunItem",
 
collection_class=attribute_mapped_collection("variable"))
    var2value = association_proxy(
        "var2item",
        "value",
        creator=lambda var, value: RunItem(value=value, variable=var))

class RunItem(Base):
    """
    A single variable=value pair for a given run.
    """
    __tablename__ = "run_item"

    # Fields
    run_id = sa.Column(sa.Integer, sa.ForeignKey("run.id"),
primary_key=True)
    value_id = sa.Column(sa.Integer, sa.ForeignKey("value.id"),
primary_key=True)
    variable_id = sa.Column(sa.Integer, sa.ForeignKey("variable.id"),
primary_key=True)

    # Relations
    run = orm.relation("Run")
    value = orm.relation("Value")
    variable = orm.relation("Variable")

class RunFilter(Base):
    """
    A user-defined query for runs. For example, if you want to find
all runs
    where var1=value1 and var2=value2, you could use::

    >>> filter = RunFilter(var_clauses=[VarClause(variable=var1,
values=[value1]),
    ...                                 VarClause(variable=var2,
values=[value2])])

    Note that for a run filter to match a run, *all* of its var
clauses have
    to be satisfied.
    """
    __tablename__ = "run_filter"

    def __str__(self):
        return "run_filter%d" % self.id

    # Fields
    id = sa.Column(sa.Integer, primary_key=True)

    # Relations
    var_clauses = orm.relation("VarClause")

    # Desired SQL for `matching_runs`:
    # (assume `run` and `run_filter` are correlated to an outer query)
    # NOT EXISTS (SELECT 1 FROM var_clause
    #             WHERE var_clause.run_filter_id = run_filter.id
    #             AND NOT EXISTS (SELECT 1 FROM run_item
    #                             WHERE run_item.variable_id =
var_clause.variable_id
    #                             AND EXISTS (SELECT 1 FROM
var_clause_to_value
    #                                         WHERE
var_clause_to_value.var_clause_id = var_clause.id
    #                                         AND
var_clause_to_value.value_id = run_item.value_id)))
    #
    # I would like to define this relation as (using `if False` to
effectively
    # comment it out but retain syntax highlighting):
    if False:
        def _primaryjoin():
            return ~RunFilter.var_clauses.any(
                ~VarClause.matching_run_items.any(RunItem.run_id ==
Run.id))
        matching_runs = orm.relation("Value",
primaryjoin=_primaryjoin)
        del _primaryjoin

class VarClause(Base):
    """
    A VarClause matches a RunItem iff:
    * RunItem.variable_id == VarClause.variable_id
    * AND RunItem.value in VarClause.values

    Essentially it is saying "Run.var2value[var1] = value1
                              OR Run.var2value[var1] = value2
                              OR ...".
    """
    __tablename__ = "var_clause"

    # Fields
    id = sa.Column(sa.Integer, primary_key=True)
    run_filter_id = sa.Column(sa.Integer, sa.ForeignKey
("run_filter.id"),
                              nullable=False)
    variable_id = sa.Column(sa.Integer, sa.ForeignKey("variable.id"),
                            nullable=False)

    # Relations
    run_filter = orm.relation("RunFilter")
    values = orm.relation("Value", secondary=var_clause_to_value)
    variable = orm.relation("Variable")

    # Desired SQL for `matching_run_items`:
    # (assume `var_clause` and `run_item` are correlated to an outer
query)
    # run_item.variable_id = var_clause.variable_id
    # AND EXISTS (SELECT 1 FROM var_clause_to_value
    #             WHERE var_clause_to_value.var_clause_id =
var_clause.id
    #             AND var_clause_to_value.value_id =
run_item.value_id)
    #
    # I would like to define this relation as:
    if False:
        def _primaryjoin():
            return ((RunItem.variable_id == VarClause.variable_id)
                    & (VarClause.values.any(Value.id ==
RunItem.value_id)))
        matching_run_items = orm.relation("RunItem",
primaryjoin=_primaryjoin)
        del _primaryjoin

# Test Case
engine = sa.create_engine("sqlite:///:memory:")
Base.metadata.create_all(engine)
session = orm.create_session(engine, autocommit=False, autoflush=True)

var1 = Variable()
value1_1 = Value(variable=var1)
value1_2 = Value(variable=var1)
session.add_all([var1, value1_1, value1_2])

var2 = Variable()
value2_1 = Value(variable=var2)
value2_2 = Value(variable=var2)
session.add_all([var2, value2_1, value2_2])

run1 = Run(id=1, result=u"good")
run1.var2value[var1] = value1_1
run1.var2value[var2] = value2_1
run2 = Run(id=2, result=u"bad")
run2.var2value[var1] = value1_2
run2.var2value[var2] = value2_2
session.add_all([run1, run2])

# run_filter1 should match run1.
run_filter1 = RunFilter(id=1)
run_filter1.var_clauses.append(VarClause(variable=var1, values=
[value1_1, value1_2]))
run_filter1.var_clauses.append(VarClause(variable=var2, values=
[value2_1]))
session.add(run_filter1)

# run_filter2 should match run1 and run2.
run_filter2 = RunFilter(id=2)
run_filter2.var_clauses.append(VarClause(variable=var1, values=
[value1_1, value1_2]))
run_filter2.var_clauses.append(VarClause(variable=var2, values=
[value2_1, value2_2]))
session.add(run_filter2)

# run_filter3 should match no runs.
run_filter3 = RunFilter(id=3)
run_filter3.var_clauses.append(VarClause(variable=var1, values=
[value1_2]))
run_filter3.var_clauses.append(VarClause(variable=var2, values=
[value2_1]))
session.add(run_filter3)

session.flush()

engine.echo = True

if False:
    # Query all run filters and eager-load their matching runs.
    q = session.query(RunFilter)
    q = q.options(orm.eagerload("matching_runs"))
    run_filter2run = dict((x, x.runs) for x in q)
else:
    # Do it manually, sort of.
    q = session.query(RunFilter, Run)
    q = q.from_statement(
"""SELECT run_filter.id AS run_filter_id,
          run.id AS run_id
   FROM run_filter, run
   WHERE NOT EXISTS (SELECT 1 FROM var_clause
                     WHERE var_clause.run_filter_id = run_filter.id
                     AND NOT EXISTS (SELECT 1 FROM run_item
                                     WHERE run_item.run_id = run.id
                                     AND run_item.variable_id =
var_clause.variable_id
                                     AND EXISTS (SELECT 1 FROM
var_clause_to_value
                                                 WHERE
var_clause_to_value.var_clause_id = var_clause.id
                                                 AND
var_clause_to_value.value_id = run_item.value_id)))
""")
    run_filter2run = {}
    for (run_filter, run) in q:
        runs = run_filter2run.setdefault(run_filter, [])
        runs.append(run)

for (run_filter, runs) in run_filter2run.iteritems():
    print "%s matching runs: %s" % (run_filter, ", ".join(str(run) for
run in runs))

--~--~---------~--~----~------------~-------~--~----~
You received this message because you are subscribed to the Google Groups 
"sqlalchemy" group.
To post to this group, send email to sqlalchemy@googlegroups.com
To unsubscribe from this group, send email to 
sqlalchemy+unsubscr...@googlegroups.com
For more options, visit this group at 
http://groups.google.com/group/sqlalchemy?hl=en
-~----------~----~----~----~------~----~------~--~---

Reply via email to