On Mon, Jun 26, 2017 at 2:24 PM, Dave Vitek <[email protected]> wrote:

> In case anyone tries to use the compile visitor below, I'll document a
> couple snags I ran into.  For now, I think I'm satisfied to go back to my
> original patch and deal with any merge conflicts during upgrades.
>
> The hook has false positives when it runs on inner queries that correlate
> to tables from outer queries.  I think these could be addressed by
> borrowing most of the logic from _get_display_froms having to do with
> dropping extra FROMs to make this work right, although you would need to
> get the values for explicit_correlate_froms and implicit_correlate_froms
> from somewhere.  I'm sure it could be made to work eventually.
>

Here's a proof of concept for the "SELECT linter", this is totally
something that could work itself into some extension, flag, or other kind
of feature in SQLAlchemy but I'd need folks to help determine that this
does in fact work in all cases.  I wired it into the compiler to see what
the tests came up with, there's actually lots of tests that are only
testing stringification which don't attempt to join the FROM clauses and
these failed;  though I spot checked and it had the right answer in all
those cases.

from sqlalchemy import table, column, select
from sqlalchemy.sql.expression import Select
from sqlalchemy.sql import visitors
from sqlalchemy.ext.compiler import compiles
from sqlalchemy import util
import itertools
import collections


@compiles(Select)
def _lint_select(element, compiler, **kw):
    froms = set(element.froms)
    if not froms:
        return
    edges = set()

    # find all "a <operator> b", add that as edges
    def visit_binary(binary_element):

        edges.update(
            itertools.product(
                binary_element.left._from_objects,
                binary_element.right._from_objects
            )
        )

    # find all "a JOIN b", add "a" and "b" as froms
    def visit_join(join_element):
        if join_element in froms:
            froms.remove(join_element)
            froms.update((join_element.left, join_element.right))

    # unwrap "FromGrouping" objects, e.g. parentheized froms
    def visit_grouping(grouping_element):
        if grouping_element in froms:
            froms.remove(grouping_element)

            # the enclosed element will often be a JOIN.  The
visitors.traverse
            # does a depth-first outside-in traversal so the next
            # call will be visit_join() of this element :)
            froms.add(grouping_element.element)

    visitors.traverse(
        element, {}, {
            'binary': visit_binary, 'join': visit_join,
            'grouping': visit_grouping
        })

    # take any element from the list of FROMS.
    # then traverse all the edges and ensure we can reach
    # all other FROMS
    start_with = froms.pop()
    the_rest = froms
    stack = collections.deque([start_with])
    while stack and the_rest:
        node = stack.popleft()
        the_rest.discard(node)
        for edge in list(edges):
            if edge not in edges:
                continue
            elif edge[0] is node:
                edges.remove(edge)
                stack.appendleft(edge[1])
            elif edge[1] is node:
                edges.remove(edge)
                stack.appendleft(edge[0])

    # FROMS left over?  boom
    if the_rest:
        util.warn(
            "for stmt %s FROM elements %s are not "
            "joined up to FROM element %r" % (
                id(element),  # defeat the warnings filter
                ", ".join('"%r"' % f for f in the_rest),
                start_with
            )
        )
    return compiler.visit_select(element, **kw)

a = table('a', column('x'))
b = table('b', column('x'))
c = table('c', column('x'))
d = table('d', column('x'))


print "-------------------------------------"
print "everything is connected"
print select([a]).\
    select_from(a.join(b, a.c.x == b.c.x)).\
    select_from(c).select_from(d).\
    where(d.c.x == b.c.x).\
    where(c.c.x == d.c.x).where(c.c.x == 5)

print "-------------------------------------"
print "disconnect between a,b, c,d"
print select([a]).\
    select_from(a.join(b, a.c.x == b.c.x)).\
    select_from(c).select_from(d).\
    where(c.c.x == d.c.x).where(c.c.x == 5)

print "-------------------------------------"
print "c and d both disconnected"
print select([a]).\
    select_from(a.join(b, a.c.x == b.c.x)).\
    where(c.c.x == 5).where(d.c.x == 10)

print "-------------------------------------"
print "now connected"
print select([a]).\
    select_from(a.join(b, a.c.x == b.c.x)).\
    select_from(c.join(d, c.c.x == d.c.x)).\
    where(c.c.x == b.c.x).\
    where(c.c.x == 5).where(d.c.x == 10)


print "-----------------------------------"
print "test disconnected subquery"
subq = select([a]).where(a.c.x == b.c.x)
stmt = select([c]).select_from(subq)
print stmt

print "-----------------------------------"
print "now connect it"
subq = select([a]).where(a.c.x == b.c.x).alias()
stmt = select([c]).select_from(subq).where(c.c.x == subq.c.x)
print stmt


print "-----------------------------------"
print "right nested join w/o issue"

print select([a]).select_from(
    a.join(
        b.join(c, b.c.x == c.c.x), a.c.x == b.c.x
    )
)

print "-----------------------------------"
print "right nested join with an issue"

print select([a]).select_from(
    a.join(
        b.join(c, b.c.x == c.c.x), a.c.x == b.c.x
    )
).where(d.c.x == 5)


output:

-------------------------------------
everything is connected
SELECT a.x
FROM d, c, a JOIN b ON a.x = b.x
WHERE d.x = b.x AND c.x = d.x AND c.x = :x_1
-------------------------------------
disconnect between a,b, c,d
test.py:75: SAWarning: for stmt 139871395661008 FROM elements
"<sqlalchemy.sql.selectable.TableClause at 0x7f3658d93bd0; b>",
"<sqlalchemy.sql.selectable.TableClause at 0x7f3658d93a10; a>" are not
joined up to FROM element <sqlalchemy.sql.selectable.TableClause at
0x7f3658d93c90; c>
  start_with
SELECT a.x
FROM c, d, a JOIN b ON a.x = b.x
WHERE c.x = d.x AND c.x = :x_1
-------------------------------------
c and d both disconnected
test.py:75: SAWarning: for stmt 139871395662864 FROM elements
"<sqlalchemy.sql.selectable.TableClause at 0x7f3658d93bd0; b>",
"<sqlalchemy.sql.selectable.TableClause at 0x7f3658d93d50; d>",
"<sqlalchemy.sql.selectable.TableClause at 0x7f3658d93a10; a>" are not
joined up to FROM element <sqlalchemy.sql.selectable.TableClause at
0x7f3658d93c90; c>
  start_with
SELECT a.x
FROM c, d, a JOIN b ON a.x = b.x
WHERE c.x = :x_1 AND d.x = :x_2
-------------------------------------
now connected
SELECT a.x
FROM a JOIN b ON a.x = b.x, c JOIN d ON c.x = d.x
WHERE c.x = b.x AND c.x = :x_1 AND d.x = :x_2
-----------------------------------
test disconnected subquery
test.py:75: SAWarning: for stmt 139871395659920 FROM elements
"<sqlalchemy.sql.selectable.Select at 0x7f3658da4390; Select object>" are
not joined up to FROM element <sqlalchemy.sql.selectable.TableClause at
0x7f3658d93c90; c>
  start_with
SELECT c.x
FROM c, (SELECT a.x AS x
FROM a, b
WHERE a.x = b.x)
-----------------------------------
now connect it
SELECT c.x
FROM c, (SELECT a.x AS x
FROM a, b
WHERE a.x = b.x) AS anon_1
WHERE c.x = anon_1.x
-----------------------------------
right nested join w/o issue
SELECT a.x
FROM a JOIN (b JOIN c ON b.x = c.x) ON a.x = b.x
-----------------------------------
right nested join with an issue
test.py:75: SAWarning: for stmt 139871393147280 FROM elements
"<sqlalchemy.sql.selectable.TableClause at 0x7f3658d93d50; d>" are not
joined up to FROM element <sqlalchemy.sql.selectable.TableClause at
0x7f3658d93a10; a>
  start_with
SELECT a.x
FROM d, a JOIN (b JOIN c ON b.x = c.x) ON a.x = b.x
WHERE d.x = :x_1


>
> Additionally, it raises even in contexts where no sql is being executed.
> For example, if someone renders a yet-to-be-nested select to a string for
> debugging purposes, they might get an exception. It's also a bit confusing
> to the client, because if they render the selectable, it will show them the
> FROM entity that it simultaneously claims it missing (that is, if you had a
> way of preventing the exception in the rendering process).  Sending the bad
> SQL to the RDBMS avoids these two caveats.
>
> Also, I found it necessary to use col._from_objects instead of col.table
> for non-trivial column elements.
>
>
>
>> After all that, what I can suggest for you is that since you are looking
>> to "raise an error", that is actually easy to do here using events or
>> compiles.   All you need to do is compare the calculated FROM list to the
>> explicit FROM list:
>>
>> from sqlalchemy import table, column, select
>> from sqlalchemy.ext.compiler import compiles
>> from sqlalchemy.sql import Select
>> from sqlalchemy import exc
>>
>>
>> @compiles(Select)
>> def _strict_froms(stmt, compiler, **kw):
>>     actual_froms = stmt.froms
>>
>>     # TODO: tune this as desired
>>     if stmt._from_obj:
>>         # TODO: we can give you non-underscored access to
>>         # _from_obj
>>         legal_froms = stmt._from_obj
>>     else:
>>         legal_froms = set([
>>             col.table for col in stmt.inner_columns
>>         ])
>>     if len(legal_froms) != len(actual_froms):
>>         raise exc.CompileError("SELECT has implicit froms")
>>
>>     return compiler.visit_select(stmt, **kw)
>>
>>
>> t1 = table('t1', column('a'))
>> t2 = table('t2', column('b'))
>>
>>
>> # works
>> print select([t1]).where(t2.c.b == 5).select_from(t1).select_from(t2)
>>
>> # raises
>> print select([t1]).where(t2.c.b == 5)
>>
>> The issue of "FROM with comma" *is* a problem I'd like to continue to
>> explore solutions towards.   I've long considered various rules in
>> _display_froms but none have ever gotten through the various issues I've
>> detailed above.    I hope this helps to clarify the complexity and depth of
>> this situation.
>>
>>
>>
> --
> SQLAlchemy - The Python SQL Toolkit and Object Relational Mapper
>
> http://www.sqlalchemy.org/
>
> To post example code, please provide an MCVE: Minimal, Complete, and
> Verifiable Example.  See  http://stackoverflow.com/help/mcve for a full
> description.
> --- You received this message because you are subscribed to the Google
> Groups "sqlalchemy" group.
> To unsubscribe from this group and stop receiving emails from it, send an
> email to [email protected].
> To post to this group, send email to [email protected].
> Visit this group at https://groups.google.com/group/sqlalchemy.
> For more options, visit https://groups.google.com/d/optout.
>

-- 
SQLAlchemy - 
The Python SQL Toolkit and Object Relational Mapper

http://www.sqlalchemy.org/

To post example code, please provide an MCVE: Minimal, Complete, and Verifiable 
Example.  See  http://stackoverflow.com/help/mcve for a full description.
--- 
You received this message because you are subscribed to the Google Groups 
"sqlalchemy" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To post to this group, send email to [email protected].
Visit this group at https://groups.google.com/group/sqlalchemy.
For more options, visit https://groups.google.com/d/optout.

Reply via email to