After tracking down some extreme slowness in loading a one-to-many 
relationship (e.g. myobject.foobars), I seem to have isolated the issue to 
engine.execute() being much slower with parameterized queries than with 
explicit queries. The following is actual code and output for loading 
10,971 rows from a database table. (The actual table has more columns than 
I'm including here, and is not designed by me.) Note that each explicit 
query (where I explicitly set the WHERE clause parameter and pass the 
resulting SQL statement to engine.execute()) runs in under 0.1 seconds, 
whereas each parameterized query (where I let SQLAlchemy bind the WHERE 
clause parameter) takes over 8 seconds.

The difference in runtimes is smaller when the number of rows returned is 
smaller, which seems odd since I would have thought that the binding of the 
WHERE clause parameters is just done once and would be virtually 
instantaneous.

Any thoughts?

Thanks,

Seth


import sqlalchemy as sa
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base
import time

engine = sa.create_engine('mssql+pyodbc://Compustat/Compustat')
session = sessionmaker(bind=engine, autoflush=False, 
expire_on_commit=False)()

class FooBar(declarative_base()):
    __tablename__ = 'sec_dprc'
    gvkey = sa.Column(sa.String(6), primary_key=True)
    datadate = sa.Column(sa.DateTime, primary_key=True)
    value = sa.Column(sa.Float, name='prcod')

def print_timing(func):
    def wrapper(*arg):
        t1 = time.time()
        rows = func(*arg)
        t2 = time.time()
        print("%30s() len=%d, last=%s, runtime=%0.3fs" % (str(func).split(' 
at')[0][10:], len(rows), rows[-1], t2 - t1))
        return t2 - t1
    return wrapper

if __name__ == '__main__':

    key = '001045'
    query = session.query(FooBar.datadate, 
FooBar.value).filter(sa.and_(FooBar.gvkey == key)).order_by(FooBar.datadate)
    sql_select_statement_base = str(query)
    print(sql_select_statement_base)

    @print_timing
    def execute_explicit_query():
        sql_select_statement_explicit = 
sql_select_statement_base.replace(":gvkey_1", "'%s'" % key)
        rows = 
engine.execute(sa.text(sql_select_statement_explicit)).fetchall()
        return rows

    @print_timing
    def execute_parameterized_query():
        rows = engine.execute(sa.text(sql_select_statement_base), 
{'gvkey_1':key}).fetchall()
        return rows

    num_iterations = 5
    explicit_runtime = 0.0
    parameterized_runtime = 0.0
    for i in range(num_iterations):
        explicit_runtime += execute_explicit_query()
        parameterized_runtime += execute_parameterized_query()
    print("Total runtime for %d explicit queries = %0.3fs." % 
(num_iterations, explicit_runtime))
    print("Total runtime for %d parameterized queries = %0.3fs." % 
(num_iterations, parameterized_runtime))


SELECT sec_dprc.datadate AS sec_dprc_datadate, sec_dprc.prcod AS 
sec_dprc_prcod 
FROM sec_dprc 
WHERE sec_dprc.gvkey = :gvkey_1 ORDER BY sec_dprc.datadate
        execute_explicit_query() len=10971, last=(datetime.datetime(2014, 
5, 9, 0, 0), 37.96), runtime=0.082s
   execute_parameterized_query() len=10971, last=(datetime.datetime(2014, 
5, 9, 0, 0), 37.96), runtime=8.852s
        execute_explicit_query() len=10971, last=(datetime.datetime(2014, 
5, 9, 0, 0), 37.96), runtime=0.032s
   execute_parameterized_query() len=10971, last=(datetime.datetime(2014, 
5, 9, 0, 0), 37.96), runtime=8.754s
        execute_explicit_query() len=10971, last=(datetime.datetime(2014, 
5, 9, 0, 0), 37.96), runtime=0.039s
   execute_parameterized_query() len=10971, last=(datetime.datetime(2014, 
5, 9, 0, 0), 37.96), runtime=9.182s
        execute_explicit_query() len=10971, last=(datetime.datetime(2014, 
5, 9, 0, 0), 37.96), runtime=0.028s
   execute_parameterized_query() len=10971, last=(datetime.datetime(2014, 
5, 9, 0, 0), 37.96), runtime=9.416s
        execute_explicit_query() len=10971, last=(datetime.datetime(2014, 
5, 9, 0, 0), 37.96), runtime=0.080s
   execute_parameterized_query() len=10971, last=(datetime.datetime(2014, 
5, 9, 0, 0), 37.96), runtime=8.425s
Total runtime for 5 explicit queries = 0.260s.
Total runtime for 5 parameterized queries = 44.629s.

-- 
You received this message because you are subscribed to the Google Groups 
"sqlalchemy" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to sqlalchemy+unsubscr...@googlegroups.com.
To post to this group, send email to sqlalchemy@googlegroups.com.
Visit this group at http://groups.google.com/group/sqlalchemy.
For more options, visit https://groups.google.com/d/optout.

Reply via email to