Hey All, Tossed together this patch for databases/postgres.py (see attached) to support pgsql's SELECT DISTINCT ON (col1, col2, col3...) col1, col2, ... functionality. Included are testcases for it.
Supported syntax is: select([...],distinct=True) # Classic 'DISTINCT' behavior on row select([...],distinct=t.c.col) # DISTINCT ON (col) select([...],distinct=[t.c.A, t.c.B...] # DISTINCT ON (A, B, ...) The normal syntax of: select([...]) AND select([...],distinct=False) continue to work as expected. Cheers! -G
pg_distinct.patch
Description: Binary data
import sys
from sqlalchemy import *
# For verbose logging, change the echo and LOG booleans to True
engine = create_engine('postgres', {'database':'test', 'host':'localhost',
'user':'xxx', 'password':'xxx'}, echo=True, echo_uow=True)
objectstore.LOG = True
info_table = Table('infos', engine,
Column('pk', Integer, primary_key=True),
Column('val', Integer),
Column('ts', Integer),
Column('info', String))
info_table.create()
try:
info_table.insert().execute(
{'pk':1, 'val':11, 'ts':0, 'info':'pk_1_info'},
{'pk':2, 'val':12, 'ts':1, 'info':'pk_2_info'},
{'pk':3, 'val':11, 'ts':2, 'info':'pk_3_info'},
{'pk':4, 'val':14, 'ts':3, 'info':'pk_4_info'},
{'pk':5, 'val':12, 'ts':4, 'info':'pk_5_info'})
# Begin Test
# Test 0: No DISTINCT, Normal Query
sql_result = engine.text("""
SELECT val, ts
FROM infos
""").execute().fetchall()
# Outputs:
# [(11, 0), (12, 1), (11, 2), (14, 3), (12, 4)]
print '\n--- --- ---'
print 'The result from the SQL Query:\n' + repr(sql_result)
print ' --- --- ---\n'
# SQLAlchemy Query format
sas_0_result = select([info_table.c.val,
info_table.c.ts]).execute().fetchall()
sas_1_result = select([info_table.c.val, info_table.c.ts],
distinct=False).execute().fetchall()
# Outputs:
# [(11, 0), (12, 1), (11, 2), (14, 3), (12, 4)]
# [(11, 0), (12, 1), (11, 2), (14, 3), (12, 4)]
print '\n--- --- ---'
print 'The result from the SA Query:\n' + repr(sas_0_result)
print 'The result from the SA Query:\n' + repr(sas_1_result)
print '--- --- ---\n'
assert repr(sql_result) == repr(sas_0_result)
assert repr(sql_result) == repr(sas_1_result)
# Test 1: Multicolumn DISTINCT ON query
sql_result = engine.text("""
SELECT DISTINCT ON (val) val, ts
FROM infos
""").execute().fetchall()
# Outputs:
# [(11, 2), (12, 4), (14, 3)]
print '\n--- --- ---'
print 'The result from the SQL Query:\n' + repr(sql_result)
print ' --- --- ---\n'
# SQLAlchemy Query format
sas_result=select([info_table.c.val,
info_table.c.ts],distinct=[info_table.c.val]).execute().fetchall()
# Outputs:
# [(11, 2), (12, 4), (14, 3)]
print '\n--- --- ---'
print 'The result from the SA Query:\n' + repr(sas_result)
print '--- --- ---\n'
assert repr(sql_result) == repr(sas_result)
# Test 2: Row-wide DISTINCT query
sql_result = engine.text("""
SELECT DISTINCT val
FROM infos
""").execute().fetchall()
# Outputs:
# [(11,), (12,), (14,)]
print '\n--- --- ---'
print 'The result from the SQL Query:\n' + repr(sql_result)
print ' --- --- ---\n'
# SQLAlchemy Query format
sas_1_result=select([info_table.c.val],distinct=True).execute().fetchall()
sas_2_result=select([info_table.c.val],distinct=info_table.c.val).execute().fetchall()
sas_3_result=select([info_table.c.val],distinct=[info_table.c.val]).execute().fetchall()
# Outputs:
# [(11,), (12,), (14,)]
print '\n--- --- ---'
print 'The result from the SA Query:\n' + repr(sas_1_result)
print '--- --- ---\n'
assert repr(sql_result) == repr(sas_1_result)
# Outputs:
# [(11,), (12,), (14,)]
print '\n--- --- ---'
print 'The result from the SA Query:\n' + repr(sas_2_result)
print '--- --- ---\n'
assert repr(sql_result) == repr(sas_2_result)
# Outputs:
# [(11,), (12,), (14,)]
print '\n--- --- ---'
print 'The result from the SA Query:\n' + repr(sas_3_result)
print '--- --- ---\n'
assert repr(sql_result) == repr(sas_3_result)
# Test 3: Row wide multicolumn DISTINCT query
sql_result = engine.text("""
SELECT DISTINCT val, ts
FROM infos
""").execute().fetchall()
# Outputs:
# [(11, 0), (11, 2), (12, 1), (12, 4), (14, 3)]
print '\n--- --- ---'
print 'The result from the SQL Query:\n' + repr(sql_result)
print ' --- --- ---\n'
# SQLAlchemy Query format
sas_result=select([info_table.c.val,
info_table.c.ts],distinct=True).execute().fetchall()
# Outputs:
# [(11, 0), (11, 2), (12, 1), (12, 4), (14, 3)]
print '\n--- --- ---'
print 'The result from the SA Query:\n' + repr(sas_result)
print '--- --- ---\n'
assert repr(sql_result) == repr(sas_result)
finally:
info_table.drop()

