Howdy all, I'm relatively new to SQLAlchemy, so I hope this isn't too silly of a question/issue, but I'm having some issues with a TypeDecorator type I've created and am seeking advice.
I have a situation where I'd like to automatically convert hex strings (in this case they are actually MD5 values) into binary equivalents and store them in a BLOB, so I have this TypeDecorator (shown in the test code at the bottom of this message) that sort of works. It seems to do the right thing in sqlite (stores as binary but retrieves as the hex string) but in postgres (via psychopg2, v2.3.2 and v2.2.2) it seems to do the right thing on store but does like a double decode on retrieve. To illustrate, here's the output of my test case: > 2010-12-29 14:13:27,246 [DEBUG] Getting session(s) > 2010-12-29 14:13:27,430 [DEBUG] Populating session sqlite:///sqla_test.db > 2010-12-29 14:13:27,521 [DEBUG] Populating session postgresql +psycopg2://postgres:@localhost/my_test > 2010-12-29 14:13:27,524 [DEBUG] Dumping session(s) > 2010-12-29 14:13:27,524 [DEBUG] Querying session sqlite:///sqla_test.db > 2010-12-29 14:13:27,525 [DEBUG] (2 objects) > 2010-12-29 14:13:27,526 [DEBUG] MyFile md5sum = 0123456789abcdef0123456789abcdef, filename = foo.exe > 2010-12-29 14:13:27,526 [DEBUG] MyFile md5sum = 00000000000000000000000000ff00ff, filename = bar.txt > 2010-12-29 14:13:27,526 [DEBUG] Querying session postgresql +psycopg2://postgres:@localhost/my_test > 2010-12-29 14:13:27,527 [DEBUG] (2 objects) > 2010-12-29 14:13:27,528 [DEBUG] MyFile md5sum = 783031323334353637383961626364656630313233343536373839616263646566, filename = foo.exe > 2010-12-29 14:13:27,528 [DEBUG] MyFile md5sum = 783030303030303030303030303030303030303030303030303030666630306666, filename = bar.txt 78 is ASCII for 'x', the other values are the ASCII reps for 0-9a-f. Selecting directly from the tables I get: my_test=# select * from my_files; md5sum | filename ------------------------------------+---------- \x0123456789abcdef0123456789abcdef | foo.exe \x00000000000000000000000000ff00ff | bar.txt (2 rows) So it is storing it in binary form, AFAICT, and the psql/postgres bytea handling is converting it back for display, which is probably happening through psycopg2 I'm guessing, so then I'm getting a double conversion. Of course, I tried switching to just a LargeBinary type to see what would happen, and here's the output of that: > 2010-12-29 14:21:58,265 [DEBUG] Getting session(s) > 2010-12-29 14:21:58,500 [DEBUG] Populating session sqlite:///sqla_test.db > 2010-12-29 14:21:58,589 [DEBUG] Populating session postgresql +psycopg2://postgres:@localhost/my_test > 2010-12-29 14:21:58,592 [DEBUG] Dumping session(s) > 2010-12-29 14:21:58,593 [DEBUG] Querying session sqlite:///sqla_test.db > 2010-12-29 14:21:58,593 [DEBUG] (2 objects) > 2010-12-29 14:21:58,594 [DEBUG] MyFile md5sum = 0123456789abcdef0123456789abcdef, filename = foo.exe > 2010-12-29 14:21:58,594 [DEBUG] MyFile md5sum = 00000000000000000000000000ff00ff, filename = bar.txt > 2010-12-29 14:21:58,594 [DEBUG] Querying session postgresql +psycopg2://postgres:@localhost/my_test > 2010-12-29 14:21:58,595 [DEBUG] (2 objects) > 2010-12-29 14:21:58,596 [DEBUG] MyFile md5sum = x3031323334353637383961626364656630313233343536373839616263646566, filename = foo.exe > 2010-12-29 14:21:58,597 [DEBUG] MyFile md5sum = x3030303030303030303030303030303030303030303030303030666630306666, filename = bar.txt Notice that sqlite looks identical, but the postgres output is still encoded (except the 'x' this time). And selecting directly shows that it's stored in ASCII, not the binary values: my_test=# select * from my_files; md5sum | filename -------------------------------------------------------------------- +---------- \x3031323334353637383961626364656630313233343536373839616263646566 | foo.exe \x3030303030303030303030303030303030303030303030303030666630306666 | bar.txt (2 rows) It is also stored as ASCII in the sqlite db this way too (so I don't get a [theoretical] storage savings that I'm looking for, so I'd prefer to have it stored as binary instead of as an ASCII or Unicode string). So, any advice on how to get this type of storage to behave identically under sqlite & postgres (and eventually MySQL, probably) would be greatly appreciated! Thanks, Chuck ----->8 test code 8<----- # # test case for odd binascii conversion issue # import binascii import sqlalchemy as sa import sqlalchemy.types as sa_types import sqlalchemy.orm as sa_orm import sqlalchemy.orm.collections as sa_col import sqlalchemy.ext.declarative as sa_decl import sqlalchemy.ext.associationproxy as sa_asprox Base = sa_decl.declarative_base() SessionMap = {} EngineMap = {} # convert MD5 and binary name binascii to/from binary automagically: class BinAsciiType(sa_types.TypeDecorator): """Converts between ascii hex char string and BLOB storage invisibly""" #impl = types.LargeBinary(8) # might be nice to limit to a specific size, but doesn't seem to work impl = sa_types.LargeBinary def process_bind_param(self, value, dialect): rc = None try: if value is not None: rc = binascii.a2b_hex(value) except: # fail silently if can't convert? pass return rc def process_result_value(self, value, dialect): rc = None try: if value is not None: rc = binascii.b2a_hex(value) except: # fail silently if can't convert? pass return rc def copy(self): return BinAsciiType(self.impl.length) class MyFile(Base): __tablename__ = 'my_files' md5sum = sa.Column(BinAsciiType, primary_key=True) #md5sum = sa.Column(sa_types.LargeBinary, primary_key=True) filename = sa.Column(sa.String, primary_key=True) def __init__(self, md5sum = None, filename = None): self.md5sum = md5sum self.filename = filename def __str__(self): return "MyFile md5sum = %s, filename = %s"% (self.md5sum,self.filename) def GetDBSession(db_url, full_init=False): """Initialize or retrieve a previously initilized SQLAlchemySessionWrapper object for the given db_url parameter""" global Base global SessionMap global EngineMap session = None engine = None if db_url in SessionMap: session = SessionMap[db_url] engine = EngineMap[db_url] else: engine = sa.create_engine(db_url, echo=False) session = sa_orm.sessionmaker(bind=engine)() EngineMap[db_url] = engine SessionMap[db_url] = session if full_init == True: Base.metadata.drop_all(engine) # always do the create_all, in case new types were added since last time... Base.metadata.create_all(engine) return session if __name__ == '__main__': try: import os import sys import logging logging.basicConfig(level=logging.DEBUG, format='%(asctime)s [%(levelname)s] % (message)s', stream=sys.stderr) logger = logging.getLogger('TEST') logger.debug("Getting session(s)") session_list = list() session_list.append(GetDBSession("sqlite:/// sqla_test.db",full_init=True)) session_list.append(GetDBSession("postgresql+psycopg2:// postgres:@localhost/my_test",full_init=True)) # populate the session(s): for session in session_list: logger.debug("Populating session %s"%session.bind.url) myfile1 = MyFile("0123456789abcdef0123456789abcdef","foo.exe") myfile2 = MyFile("00000000000000000000000000ff00ff","bar.txt") session.add(myfile1) session.add(myfile2) session.commit() # dump some data from the session(s): logger.debug("Dumping session(s)") for session in session_list: logger.debug("Querying session %s"%(session.bind.url)) logger.debug(" (%d objects)"% (session.query(MyFile).count())) for obj in session.query(MyFile): logger.debug(" %s"%(str(obj))) except Exception, e: logger.error("EXCEPTION: %s"%str(e)) -- You received this message because you are subscribed to the Google Groups "sqlalchemy" group. To post to this group, send email to sqlalch...@googlegroups.com. To unsubscribe from this group, send email to sqlalchemy+unsubscr...@googlegroups.com. For more options, visit this group at http://groups.google.com/group/sqlalchemy?hl=en.