On Wed, 5 May 2010 15:01:08 -0700
David Gardner <dgard...@creatureshop.com> wrote:

> I was just considering something similar. Were you able to get far
> with this?
> 

Yes, actually I've got a pretty good start on it. The only obvious
thing missing right now is a Comparator implementation so that the
custom hstore operations are available to mapped classes and not just
to the SQL expression language.

I did want to have access to hstore operations outside of mapped
classes (i.e. just using SQL expression language) so I ended up
implementing it slightly differently than I had originally thought. It
required some hacks which I'm not sure how to do better.

Some thoughts...

1. While you can override existing operators that work on ColumnElements
without doing much funny business, if you want to add *new* operations
to it, the abstractions leak fairly badly. This seems to be because
operator definitions aren't delegated to the class representing the
type, which is suboptimal because I would think that the type of a
ColumnElement is what logically "defines" what operations are valid on
it. The result of this is that I have to create classes like
HStoreColumn, HStoreColumnElement, _HStoreDeleteFunction, and so on, so
that SQL expressions which are logically of type 'hstore' will have the
extended hstore operations available.

2. That expression operations on Foo.some_col and foo_table.c.some_col
take completely different paths in the implementation was slightly
surprising. I would have expected the former to be implemented in terms
of the latter, so that SQL expressions available on some column type are
automatically available on the descriptor of a class which maps to that
column. But I don't know, there might be good reasons for this. In any
case I'm trying to figure out how to write my Comparator for hstore
without repeating myself a lot.

Current implementation attached, with really hacky tests at the end :)
Comments welcome.

-Kyle

-- 
You received this message because you are subscribed to the Google Groups 
"sqlalchemy" group.
To post to this group, send email to sqlalch...@googlegroups.com.
To unsubscribe from this group, send email to 
sqlalchemy+unsubscr...@googlegroups.com.
For more options, visit this group at 
http://groups.google.com/group/sqlalchemy?hl=en.

import re
import sqlalchemy.types as satypes
import sqlalchemy.schema as saschema
import sqlalchemy.sql as sasql
import sqlalchemy.sql.expression as saexp
import sqlalchemy.sql.functions as safunc
import sqlalchemy.util as sautil
import sqlalchemy.dialects.postgresql as pgdialect
from sqlalchemy.exc import SQLAlchemyError

__all__ = [ 'HStoreSyntaxError', 'HStore', 'HStoreElement', 'pair',
        'HStoreColumn' ]

# My best guess at the parsing rules of hstore literals, since no formal
# grammar is given. This may be overkill since the docs say that current output
# implementation always quotes keys and values, but gives no explicit guarantee
# that this behavior is dependable. This is mostly reverse engineered from PG's
# input parser behavior.
HSTORE_PAIR_RE = re.compile(r"""
    (
        (?P<key> [^" ] [^= ]* )            # Unquoted keys
      | " (?P<key_q> ([^"] | \\ . )* ) "   # Quoted keys
    )
    [ ]* => [ ]*    # Pair operator, optional adjoining whitespace
    (
        (?P<value> [^" ] [^, ]* )          # Unquoted values
      | " (?P<value_q> ([^"] | \\ . )* ) " # Quoted values
    )
    """, re.VERBOSE)

HSTORE_DELIMITER_RE = re.compile(r"""
    [ ]* , [ ]*
    """, re.VERBOSE)

class HStoreSyntaxError(SQLAlchemyError):
    """Indicates an error unmarshalling an hstore value."""
    def __init__(self, hstore_str, pos):
        self.hstore_str = hstore_str
        self.pos = pos

        CTX = 20
        hslen = len(hstore_str)

        parsed_tail = hstore_str[ max(pos - CTX - 1, 0) : min(pos, hslen) ]
        residual = hstore_str[ min(pos, hslen) : min(pos + CTX + 1, hslen) ]

        if len(parsed_tail) > CTX:
            parsed_tail = '[...]' + parsed_tail[ 1 : ]
        if len(residual) > CTX:
            residual = residual[ : -1 ] + '[...]'

        super(HStoreSyntaxError, self).__init__(
                "After %r, could not parse residual at position %d: %r" %
                (parsed_tail, pos, residual))

def _parse_hstore(hstore_str):
    """
    Parse an hstore from it's literal string representation.
    
    Attempts to approximate PG's hstore input parsing rules as closely as
    possible. Although currently this is not strictly necessary, since the
    current implementation of hstore's output syntax is stricter than what it
    accepts as input, the documentation makes no guarantees that will always
    be the case.

    Throws HStoreSyntaxError if parsing fails.
    """
    result = {}
    pos = 0
    pair_match = HSTORE_PAIR_RE.match(hstore_str)

    while pair_match is not None:
        key = pair_match.group('key') or pair_match.group('key_q')
        key = key.decode('string_escape')
        value = pair_match.group('value') or pair_match.group('value_q')
        value = value.decode('string_escape')
        result[key] = value

        pos += pair_match.end()
                
        delim_match = HSTORE_DELIMITER_RE.match(hstore_str[ pos : ])
        if delim_match is not None:
            pos += delim_match.end()

        pair_match = HSTORE_PAIR_RE.match(hstore_str[ pos : ])

    if pos != len(hstore_str):
        raise HStoreSyntaxError(hstore_str, pos)

    return result

def _serialize_hstore(val):
    """
    Serialize a dictionary into an hstore literal. Keys and values must both be
    strings.
    """
    def esc(s, position):
        try:
            return s.encode('string_escape').replace('"', r'\"')
        except AttributeError:
            raise ValueError("%r in %s position is not a string." %
                    (s, position))
    return ', '.join( '"%s"=>"%s"' % (esc(k, 'key'), esc(v, 'value'))
            for k, v in val.iteritems() )


class HStore(satypes.MutableType, satypes.Concatenable, satypes.TypeEngine):
    """
    The column type for representing PostgreSQL's contrib/hstore type. This
    type is a miniature key-value store in a column. It supports query
    operators for all the usual operations on a map-like data structure.
    """

    name = 'hstore'

    def bind_processor(self, dialect):
        def process(value):
            if value is not None:
                return _serialize_hstore(value)
            else:
                return value
        return process

    def result_processor(self, dialect, coltype):
        def process(value):
            if value is not None:
                return _parse_hstore(value)
            else:
                return value
        return process

    def _adapt_expression(self, op, other_type):
        if op in ['?', '@>', '<@']:
            return op, satypes.Boolean
        elif op == '->':
            return op, other_type
        else:
            return super(HStore, self)._adapt_expression(op, other_type)

    def copy_value(self, value):
        return dict(value)


class HStoreElement(saexp.ColumnElement):
    """
    An expression element that evaluates to an hstore object. This is where the
    expression language extensions for hstore types are implemented.
    """
    def has_key(self, other):
        """Boolean expression. Test for presence of a key. Note that the key
        may be a SQLA expression."""
        return self.op('?')(other)

    def contains(self, other):
        """Boolean expression. Test if keys are a superset of the keys of
        the argument hstore expression."""
        return self.op('@>')(other)

    def contained_by(self, other):
        """Boolean expression. Test if keys are a proper subset of the keys of
        the argument hstore expression."""
        return self.op('<@')(other)

    def __getitem__(self, other):
        """Text expression. Get the value at a given key. Note that the key may
        be a SQLA expression."""
        return self.op('->')(other)

    def concat(self, other):
        """HStore expression. Merge this hstore with the argument hstore, with
        duplicate keys taking the value from the argument."""
        return _HStoreBinaryExpression(self, other, '||', type_=HStore)

    def __add__(self, other):
        """HStore expression. Merge the left and right hstore expressions, with
        duplicate keys taking the value from the right expression."""
        return self.concat(other)

    def assoc(self, key, val):
        """HStore expression. Returns the contents of this hstore updating the
        given key with the given value. Note that the key, value, or both may
        be SQLA expressions."""
        return self.concat(pair(key, val))

    def dissoc(self, key):
        """HStore expression. Returns the contents of this hstore with the
        given key deleted. Note that the key may be a SQLA expression."""
        return _HStoreDeleteFunction(self, key)

    def keys(self):
        """Text array expression. Return array of keys."""
        return _HStoreKeysFunction(self)

    def vals(self):
        """Text array expression. Return array of values."""
        return _HStoreValsFunction(self)


class _HStoreBinaryExpression(HStoreElement, saexp._BinaryExpression):
    pass


class pair(_HStoreBinaryExpression):
    """
    Construct an hstore on the server side using the pair operator.

    This is different from a one-member hstore literal because the key and
    value are evaluated as SQLAlchemy expressions, so the key, value, or both
    may contain columns, function calls, or any other valid SQL expressions which
    evaluate to text.
    """
    def __init__(self, key, val):
        # HACK: We're borrowing this function from _BinaryExpression and
        # something in it's call graph blows up if the type is set, for reasons
        # I do not yet understand.
        self.type = None
        key = self._check_literal('=>', key)
        val = self._check_literal('=>', val)
        saexp._BinaryExpression.__init__(self, key, val, '=>', type_=HStore)


class _HStoreDeleteFunction(HStoreElement, safunc.GenericFunction):
    __return_type__ = HStore
    def __init__(self, store, key, **kwargs):
        safunc.GenericFunction.__init__(self, args=[store, key], **kwargs)
        self.name = 'delete'


class _HStoreKeysFunction(safunc.GenericFunction):
    __return_type__ = pgdialect.ARRAY(satypes.Text)
    def __init__(self, store, **kwargs):
        safunc.GenericFunction.__init__(self, args=[store], **kwargs)
        self.name = 'akeys'


class _HStoreValsFunction(safunc.GenericFunction):
    __return_type__ = pgdialect.ARRAY(satypes.Text)
    def __init__(self, store, **kwargs):
        safunc.GenericFunction.__init__(self, args=[store], **kwargs)
        self.name = 'avals'


class HStoreColumn(HStoreElement, saschema.Column):
    """Same as a regular Column, except it augments the SQL expression language
    with hstore features."""
    pass


if __name__ == '__main__':
    from sqlalchemy import create_engine, MetaData
    from sqlalchemy.schema import Column, Table
    from sqlalchemy.types import Integer, Text
    import sqlalchemy.sql as sql
    import sqlalchemy.orm as orm

    engine = create_engine('postgresql://test:t...@gsr-db.local/sandbox')
    meta = MetaData()

    test_table = Table('test', meta,
            Column('id', Integer(), primary_key=True),
            HStoreColumn('hash', HStore()))

    conn = engine.connect()

    hashcol = test_table.c.hash
    where_tests = [
            hashcol.has_key('foo'),
            hashcol.contains({'foo': '1'}),
            hashcol.contained_by({'foo': '1'}) ]
    select_tests = [
            hashcol['foo'],
            hashcol.dissoc('foo'),
            pair('foo', '3')['foo'],
            hashcol.assoc(sql.cast(test_table.c.id, Text), '3'),
            hashcol + hashcol,
            (hashcol + hashcol)['foo'],
            hashcol.keys() ]

    for wt in where_tests:
        a = sql.select([test_table], whereclause=wt)
        a.bind = conn
        print str(a)
        print str(list(a.execute()))

    for st in select_tests:
        a = sql.select([st])
        a.bind = conn
        print str(a)
        print str(list(a.execute()))

    conn.close()

    class TestObj(object):
        def __init__(self, id_, hash_):
            self.id = id_
            self.hash = hash_
        def __repr__(self):
            return "TestObj(%r, %r)" % (self.id, self.hash)

    orm.mapper(TestObj, test_table)
    Session = orm.sessionmaker(bind=engine)
    ses = Session()

    print list(ses.query(TestObj).all())

    ses.close()

Reply via email to