As we're trying to convert from our own homegrown version of the HSTORE type, it seems that our tests have been broken by SQLAlchemy's handling of serialization/de-serialization for hstores containing backslashes.
The current serialization behavior of SQLAlchemy will do this: {'\\"a': '\\"1'} => '"\\\\"a"=>"\\\\"1"' Trying to de-serialize the result yields: ValueError: After '"\\\\"a"=>"\\\\"', could not parse residual at position 12: '1"' This is using the _serialize_hstore and _parse_hstore functions. The correct behavior, I believe, should be this: {'\\"a': '\\"1'} => '"\\\\\\"a"=>"\\\\\\"1"' Trying to de-serialize the result yields: '"\\\\\\"a"=>"\\\\\\"1"' => {'\\"a': '\\"1'} Which is what we're looking for. Attached is a patch and tests. There are also some other parsing problems that I consider to be corner cases and broken as implemented in PostgreSQL, such as: (postgres@[local]:5432 14:05:43) [dev]> select 'a=>,b=>'::hstore; (postgres@[local]:5432 14:05:47) [dev]> select 'a=>, b=>'::hstore; (postgres@[local]:5432 14:06:45) [dev]> select 'a=> , b=>'::hstore; (postgres@[local]:5432 14:06:48) [dev]> select 'a=> ,b=>'::hstore; (postgres@[local]:5432 14:06:50) [dev]> select 'a=>,'::hstore; (postgres@[local]:5432 14:10:12) [dev]> select ',=>,'::hstore; None of which are parsed by SQLAlchemy but some of which are parsed by PostgreSQL. You can see the bug report I filed about some of them here: http://www.postgresql.org/message-id/20120426190513.gb31...@llserver.lakeliving.com -Ryan P. Kelly -- You received this message because you are subscribed to the Google Groups "sqlalchemy" group. To unsubscribe from this group and stop receiving emails from it, send an email to sqlalchemy+unsubscr...@googlegroups.com. To post to this group, send email to sqlalchemy@googlegroups.com. Visit this group at http://groups.google.com/group/sqlalchemy. For more options, visit https://groups.google.com/groups/opt_out.
diff --git a/lib/sqlalchemy/dialects/postgresql/hstore.py b/lib/sqlalchemy/dialects/postgresql/hstore.py index d7368ff..c645e25 100644 --- a/lib/sqlalchemy/dialects/postgresql/hstore.py +++ b/lib/sqlalchemy/dialects/postgresql/hstore.py @@ -68,11 +68,11 @@ def _parse_hstore(hstore_str): pair_match = HSTORE_PAIR_RE.match(hstore_str) while pair_match is not None: - key = pair_match.group('key') + key = pair_match.group('key').replace(r'\"', '"').replace("\\\\", "\\") if pair_match.group('value_null'): value = None else: - value = pair_match.group('value').replace(r'\"', '"') + value = pair_match.group('value').replace(r'\"', '"').replace("\\\\", "\\") result[key] = value pos += pair_match.end() @@ -98,7 +98,7 @@ def _serialize_hstore(val): if position == 'value' and s is None: return 'NULL' elif isinstance(s, util.string_types): - return '"%s"' % s.replace('"', r'\"') + return '"%s"' % s.replace("\\", "\\\\").replace('"', r'\"') else: raise ValueError("%r in %s position is not a string." % (s, position)) diff --git a/test/dialect/test_postgresql.py b/test/dialect/test_postgresql.py index 46a7b31..d277e82 100644 --- a/test/dialect/test_postgresql.py +++ b/test/dialect/test_postgresql.py @@ -2948,6 +2948,16 @@ class HStoreTest(fixtures.TestBase): '"key1"=>"value1", "key2"=>"value2"' ) + def test_bind_serialize_with_slashes_and_quotes(self): + from sqlalchemy.engine import default + + dialect = default.DefaultDialect() + proc = self.test_table.c.hash.type._cached_bind_processor(dialect) + eq_( + proc({'\\"a': '\\"1'}), + '"\\\\\\"a"=>"\\\\\\"1"' + ) + def test_parse_error(self): from sqlalchemy.engine import default @@ -2974,6 +2984,17 @@ class HStoreTest(fixtures.TestBase): {"key1": "value1", "key2": "value2"} ) + def test_result_deserialize_with_slashes_and_quotes(self): + from sqlalchemy.engine import default + + dialect = default.DefaultDialect() + proc = self.test_table.c.hash.type._cached_result_processor( + dialect, None) + eq_( + proc('"\\\\\\"a"=>"\\\\\\"1"'), + {'\\"a': '\\"1'} + ) + def test_bind_serialize_psycopg2(self): from sqlalchemy.dialects.postgresql import psycopg2