This is an automated email from the ASF dual-hosted git repository. xiazcy pushed a commit to branch TINKERPOP-3232 in repository https://gitbox.apache.org/repos/asf/tinkerpop.git
commit 0d212d44c19aa063694632a6ca562b794abb2bb2 Author: Yang Xia <[email protected]> AuthorDate: Wed Mar 25 15:18:41 2026 -0700 Use HashableDict for non-hashable elements in set --- .../gremlin_python/structure/io/graphbinaryV1.py | 6 ++- .../gremlin_python/structure/io/graphsonV3d0.py | 9 +++- .../driver/test_driver_remote_connection.py | 15 ++++++ .../python/tests/unit/io/test_graphbinaryV1.py | 60 ++++++++++++++++++++++ .../main/python/tests/unit/io/test_graphsonV3d0.py | 58 +++++++++++++++++++++ 5 files changed, 146 insertions(+), 2 deletions(-) diff --git a/gremlin-python/src/main/python/gremlin_python/structure/io/graphbinaryV1.py b/gremlin-python/src/main/python/gremlin_python/structure/io/graphbinaryV1.py index 33ec78431b..b9b3338d82 100644 --- a/gremlin-python/src/main/python/gremlin_python/structure/io/graphbinaryV1.py +++ b/gremlin-python/src/main/python/gremlin_python/structure/io/graphbinaryV1.py @@ -520,7 +520,11 @@ class SetDeserializer(ListIO): @classmethod def objectify(cls, buff, reader, nullable=True): - return set(ListIO.objectify(buff, reader, nullable)) + the_list = ListIO.objectify(buff, reader, nullable) + try: + return set(the_list) + except TypeError: + return set(HashableDict.of(e) for e in the_list) class MapIO(_GraphBinaryTypeIO): diff --git a/gremlin-python/src/main/python/gremlin_python/structure/io/graphsonV3d0.py b/gremlin-python/src/main/python/gremlin_python/structure/io/graphsonV3d0.py index 03415c69e2..4d25816abd 100644 --- a/gremlin-python/src/main/python/gremlin_python/structure/io/graphsonV3d0.py +++ b/gremlin-python/src/main/python/gremlin_python/structure/io/graphsonV3d0.py @@ -438,9 +438,16 @@ class SetIO(_GraphSONTypeIO): In case Java returns numeric values of different types which python don't recognize, coerce and return a list. See comments of TINKERPOP-1844 for more details + + In case the set contains non-hashable elements (e.g. dict, list), + use HashableDict.of() to make them hashable. + See TINKERPOP-3232 for more details """ new_list = [reader.to_object(obj) for obj in s] - new_set = set(new_list) + try: + new_set = set(new_list) + except TypeError: + new_set = set(HashableDict.of(e) for e in new_list) if len(new_list) != len(new_set): log.warning("Coercing g:Set to list due to java numeric values. " "See TINKERPOP-1844 for more details.") diff --git a/gremlin-python/src/main/python/tests/integration/driver/test_driver_remote_connection.py b/gremlin-python/src/main/python/tests/integration/driver/test_driver_remote_connection.py index e59ec942ec..72f45b8aa9 100644 --- a/gremlin-python/src/main/python/tests/integration/driver/test_driver_remote_connection.py +++ b/gremlin-python/src/main/python/tests/integration/driver/test_driver_remote_connection.py @@ -26,6 +26,7 @@ from gremlin_python.process.traversal import Traverser from gremlin_python.process.traversal import TraversalStrategy from gremlin_python.process.traversal import Bindings from gremlin_python.process.traversal import P, Order, T +from gremlin_python.process.traversal import Scope from gremlin_python.process.graph_traversal import __ from gremlin_python.process.anonymous_traversal import traversal from gremlin_python.structure.graph import Vertex @@ -143,6 +144,20 @@ class TestDriverRemoteConnection(object): assert p.objects[1].properties is not None and len(p.objects[1].properties) > 0 assert p.objects[2].properties is not None and len(p.objects[2].properties) > 0 + def test_set_with_unhashable_elements(self, remote_connection): + # test that a query returning a Set containing non-hashable elements (maps) can be + # deserialized without a TypeError - see TINKERPOP-3232 + # GraphSON v2 does not have a Set type so it deserializes as list - skip for v2 + if isinstance(remote_connection._client._message_serializer, GraphSONSerializersV2d0): + return + g = traversal().withRemote(remote_connection) + # g.V().valueMap().dedup(Scope.local) returns a Set of Map results which previously + # threw TypeError because Python sets cannot contain unhashable dict elements + results = g.V().valueMap().dedup(Scope.local).toList() + assert len(results) > 0 + for r in results: + assert isinstance(r, set) + def test_lambda_traversals(self, remote_connection): statics.load_statics(globals()) assert "remoteconnection[{},gmodern]".format(test_no_auth_url) == str(remote_connection) diff --git a/gremlin-python/src/main/python/tests/unit/io/test_graphbinaryV1.py b/gremlin-python/src/main/python/tests/unit/io/test_graphbinaryV1.py index 92c8edc4ba..30105c6659 100644 --- a/gremlin-python/src/main/python/tests/unit/io/test_graphbinaryV1.py +++ b/gremlin-python/src/main/python/tests/unit/io/test_graphbinaryV1.py @@ -24,6 +24,7 @@ import math from gremlin_python.statics import timestamp, long, bigint, BigDecimal, SingleByte, SingleChar, ByteBufferType from gremlin_python.structure.graph import Vertex, Edge, Property, VertexProperty, Path from gremlin_python.structure.io.graphbinaryV1 import GraphBinaryWriter, GraphBinaryReader +from gremlin_python.structure.io.util import HashableDict from gremlin_python.process.traversal import Barrier, Binding, Bytecode, Merge, Direction @@ -122,6 +123,65 @@ class TestGraphSONWriter(object): output = self.graphbinary_reader.read_object(self.graphbinary_writer.write_object(x)) assert x == output + def test_set_with_unhashable_dict_elements(self): + # test that sets containing dicts can be deserialized - see TINKERPOP-3232 + x = [{"name": "marko", "age": 29}, {"name": "josh", "age": 32}] + output = self.graphbinary_reader.read_object(self.graphbinary_writer.write_object(set( + HashableDict.of(e) for e in x))) + assert isinstance(output, set) + assert len(output) == 2 + + def test_set_with_unhashable_list_elements(self): + # test that sets containing lists can be deserialized - see TINKERPOP-3232 + # build a set payload manually: write as list-of-lists, then patch the type byte to set + list_payload = self.graphbinary_writer.write_object([["marko", "josh"], ["vadas", "peter"]]) + # the first byte is the DataType for list (0x09), change it to set (0x0b) + set_payload = bytearray(list_payload) + set_payload[0] = 0x0b + output = self.graphbinary_reader.read_object(set_payload) + assert isinstance(output, set) + assert len(output) == 2 + + def test_set_with_unhashable_set_elements(self): + # test that sets containing sets can be deserialized - see TINKERPOP-3232 + # build a set-of-sets payload: write as list-of-lists, patch outer and inner type bytes + inner1 = self.graphbinary_writer.write_object(["a", "b"]) + inner2 = self.graphbinary_writer.write_object(["c", "d"]) + # patch inner payloads from list (0x09) to set (0x0b) + inner1 = bytearray(inner1) + inner1[0] = 0x0b + inner2 = bytearray(inner2) + inner2[0] = 0x0b + # build outer set: type byte (set=0x0b) + nullable (0x00) + count (2) + inner payloads + import struct + outer = bytearray([0x0b, 0x00]) + struct.pack('>i', 2) + inner1 + inner2 + output = self.graphbinary_reader.read_object(outer) + assert isinstance(output, set) + assert len(output) == 2 + + def test_set_with_mixed_hashable_and_unhashable_elements(self): + # test that sets containing a mix of hashable and unhashable elements work - see TINKERPOP-3232 + # build payload: write a list of [string, dict, int], then patch type to set + x = ["marko", {"name": "josh"}, 42] + list_payload = self.graphbinary_writer.write_object(x) + set_payload = bytearray(list_payload) + set_payload[0] = 0x0b + output = self.graphbinary_reader.read_object(set_payload) + assert isinstance(output, set) + assert len(output) == 3 + + def test_set_with_nested_unhashable_elements(self): + # test that sets containing dicts with list values can be deserialized - see TINKERPOP-3232 + # build payload manually since HashableDict.of() converts lists to tuples which can't be serialized + x = [{"name": "marko", "langs": ["java", "python"]}, {"name": "josh", "langs": ["gremlin"]}] + list_payload = self.graphbinary_writer.write_object(x) + # patch outer type from list (0x09) to set (0x0b) + set_payload = bytearray(list_payload) + set_payload[0] = 0x0b + output = self.graphbinary_reader.read_object(set_payload) + assert isinstance(output, set) + assert len(output) == 2 + def test_dict(self): x = {"yo": "what?", "go": "no!", diff --git a/gremlin-python/src/main/python/tests/unit/io/test_graphsonV3d0.py b/gremlin-python/src/main/python/tests/unit/io/test_graphsonV3d0.py index fd98da313d..027fb3a4bd 100644 --- a/gremlin-python/src/main/python/tests/unit/io/test_graphsonV3d0.py +++ b/gremlin-python/src/main/python/tests/unit/io/test_graphsonV3d0.py @@ -32,6 +32,7 @@ from gremlin_python.statics import * from gremlin_python.structure.graph import Vertex, Edge, Property, VertexProperty, Path from gremlin_python.structure.io.graphsonV3d0 import GraphSONWriter, GraphSONReader, GraphSONUtil import gremlin_python.structure.io.graphsonV3d0 +from gremlin_python.structure.io.util import HashableDict from gremlin_python.process.traversal import P, Merge, Barrier, Order, Operator, Direction from gremlin_python.process.strategies import SubgraphStrategy from gremlin_python.process.graph_traversal import __ @@ -86,6 +87,63 @@ class TestGraphSONReader: assert x.count("marko") == 1 assert x.count("josh") == 3 + def test_set_with_unhashable_dict_elements(self): + # test that sets containing dicts can be deserialized - see TINKERPOP-3232 + x = self.graphson_reader.read_object( + json.dumps({"@type": "g:Set", "@value": [ + {"@type": "g:Map", "@value": ["name", "marko", "age", {"@type": "g:Int32", "@value": 29}]}, + {"@type": "g:Map", "@value": ["name", "josh", "age", {"@type": "g:Int32", "@value": 32}]} + ]})) + assert isinstance(x, set) + assert len(x) == 2 + + def test_set_with_unhashable_list_elements(self): + # test that sets containing lists can be deserialized - see TINKERPOP-3232 + x = self.graphson_reader.read_object( + json.dumps({"@type": "g:Set", "@value": [ + {"@type": "g:List", "@value": ["marko", "josh"]}, + {"@type": "g:List", "@value": ["vadas", "peter"]} + ]})) + assert isinstance(x, set) + assert len(x) == 2 + + def test_set_with_unhashable_set_elements(self): + # test that sets containing sets can be deserialized - see TINKERPOP-3232 + x = self.graphson_reader.read_object( + json.dumps({"@type": "g:Set", "@value": [ + {"@type": "g:Set", "@value": ["a", "b"]}, + {"@type": "g:Set", "@value": ["c", "d"]} + ]})) + assert isinstance(x, set) + assert len(x) == 2 + + def test_set_with_mixed_hashable_and_unhashable_elements(self): + # test that sets containing a mix of hashable and unhashable elements work - see TINKERPOP-3232 + x = self.graphson_reader.read_object( + json.dumps({"@type": "g:Set", "@value": [ + "marko", + {"@type": "g:Map", "@value": ["name", "josh"]}, + {"@type": "g:Int32", "@value": 42} + ]})) + assert isinstance(x, set) + assert len(x) == 3 + + def test_set_with_nested_unhashable_elements(self): + # test that sets containing dicts with list values can be deserialized - see TINKERPOP-3232 + x = self.graphson_reader.read_object( + json.dumps({"@type": "g:Set", "@value": [ + {"@type": "g:Map", "@value": [ + "name", "marko", + "langs", {"@type": "g:List", "@value": ["java", "python"]} + ]}, + {"@type": "g:Map", "@value": [ + "name", "josh", + "langs", {"@type": "g:List", "@value": ["gremlin"]} + ]} + ]})) + assert isinstance(x, set) + assert len(x) == 2 + def test_number_input(self): x = self.graphson_reader.read_object(json.dumps({ "@type": "gx:Byte",
