[ https://issues.apache.org/jira/browse/ARROW-8057?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
ASF GitHub Bot updated ARROW-8057: ---------------------------------- Labels: pull-request-available (was: ) > [C++] Schema equality not roundtrip safe through Parquet > -------------------------------------------------------- > > Key: ARROW-8057 > URL: https://issues.apache.org/jira/browse/ARROW-8057 > Project: Apache Arrow > Issue Type: Bug > Components: C++, Python > Reporter: Florian Jetter > Assignee: Wes McKinney > Priority: Major > Labels: pull-request-available > > When performing schema roundtrips, the equality check for fields breaks. This > is a regression from PyArrow 0.16.0. > The equality check for entire schemas has never worked (but should from my > POV). > {code:python} > import pyarrow.parquet as pq > import pyarrow as pa > print(pa.__version__) > fields = [ > pa.field("bool", pa.bool_()), > pa.field("byte", pa.binary()), > pa.field("date", pa.date32()), > pa.field("datetime64", pa.timestamp("us")), > pa.field("float32", pa.float64()), > pa.field("float64", pa.float64()), > pa.field("int16", pa.int64()), > pa.field("int32", pa.int64()), > pa.field("int64", pa.int64()), > pa.field("int8", pa.int64()), > pa.field("null", pa.null()), > pa.field("uint16", pa.uint64()), > pa.field("uint32", pa.uint64()), > pa.field("uint64", pa.uint64()), > pa.field("uint8", pa.uint64()), > pa.field("unicode", pa.string()), > pa.field("array_float32", pa.list_(pa.float64())), > pa.field("array_float64", pa.list_(pa.float64())), > pa.field("array_int16", pa.list_(pa.int64())), > pa.field("array_int32", pa.list_(pa.int64())), > pa.field("array_int64", pa.list_(pa.int64())), > pa.field("array_int8", pa.list_(pa.int64())), > pa.field("array_uint16", pa.list_(pa.uint64())), > pa.field("array_uint32", pa.list_(pa.uint64())), > pa.field("array_uint64", pa.list_(pa.uint64())), > pa.field("array_uint8", pa.list_(pa.uint64())), > pa.field("array_unicode", pa.list_(pa.string())), > ] > schema = pa.schema(fields) > buf = pa.BufferOutputStream() > pq.write_metadata(schema, buf) > 
reader = pa.BufferReader(buf.getvalue().to_pybytes()) > reconstructed_schema = pq.read_schema(reader) > assert reconstructed_schema == reconstructed_schema > assert reconstructed_schema[0] == reconstructed_schema[0] > # This breaks on master / regression from 0.16.0 > assert schema[0] == reconstructed_schema[0] > # This never worked but should > assert reconstructed_schema == schema > assert schema == reconstructed_schema > {code} -- This message was sent by Atlassian Jira (v8.3.4#803005)