Florian Jetter created ARROW-8057: ------------------------------------- Summary: Schema equality not roundtrip safe Key: ARROW-8057 URL: https://issues.apache.org/jira/browse/ARROW-8057 Project: Apache Arrow Issue Type: Bug Components: Python Reporter: Florian Jetter
When performing schema roundtrips, the equality check for fields breaks. This is a regression from PyArrow 0.16.0. The equality check for entire schemas has never worked (but should, from my point of view): {code:python} import pyarrow.parquet as pq import pyarrow as pa print(pa.__version__) fields = [ pa.field("bool", pa.bool_()), pa.field("byte", pa.binary()), pa.field("date", pa.date32()), pa.field("datetime64", pa.timestamp("us")), pa.field("float32", pa.float64()), pa.field("float64", pa.float64()), pa.field("int16", pa.int64()), pa.field("int32", pa.int64()), pa.field("int64", pa.int64()), pa.field("int8", pa.int64()), pa.field("null", pa.null()), pa.field("uint16", pa.uint64()), pa.field("uint32", pa.uint64()), pa.field("uint64", pa.uint64()), pa.field("uint8", pa.uint64()), pa.field("unicode", pa.string()), pa.field("array_float32", pa.list_(pa.float64())), pa.field("array_float64", pa.list_(pa.float64())), pa.field("array_int16", pa.list_(pa.int64())), pa.field("array_int32", pa.list_(pa.int64())), pa.field("array_int64", pa.list_(pa.int64())), pa.field("array_int8", pa.list_(pa.int64())), pa.field("array_uint16", pa.list_(pa.uint64())), pa.field("array_uint32", pa.list_(pa.uint64())), pa.field("array_uint64", pa.list_(pa.uint64())), pa.field("array_uint8", pa.list_(pa.uint64())), pa.field("array_unicode", pa.list_(pa.string())), ] schema = pa.schema(fields) buf = pa.BufferOutputStream() pq.write_metadata(schema, buf) reader = pa.BufferReader(buf.getvalue().to_pybytes()) reconstructed_schema = pq.read_schema(reader) assert reconstructed_schema == reconstructed_schema assert reconstructed_schema[0] == reconstructed_schema[0] # This breaks on master / regression from 0.16.0 assert schema[0] == reconstructed_schema[0] # This never worked but should assert reconstructed_schema == schema assert schema == reconstructed_schema {code} -- This message was sent by Atlassian Jira (v8.3.4#803005)