Richard Tia created ARROW-16975: ----------------------------------- Summary: [Python] Running a substrait plan that includes an extension type returns results whose column.chunks attribute raises an AttributeError when accessed Key: ARROW-16975 URL: https://issues.apache.org/jira/browse/ARROW-16975 Project: Apache Arrow Issue Type: Bug Components: Python Reporter: Richard Tia Attachments: lineitem.json
SQL: {code:java} SELECT l_returnflag, l_linestatus FROM lineitem{code} Substrait plan type info for l_returnflag: {code:java} { "fixedChar": { "length": 1, "typeVariationReference": 0, "nullability": "NULLABILITY_NULLABLE" } }{code} fixedChar is an extension type. Error: {code:java} pyarrow/table.pxi:1223: in pyarrow.lib.ChunkedArray.chunks.__get__ ??? pyarrow/table.pxi:1241: in iterchunks ??? pyarrow/table.pxi:1185: in pyarrow.lib.ChunkedArray.chunk ??? pyarrow/public-api.pxi:200: in pyarrow.lib.pyarrow_wrap_array ??? _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ > ??? E AttributeError: 'pyarrow.lib.BaseExtensionType' object has no attribute '__arrow_ext_class__' {code} Reproduction Steps: {code:java} import os import pyarrow as pa from pyarrow import json as pyarrow_json from pyarrow import substrait from pyarrow.lib import tobytes substrait_query = <code block below> json_file_path = os.path.join(<path>, 'lineitem.json') arrow_data_path_ipc = os.path.join(<path>, 'substrait_data.arrow') substrait_query = tobytes(substrait_query.replace("FILENAME_PLACEHOLDER", arrow_data_path_ipc)) # Save lineitem.json into IPC arrow binary file table = pyarrow_json.read_json(json_file_path) with pa.ipc.RecordBatchFileWriter(arrow_data_path_ipc, schema=table.schema) as writer: writer.write_table(table) # Run the substrait query plan buf = pa._substrait._parse_json_plan(substrait_query) reader = substrait.run_query(buf) result = reader.read_all() print(result.columns[0].chunks) {code} lineitem.json is attached. Substrait query plan: {code:java} """ { "extensionUris": [], "extensions": [], "relations": [{ "root": { "input": { "project": { "common": { }, "input": { "read": { "common": { "direct": { } }, "baseSchema": { "names": ["L_ORDERKEY", "L_PARTKEY", "L_SUPPKEY", "L_LINENUMBER", "L_QUANTITY", "L_EXTENDEDPRICE", "L_DISCOUNT", "L_TAX", "L_RETURNFLAG", "L_LINESTATUS", "L_SHIPDATE", "L_COMMITDATE", "L_RECEIPTDATE", "L_SHIPINSTRUCT", "L_SHIPMODE", "L_COMMENT"], "struct": { "types": 
[{ "i64": { "typeVariationReference": 0, "nullability": "NULLABILITY_NULLABLE" } }, { "i64": { "typeVariationReference": 0, "nullability": "NULLABILITY_NULLABLE" } }, { "i64": { "typeVariationReference": 0, "nullability": "NULLABILITY_NULLABLE" } }, { "i32": { "typeVariationReference": 0, "nullability": "NULLABILITY_NULLABLE" } }, { "decimal": { "scale": 0, "precision": 19, "typeVariationReference": 0, "nullability": "NULLABILITY_NULLABLE" } }, { "decimal": { "scale": 0, "precision": 19, "typeVariationReference": 0, "nullability": "NULLABILITY_NULLABLE" } }, { "decimal": { "scale": 0, "precision": 19, "typeVariationReference": 0, "nullability": "NULLABILITY_NULLABLE" } }, { "decimal": { "scale": 0, "precision": 19, "typeVariationReference": 0, "nullability": "NULLABILITY_NULLABLE" } }, { "fixedChar": { "length": 1, "typeVariationReference": 0, "nullability": "NULLABILITY_NULLABLE" } }, { "fixedChar": { "length": 1, "typeVariationReference": 0, "nullability": "NULLABILITY_NULLABLE" } }, { "date": { "typeVariationReference": 0, "nullability": "NULLABILITY_NULLABLE" } }, { "date": { "typeVariationReference": 0, "nullability": "NULLABILITY_NULLABLE" } }, { "date": { "typeVariationReference": 0, "nullability": "NULLABILITY_NULLABLE" } }, { "fixedChar": { "length": 25, "typeVariationReference": 0, "nullability": "NULLABILITY_NULLABLE" } }, { "fixedChar": { "length": 10, "typeVariationReference": 0, "nullability": "NULLABILITY_NULLABLE" } }, { "varchar": { "length": 44, "typeVariationReference": 0, "nullability": "NULLABILITY_NULLABLE" } }], "typeVariationReference": 0, "nullability": "NULLABILITY_REQUIRED" } }, "local_files": { "items": [ { "uri_file": "file://FILENAME_PLACEHOLDER" } ] } } }, "expressions": [{ "selection": { "directReference": { "structField": { "field": 8 } }, "rootReference": { } } }, { "selection": { "directReference": { "structField": { "field": 9 } }, "rootReference": { } } }] } }, "names": ["L_RETURNFLAG", "L_LINESTATUS"] } }], "expectedTypeUrls": 
[] } """ {code} -- This message was sent by Atlassian Jira (v8.20.10#820010)