[ https://issues.apache.org/jira/browse/ARROW-15645?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Ravi Gummadi updated ARROW-15645: --------------------------------- Description: Am facing an endianness issue on s390x(big endian) when converting the data read through flight to pandas data frame. (1) table.validate() fails with error Traceback (most recent call last): File "/tmp/2.py", line 51, in <module> table.validate() File "pyarrow/table.pxi", line 1232, in pyarrow.lib.Table.validate File "pyarrow/error.pxi", line 99, in pyarrow.lib.check_status pyarrow.lib.ArrowInvalid: Column 1: In chunk 0: Invalid: Negative offsets in binary array (2) table.to_pandas() gives a segmentation fault ____________ Here is a sample code that I am using: from pyarrow import flight import os import json flight_endpoint = os.environ.get("flight_server_url", "grpc+tls://...local:443") print(flight_endpoint) # class TokenClientAuthHandler(flight.ClientAuthHandler): """An example implementation of authentication via handshake. With the default constructor, the user token is read from the environment: TokenClientAuthHandler(). You can also pass a user token as parameter to the constructor, TokenClientAuthHandler(yourtoken). 
""" def __init__(self, token: str = None): super().__init__() if( token != None): strToken = strToken = 'Bearer {}'.format(token) else: strToken = 'Bearer {}'.format(os.environ.get("some_auth_token")) self.token = strToken.encode('utf-8') #print(self.token) def authenticate(self, outgoing, incoming): outgoing.write(self.token) self.token = incoming.read() def get_token(self): return self.token readClient = flight.FlightClient(flight_endpoint) readClient.authenticate(TokenClientAuthHandler()) cmd = json.dumps({...}) descriptor = flight.FlightDescriptor.for_command(cmd) flightInfo = readClient.get_flight_info(descriptor) reader = readClient.do_get(flightInfo.endpoints[0].ticket) table = reader.read_all() print(table) print(table.num_columns) print(table.num_rows) table.validate() table.to_pandas() was: Am facing an endianness issue on s390x(big endian) when converting the data read through flight to pandas data frame. (1) table.validate() fails with error Traceback (most recent call last): File "/tmp/2.py", line 51, in <module> table.validate() File "pyarrow/table.pxi", line 1232, in pyarrow.lib.Table.validate File "pyarrow/error.pxi", line 99, in pyarrow.lib.check_status pyarrow.lib.ArrowInvalid: Column 1: In chunk 0: Invalid: Negative offsets in binary array (2) table.to_pandas() gives a segmentation fault ____________ Here is a sample code that I am using: from pyarrow import flight import os import json flight_endpoint = os.environ.get("flight_server_url", "grpc+tls://...local:443") print(flight_endpoint) # class TokenClientAuthHandler(flight.ClientAuthHandler): """An example implementation of authentication via handshake. With the default constructor, the user token is read from the environment: TokenClientAuthHandler(). You can also pass a user token as parameter to the constructor, TokenClientAuthHandler(yourtoken). 
""" def __init__(self, token: str = None): super().__init__() if( token != None): strToken = strToken = 'Bearer {}'.format(token) else: strToken = 'Bearer {}'.format(os.environ.get("some_auth_token")) self.token = strToken.encode('utf-8') #print(self.token) def authenticate(self, outgoing, incoming): outgoing.write(self.token) self.token = incoming.read() def get_token(self): return self.token readClient = flight.FlightClient(flight_endpoint) readClient.authenticate(TokenClientAuthHandler()) cmd = json.dumps({...}) descriptor = flight.FlightDescriptor.for_command(cmd) flightInfo = readClient.get_flight_info(descriptor) reader = readClient.do_get(flightInfo.endpoints[0].ticket) table = reader.read_all() print(table) print(table.num_columns) print(table.num_rows) table.validate() table.to_pandas() > Data read through Flight is having endianness issue on s390x > ------------------------------------------------------------ > > Key: ARROW-15645 > URL: https://issues.apache.org/jira/browse/ARROW-15645 > Project: Apache Arrow > Issue Type: Bug > Components: C++, FlightRPC, Python > Affects Versions: 5.0.0 > Environment: Linux s390x (big endian) > Reporter: Ravi Gummadi > Priority: Major > > Am facing an endianness issue on s390x(big endian) when converting the data > read through flight to pandas data frame. 
> (1) table.validate() fails with error > Traceback (most recent call last): > File "/tmp/2.py", line 51, in <module> > table.validate() > File "pyarrow/table.pxi", line 1232, in pyarrow.lib.Table.validate > File "pyarrow/error.pxi", line 99, in pyarrow.lib.check_status > pyarrow.lib.ArrowInvalid: Column 1: In chunk 0: Invalid: Negative offsets in > binary array > (2) table.to_pandas() gives a segmentation fault > ____________ > Here is a sample code that I am using: > from pyarrow import flight > import os > import json > flight_endpoint = os.environ.get("flight_server_url", > "grpc+tls://...local:443") > print(flight_endpoint) > # > class TokenClientAuthHandler(flight.ClientAuthHandler): > """An example implementation of authentication via handshake. > With the default constructor, the user token is read from the > environment: TokenClientAuthHandler(). > You can also pass a user token as parameter to the constructor, > TokenClientAuthHandler(yourtoken). > """ > def __init__(self, token: str = None): > super().__init__() > if( token != None): > strToken = strToken = 'Bearer {}'.format(token) > else: > strToken = 'Bearer {}'.format(os.environ.get("some_auth_token")) > self.token = strToken.encode('utf-8') > #print(self.token) > def authenticate(self, outgoing, incoming): > outgoing.write(self.token) > self.token = incoming.read() > def get_token(self): > return self.token > > readClient = flight.FlightClient(flight_endpoint) > readClient.authenticate(TokenClientAuthHandler()) > cmd = json.dumps({...}) > descriptor = flight.FlightDescriptor.for_command(cmd) > flightInfo = readClient.get_flight_info(descriptor) > reader = readClient.do_get(flightInfo.endpoints[0].ticket) > table = reader.read_all() > print(table) > print(table.num_columns) > print(table.num_rows) > table.validate() > table.to_pandas() -- This message was sent by Atlassian Jira (v8.20.1#820001)