[ https://issues.apache.org/jira/browse/SPARK-29041?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Hyukjin Kwon updated SPARK-29041:
---------------------------------
    Description: 
{code}
spark.createDataFrame([[b"abcd"]], "col binary")
{code}
simply fails as below.

In Python 3:
{code}
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/.../spark/python/pyspark/sql/session.py", line 787, in createDataFrame
    rdd, schema = self._createFromLocal(map(prepare, data), schema)
  File "/.../spark/python/pyspark/sql/session.py", line 442, in _createFromLocal
    data = list(data)
  File "/.../spark/python/pyspark/sql/session.py", line 769, in prepare
    verify_func(obj)
  File "/.../forked/spark/python/pyspark/sql/types.py", line 1403, in verify
    verify_value(obj)
  File "/.../spark/python/pyspark/sql/types.py", line 1384, in verify_struct
    verifier(v)
  File "/.../spark/python/pyspark/sql/types.py", line 1403, in verify
    verify_value(obj)
  File "/.../spark/python/pyspark/sql/types.py", line 1397, in verify_default
    verify_acceptable_types(obj)
  File "/.../spark/python/pyspark/sql/types.py", line 1282, in verify_acceptable_types
    % (dataType, obj, type(obj))))
TypeError: field col: BinaryType can not accept object b'abcd' in type <class 'bytes'>
{code}

In Python 2:
{code}
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/.../spark/python/pyspark/sql/session.py", line 787, in createDataFrame
    rdd, schema = self._createFromLocal(map(prepare, data), schema)
  File "/.../spark/python/pyspark/sql/session.py", line 442, in _createFromLocal
    data = list(data)
  File "/.../spark/python/pyspark/sql/session.py", line 769, in prepare
    verify_func(obj)
  File "/.../spark/python/pyspark/sql/types.py", line 1403, in verify
    verify_value(obj)
  File "/.../spark/python/pyspark/sql/types.py", line 1384, in verify_struct
    verifier(v)
  File "/.../spark/python/pyspark/sql/types.py", line 1403, in verify
    verify_value(obj)
  File "/.../spark/python/pyspark/sql/types.py", line 1397, in verify_default
    verify_acceptable_types(obj)
  File "/.../spark/python/pyspark/sql/types.py", line 1282, in verify_acceptable_types
    % (dataType, obj, type(obj))))
TypeError: field col: BinaryType can not accept object 'abcd' in type <type 'str'>
{code}

{{bytes}} should also be able to be accepted as binary type.

  was:
{code}
spark.createDataFrame([[b"abcd"]], "col binary")
{code}
simply fails as below:
{code}
{code}
bytes should also be able to be accepted as binary type


> Allow createDataFrame to accept bytes as binary type
> ----------------------------------------------------
>
>                 Key: SPARK-29041
>                 URL: https://issues.apache.org/jira/browse/SPARK-29041
>             Project: Spark
>          Issue Type: Bug
>          Components: PySpark
>    Affects Versions: 2.4.4, 3.0.0
>            Reporter: Hyukjin Kwon
>            Priority: Major
>
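For reference, a minimal sketch (not part of the original report) of the failure described above and a possible interim workaround. It assumes an active SparkSession bound to {{spark}}, and that the PySpark type verifier currently accepts {{bytearray}} for {{BinaryType}} on the affected versions (an assumption, not confirmed by the ticket).

{code}
# Sketch only: reproduces the reported failure and shows a bytearray-based
# workaround. `spark` is assumed to be an existing SparkSession.
from pyspark.sql.types import BinaryType, StructField, StructType

schema = StructType([StructField("col", BinaryType())])

# Fails on the affected versions with:
#   TypeError: field col: BinaryType can not accept object b'abcd' in type <class 'bytes'>
# spark.createDataFrame([[b"abcd"]], "col binary")

# Workaround (assumption): wrapping the payload in a bytearray passes the
# BinaryType verification, so the DataFrame is created successfully.
df = spark.createDataFrame([[bytearray(b"abcd")]], schema)
df.show()
{code}

Once {{bytes}} is accepted for binary type as requested, the original one-liner from the description should work as-is.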
--
This message was sent by Atlassian Jira
(v8.3.2#803003)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org