Github user BryanCutler commented on a diff in the pull request:

    https://github.com/apache/spark/pull/20725#discussion_r210679722

--- Diff: python/pyspark/sql/tests.py ---
@@ -4331,13 +4354,22 @@ def test_createDataFrame_fallback_enabled(self):
         self.assertEqual(df.collect(), [Row(a={u'a': 1})])
 
     def test_createDataFrame_fallback_disabled(self):
+        from distutils.version import LooseVersion
         import pandas as pd
+        import pyarrow as pa
+
         with QuietTest(self.sc):
             with self.assertRaisesRegexp(TypeError, 'Unsupported type'):
                 self.spark.createDataFrame(
                     pd.DataFrame([[{u'a': 1}]]), "a: map<string, int>")
 
+        # TODO: remove BinaryType check once minimum pyarrow version is 0.10.0
+        if LooseVersion(pa.__version__) < LooseVersion("0.10.0"):
+            with QuietTest(self.sc):
+                with self.assertRaisesRegexp(TypeError, 'Unsupported type.*BinaryType'):
+                    self.spark.createDataFrame(
+                        pd.DataFrame([[{'a': b'aaa'}]]), "a: binary")
--- End diff --

In general, Spark only accepts `bytearray`, while pyarrow can accept either `bytearray` or `bytes`. In pyarrow < 0.10.0, `bytearray` wasn't supported, though, which is why I used `bytes` for the pandas DataFrame here.
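
As a rough sketch of the `bytes`/`bytearray` distinction (not from the PR; the 0.10.0 boundary is taken from the comment above), pyarrow's binary conversion can be tried directly:

```python
# Illustrative sketch only, not part of the PR. Shows pyarrow converting
# Python bytes to an Arrow binary array, and (per the comment above)
# accepting bytearray as well from pyarrow 0.10.0 onward.
import pyarrow as pa

# bytes is converted to Arrow binary across pyarrow versions
arr_from_bytes = pa.array([b'aaa'], type=pa.binary())

# bytearray is assumed to require pyarrow >= 0.10.0
arr_from_bytearray = pa.array([bytearray(b'aaa')], type=pa.binary())

print(arr_from_bytes)
print(arr_from_bytearray)
```

On pyarrow releases older than 0.10.0, the `bytearray` conversion would presumably fail, which is why the test data in the diff uses `bytes`.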