Github user HyukjinKwon commented on a diff in the pull request: https://github.com/apache/spark/pull/19319#discussion_r140485612 --- Diff: python/pyspark/sql/dataframe.py --- @@ -1891,14 +1892,19 @@ def toPandas(self): "if using spark.sql.execution.arrow.enable=true" raise ImportError("%s\n%s" % (e.message, msg)) else: + pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns) + dtype = {} for field in self.schema: pandas_type = _to_corrected_pandas_type(field.dataType) - if pandas_type is not None: + # SPARK-21766: if an integer field is nullable and has null values, it can be + # inferred by pandas as float column. Once we convert the column with NaN back + # to integer type e.g., np.int16, we will hit exception. --- End diff -- BTW, it'd be nicer if we added a comment explaining the consequence, saying something like: "... so, we use the inferred float type rather than the corrected type from the schema."
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org