[ https://issues.apache.org/jira/browse/SPARK-41900?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Sandeep Singh updated SPARK-41900: ---------------------------------- Description: {code:java} import numpy as np from pyspark.sql.functions import lit dtype_to_spark_dtypes = [ (np.int8, [("CAST(1 AS TINYINT)", "tinyint")]), (np.int16, [("CAST(1 AS SMALLINT)", "smallint")]), (np.int32, [("CAST(1 AS INT)", "int")]), (np.int64, [("CAST(1 AS BIGINT)", "bigint")]), (np.float32, [("CAST(1.0 AS FLOAT)", "float")]), (np.float64, [("CAST(1.0 AS DOUBLE)", "double")]), (np.bool_, [("true", "boolean")]), ] for dtype, spark_dtypes in dtype_to_spark_dtypes: self.assertEqual(self.spark.range(1).select(lit(dtype(1))).dtypes, spark_dtypes){code} {code:java} Traceback (most recent call last): File "/Users/s.singh/personal/spark-oss/python/pyspark/sql/tests/test_functions.py", line 1064, in test_lit_np_scalar self.assertEqual(self.spark.range(1).select(lit(dtype(1))).dtypes, spark_dtypes) File "/Users/s.singh/personal/spark-oss/python/pyspark/sql/utils.py", line 332, in wrapped return getattr(functions, f.__name__)(*args, **kwargs) File "/Users/s.singh/personal/spark-oss/python/pyspark/sql/connect/functions.py", line 198, in lit return Column(LiteralExpression._from_value(col)) File "/Users/s.singh/personal/spark-oss/python/pyspark/sql/connect/expressions.py", line 266, in _from_value return LiteralExpression(value=value, dataType=LiteralExpression._infer_type(value)) File "/Users/s.singh/personal/spark-oss/python/pyspark/sql/connect/expressions.py", line 262, in _infer_type raise ValueError(f"Unsupported Data Type {type(value).__name__}") ValueError: Unsupported Data Type int8 {code} was: {code:java} row = self.spark.createDataFrame([("Alice", None, None, None)], schema).fillna(True).first() self.assertEqual(row.age, None){code} {code:java} Traceback (most recent call last): File "/Users/s.singh/personal/spark-oss/python/pyspark/sql/tests/test_dataframe.py", line 231, in test_fillna self.assertEqual(row.age, None) AssertionError: nan != None{code} {code:java} row = ( 
self.spark.createDataFrame([("Alice", 10, None)], schema) .replace(10, 20, subset=["name", "height"]) .first() ) self.assertEqual(row.name, "Alice") self.assertEqual(row.age, 10) self.assertEqual(row.height, None) {code} {code:java} Traceback (most recent call last): File "/Users/s.singh/personal/spark-oss/python/pyspark/sql/tests/test_dataframe.py", line 372, in test_replace self.assertEqual(row.height, None) AssertionError: nan != None {code} > Support data type int8 > ---------------------- > > Key: SPARK-41900 > URL: https://issues.apache.org/jira/browse/SPARK-41900 > Project: Spark > Issue Type: Sub-task > Components: Connect > Affects Versions: 3.4.0 > Reporter: Sandeep Singh > Priority: Major > > {code:java} > import numpy as np > from pyspark.sql.functions import lit > dtype_to_spark_dtypes = [ > (np.int8, [("CAST(1 AS TINYINT)", "tinyint")]), > (np.int16, [("CAST(1 AS SMALLINT)", "smallint")]), > (np.int32, [("CAST(1 AS INT)", "int")]), > (np.int64, [("CAST(1 AS BIGINT)", "bigint")]), > (np.float32, [("CAST(1.0 AS FLOAT)", "float")]), > (np.float64, [("CAST(1.0 AS DOUBLE)", "double")]), > (np.bool_, [("true", "boolean")]), > ] > for dtype, spark_dtypes in dtype_to_spark_dtypes: > self.assertEqual(self.spark.range(1).select(lit(dtype(1))).dtypes, > spark_dtypes){code} > {code:java} > Traceback (most recent call last): > File > "/Users/s.singh/personal/spark-oss/python/pyspark/sql/tests/test_functions.py", > line 1064, in test_lit_np_scalar > self.assertEqual(self.spark.range(1).select(lit(dtype(1))).dtypes, > spark_dtypes) > File "/Users/s.singh/personal/spark-oss/python/pyspark/sql/utils.py", line > 332, in wrapped > return getattr(functions, f.__name__)(*args, **kwargs) > File > "/Users/s.singh/personal/spark-oss/python/pyspark/sql/connect/functions.py", > line 198, in lit > return Column(LiteralExpression._from_value(col)) > File > "/Users/s.singh/personal/spark-oss/python/pyspark/sql/connect/expressions.py", > line 266, in _from_value > return 
LiteralExpression(value=value, > dataType=LiteralExpression._infer_type(value)) > File > "/Users/s.singh/personal/spark-oss/python/pyspark/sql/connect/expressions.py", > line 262, in _infer_type > raise ValueError(f"Unsupported Data Type {type(value).__name__}") > ValueError: Unsupported Data Type int8 > {code} -- This message was sent by Atlassian Jira (v8.20.10#820010) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org For additional commands, e-mail: issues-help@spark.apache.org