This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch branch-2.4 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-2.4 by this push: new 3eb6e38 [SPARK-26645][PYTHON][2.4] Support decimals with negative scale when parsing datatype 3eb6e38 is described below commit 3eb6e389afb2435476bedca2e88590aa38421480 Author: Marco Gaido <marcogaid...@gmail.com> AuthorDate: Wed Nov 25 15:00:39 2020 -0800 [SPARK-26645][PYTHON][2.4] Support decimals with negative scale when parsing datatype ## What changes were proposed in this pull request? This is a backport of #23575 When parsing datatypes from the json internal representation, PySpark doesn't support decimals with negative scales. Since they are allowed and can actually happen, PySpark should be able to successfully parse them. ## How was this patch tested? added test Closes #30503 from dongjoon-hyun/SPARK-26645. Authored-by: Marco Gaido <marcogaid...@gmail.com> Signed-off-by: Dongjoon Hyun <dongj...@apache.org> --- python/pyspark/sql/tests.py | 8 +++++++- python/pyspark/sql/types.py | 4 +++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index b995227..70f3882 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -88,7 +88,7 @@ from pyspark.sql.types import _array_signed_int_typecode_ctype_mappings, _array_ from pyspark.sql.types import _array_unsigned_int_typecode_ctype_mappings from pyspark.sql.types import _merge_type from pyspark.tests import QuietTest, ReusedPySparkTestCase, PySparkTestCase, SparkSubmitTests -from pyspark.sql.functions import UserDefinedFunction, sha2, lit, input_file_name, udf +from pyspark.sql.functions import UserDefinedFunction, sha2, lit, input_file_name, udf, col from pyspark.sql.window import Window from pyspark.sql.utils import AnalysisException, ParseException, IllegalArgumentException @@ -1134,6 +1134,12 @@ class SQLTests(ReusedSQLTestCase): df = self.spark.createDataFrame([{'a': 1}], ["b"]) self.assertEqual(df.columns, ['b']) + def test_negative_decimal(self): + df = self.spark.createDataFrame([(1, ), (11, )], ["value"]) + ret = df.select(col("value").cast(DecimalType(1, -1))).collect() + actual = list(map(lambda r: int(r.value), ret)) + self.assertEqual(actual, [0, 10]) + def test_create_dataframe_from_objects(self): data = [MyObject(1, "1"), MyObject(2, "2")] df = self.spark.createDataFrame(data) diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py index 0d73963..c10da35 100644 --- a/python/pyspark/sql/types.py +++ b/python/pyspark/sql/types.py @@ -752,7 +752,7 @@ _all_complex_types = dict((v.typeName(), v) for v in [ArrayType, MapType, StructType]) -_FIXED_DECIMAL = re.compile(r"decimal\(\s*(\d+)\s*,\s*(\d+)\s*\)") +_FIXED_DECIMAL = re.compile(r"decimal\(\s*(\d+)\s*,\s*(-?\d+)\s*\)") def _parse_datatype_string(s): @@ -865,6 +865,8 @@ def _parse_datatype_json_string(json_string): >>> complex_maptype = MapType(complex_structtype, ... complex_arraytype, False) >>> check_datatype(complex_maptype) + >>> # Decimal with negative scale. + >>> check_datatype(DecimalType(1,-1)) """ return _parse_datatype_json_value(json.loads(json_string)) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org