[ https://issues.apache.org/jira/browse/SPARK-41902?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Sandeep Singh updated SPARK-41902: ---------------------------------- Description: {code:java} expected = {"a": 1, "b": 2} expected2 = {"c": 3, "d": 4} df = self.spark.createDataFrame( [(list(expected.keys()), list(expected.values()))], ["k", "v"] ) actual = ( df.select( expr("map('c', 3, 'd', 4) as dict2"), map_from_arrays(df.k, df.v).alias("dict"), "*", ) .select( map_contains_key("dict", "a").alias("one"), map_contains_key("dict", "d").alias("not_exists"), map_keys("dict").alias("keys"), map_values("dict").alias("values"), map_entries("dict").alias("items"), "*", ) .select( map_concat("dict", "dict2").alias("merged"), map_from_entries(arrays_zip("keys", "values")).alias("from_items"), "*", ) .first() ) self.assertEqual(expected, actual["dict"]){code} {code:java} Traceback (most recent call last): File "/Users/s.singh/personal/spark-oss/python/pyspark/sql/tests/test_functions.py", line 1142, in test_map_functions self.assertEqual(expected, actual["dict"]) AssertionError: {'a': 1, 'b': 2} != [('a', 1), ('b', 2)]{code} was: {code:java} from pyspark.sql import functions funs = [ (functions.acosh, "ACOSH"), (functions.asinh, "ASINH"), (functions.atanh, "ATANH"), ] cols = ["a", functions.col("a")] for f, alias in funs: for c in cols: self.assertIn(f"{alias}(a)", repr(f(c))){code} {code:java} Traceback (most recent call last): File "/Users/s.singh/personal/spark-oss/python/pyspark/sql/tests/test_functions.py", line 271, in test_inverse_trig_functions self.assertIn(f"{alias}(a)", repr(f(c))) AssertionError: 'ACOSH(a)' not found in "Column<'acosh(ColumnReference(a))'>"{code} {code:java} from pyspark.sql.functions import col, lit, overlay from itertools import chain import re actual = list( chain.from_iterable( [ re.findall("(overlay\\(.*\\))", str(x)) for x in [ overlay(col("foo"), col("bar"), 1), overlay("x", "y", 3), overlay(col("x"), col("y"), 1, 3), overlay("x", "y", 2, 5), overlay("x", "y", lit(11)), overlay("x", "y", lit(2), lit(5)), ] ] ) ) expected = [ 
"overlay(foo, bar, 1, -1)", "overlay(x, y, 3, -1)", "overlay(x, y, 1, 3)", "overlay(x, y, 2, 5)", "overlay(x, y, 11, -1)", "overlay(x, y, 2, 5)", ] self.assertListEqual(actual, expected) df = self.spark.createDataFrame([("SPARK_SQL", "CORE", 7, 0)], ("x", "y", "pos", "len")) exp = [Row(ol="SPARK_CORESQL")] self.assertTrue( all( [ df.select(overlay(df.x, df.y, 7, 0).alias("ol")).collect() == exp, df.select(overlay(df.x, df.y, lit(7), lit(0)).alias("ol")).collect() == exp, df.select(overlay("x", "y", "pos", "len").alias("ol")).collect() == exp, ] ) ) {code} {code:java} Traceback (most recent call last): File "/Users/s.singh/personal/spark-oss/python/pyspark/sql/tests/test_functions.py", line 675, in test_overlay self.assertListEqual(actual, expected) AssertionError: Lists differ: ['overlay(ColumnReference(foo), ColumnReference(bar[402 chars]5))'] != ['overlay(foo, bar, 1, -1)', 'overlay(x, y, 3, -1)'[90 chars] 5)'] First differing element 0: 'overlay(ColumnReference(foo), ColumnReference(bar), Literal(1), Literal(-1))' 'overlay(foo, bar, 1, -1)' - ['overlay(ColumnReference(foo), ColumnReference(bar), Literal(1), Literal(-1))', - 'overlay(ColumnReference(x), ColumnReference(y), Literal(3), Literal(-1))', - 'overlay(ColumnReference(x), ColumnReference(y), Literal(1), Literal(3))', - 'overlay(ColumnReference(x), ColumnReference(y), Literal(2), Literal(5))', - 'overlay(ColumnReference(x), ColumnReference(y), Literal(11), Literal(-1))', - 'overlay(ColumnReference(x), ColumnReference(y), Literal(2), Literal(5))'] + ['overlay(foo, bar, 1, -1)', + 'overlay(x, y, 3, -1)', + 'overlay(x, y, 1, 3)', + 'overlay(x, y, 2, 5)', + 'overlay(x, y, 11, -1)', + 'overlay(x, y, 2, 5)'] {code} > Fix String representation of maps created by `map_from_arrays` > -------------------------------------------------------------- > > Key: SPARK-41902 > URL: https://issues.apache.org/jira/browse/SPARK-41902 > Project: Spark > Issue Type: Sub-task > Components: Connect > Affects Versions: 3.4.0 > 
Reporter: Sandeep Singh > Priority: Major > > {code:java} > expected = {"a": 1, "b": 2} > expected2 = {"c": 3, "d": 4} > df = self.spark.createDataFrame( > [(list(expected.keys()), list(expected.values()))], ["k", "v"] > ) > actual = ( > df.select( > expr("map('c', 3, 'd', 4) as dict2"), > map_from_arrays(df.k, df.v).alias("dict"), > "*", > ) > .select( > map_contains_key("dict", "a").alias("one"), > map_contains_key("dict", "d").alias("not_exists"), > map_keys("dict").alias("keys"), > map_values("dict").alias("values"), > map_entries("dict").alias("items"), > "*", > ) > .select( > map_concat("dict", "dict2").alias("merged"), > map_from_entries(arrays_zip("keys", "values")).alias("from_items"), > "*", > ) > .first() > ) > self.assertEqual(expected, actual["dict"]){code} > {code:java} > Traceback (most recent call last): > File > "/Users/s.singh/personal/spark-oss/python/pyspark/sql/tests/test_functions.py", > line 1142, in test_map_functions > self.assertEqual(expected, actual["dict"]) > AssertionError: {'a': 1, 'b': 2} != [('a', 1), ('b', 2)]{code} -- This message was sent by Atlassian Jira (v8.20.10#820010) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org For additional commands, e-mail: issues-help@spark.apache.org