HyukjinKwon commented on code in PR #47907: URL: https://github.com/apache/spark/pull/47907#discussion_r1735727659
########## python/pyspark/sql/functions/builtin.py: ########## @@ -16308,6 +16308,54 @@ def try_parse_json( return _invoke_function("try_parse_json", _to_java_column(col)) +@_try_remote_functions +def to_variant_object( + col: "ColumnOrName", +) -> Column: + """ + Converts a column containing nested inputs (array/map/struct) into variants where maps and + structs are converted to variant objects which are unordered unlike SQL structs. Input maps can + only have string keys. + + .. versionadded:: 4.0.0 + + Parameters + ---------- + col : :class:`~pyspark.sql.Column` or str + a column with a nested schema or column name + + Returns + ------- + :class:`~pyspark.sql.Column` + a new column of VariantType. + + Examples + -------- + >>> from pyspark.sql.types import ArrayType, StructType, StructField, StringType, IntegerType, \ + ... MapType + >>> from pyspark.sql.functions.builtin import to_variant_object + >>> schema = StructType([ \ + ... StructField("i", IntegerType(), True), \ + ... StructField("v", ArrayType(StructType([ \ + ... StructField("a", MapType(StringType(), IntegerType()), True) \ + ... ]), True)) \ + ... ]) + >>> data = [(1, [{"a": {"b": 2}}])] + >>> df = spark.createDataFrame(data, schema) + >>> df.select(to_variant_object(df.v)) + DataFrame[to_variant_object(v): variant] + >>> df.select(to_variant_object(df.v)).show(truncate=False) Review Comment: Let's add a test in real unittests, e.g., test_functions.py so the test can run in both Spark Connect and Spark Classic. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org