allisonwang-db commented on code in PR #42938: URL: https://github.com/apache/spark/pull/42938#discussion_r1327555626
########## python/pyspark/sql/functions.py: ########## @@ -13041,6 +13041,120 @@ def json_object_keys(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("json_object_keys", col) +@_try_remote_functions +def from_xml( + col: "ColumnOrName", + schema: Union[StructType, Column, str], + options: Optional[Dict[str, str]] = None, +) -> Column: + """ + Parses a column containing a XML string to a row with + the specified schema. Returns `null`, in the case of an unparseable string. + + .. versionadded:: 4.0.0 + + Parameters + ---------- + col : :class:`~pyspark.sql.Column` or str + a column or column name in XML format + schema : :class:`StructType` or str + a StructType or Python string literal with a DDL-formatted string + to use when parsing the Xml column + options : dict, optional + options to control parsing. accepts the same options as the Xml datasource. + See `Data Source Option <https://spark.apache.org/docs/latest/sql-data-sources-xml.html#data-source-option>`_ + for the version you use. + + .. # noqa + + Returns + ------- + :class:`~pyspark.sql.Column` + a new column of complex type from given XML object. + + Examples Review Comment: Documentation is extremely important for a better user-experience. @sandip-db could you please create a ticket under https://issues.apache.org/jira/browse/SPARK-44728? ########## python/pyspark/sql/functions.py: ########## @@ -13041,6 +13041,120 @@ def json_object_keys(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("json_object_keys", col) +@_try_remote_functions +def from_xml( + col: "ColumnOrName", + schema: Union[StructType, Column, str], + options: Optional[Dict[str, str]] = None, +) -> Column: + """ + Parses a column containing a XML string to a row with + the specified schema. Returns `null`, in the case of an unparseable string. + + .. 
versionadded:: 4.0.0 + + Parameters + ---------- + col : :class:`~pyspark.sql.Column` or str + a column or column name in XML format + schema : :class:`StructType` or str + a StructType or Python string literal with a DDL-formatted string + to use when parsing the Xml column + options : dict, optional + options to control parsing. accepts the same options as the Xml datasource. + See `Data Source Option <https://spark.apache.org/docs/latest/sql-data-sources-xml.html#data-source-option>`_ + for the version you use. + + .. # noqa + + Returns + ------- + :class:`~pyspark.sql.Column` + a new column of complex type from given XML object. + + Examples Review Comment: Documentation is extremely important for a better user-experience. @sandip-db could you please create a ticket under https://issues.apache.org/jira/browse/SPARK-44728? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org