Github user HyukjinKwon commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22775#discussion_r228504453

    --- Diff: python/pyspark/sql/functions.py ---
    @@ -2365,30 +2365,32 @@ def to_json(col, options={}):

     @ignore_unicode_prefix
     @since(2.4)
    -def schema_of_json(col, options={}):
    +def schema_of_json(json, options={}):
         """
    -    Parses a column containing a JSON string and infers its schema in DDL format.
    +    Parses a JSON string and infers its schema in DDL format.

    -    :param col: string column in json format
    +    :param json: a JSON string or a string literal containing a JSON string.
         :param options: options to control parsing. accepts the same options as the JSON datasource

         .. versionchanged:: 3.0
            It accepts `options` parameter to control schema inferring.

    -    >>> from pyspark.sql.types import *
    -    >>> data = [(1, '{"a": 1}')]
    -    >>> df = spark.createDataFrame(data, ("key", "value"))
    -    >>> df.select(schema_of_json(df.value).alias("json")).collect()
    -    [Row(json=u'struct<a:bigint>')]
    +    >>> df = spark.range(1)
         >>> df.select(schema_of_json(lit('{"a": 0}')).alias("json")).collect()
         [Row(json=u'struct<a:bigint>')]
    -    >>> schema = schema_of_json(lit('{a: 1}'), {'allowUnquotedFieldNames':'true'})
    +    >>> schema = schema_of_json('{a: 1}', {'allowUnquotedFieldNames':'true'})
         >>> df.select(schema.alias("json")).collect()
         [Row(json=u'struct<a:bigint>')]
         """
    +    if isinstance(json, basestring):
    --- End diff --

    +1
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org