Github user cloud-fan commented on a diff in the pull request: https://github.com/apache/spark/pull/22775#discussion_r228389378 --- Diff: python/pyspark/sql/functions.py --- @@ -2375,20 +2375,22 @@ def schema_of_json(col, options={}): .. versionchanged:: 3.0 It accepts `options` parameter to control schema inferring. - >>> from pyspark.sql.types import * - >>> data = [(1, '{"a": 1}')] - >>> df = spark.createDataFrame(data, ("key", "value")) - >>> df.select(schema_of_json(df.value).alias("json")).collect() - [Row(json=u'struct<a:bigint>')] + >>> df = spark.range(1) >>> df.select(schema_of_json(lit('{"a": 0}')).alias("json")).collect() [Row(json=u'struct<a:bigint>')] - >>> schema = schema_of_json(lit('{a: 1}'), {'allowUnquotedFieldNames':'true'}) + >>> schema = schema_of_json('{a: 1}', {'allowUnquotedFieldNames':'true'}) >>> df.select(schema.alias("json")).collect() [Row(json=u'struct<a:bigint>')] """ + if isinstance(col, basestring): --- End diff -- Shall we do the same for the Scala APIs, i.e. create `def schema_of_json(json: String)`?
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org