Repository: spark Updated Branches: refs/heads/master a3ba3a899 -> aec0af4a9
[SPARK-25972][PYTHON] Missed JSON options in streaming.py ## What changes were proposed in this pull request? Added JSON options for `json()` in streaming.py that are present in the similar method in readwriter.py. In particular, the missing options are `dropFieldIfAllNull` and `encoding`. Closes #22973 from MaxGekk/streaming-missed-options. Authored-by: Maxim Gekk <max.g...@gmail.com> Signed-off-by: hyukjinkwon <gurwls...@apache.org> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/aec0af4a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/aec0af4a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/aec0af4a Branch: refs/heads/master Commit: aec0af4a952df2957e21d39d1e0546a36ab7ab86 Parents: a3ba3a8 Author: Maxim Gekk <max.g...@gmail.com> Authored: Sun Nov 11 21:01:29 2018 +0800 Committer: hyukjinkwon <gurwls...@apache.org> Committed: Sun Nov 11 21:01:29 2018 +0800 ---------------------------------------------------------------------- python/pyspark/sql/streaming.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/aec0af4a/python/pyspark/sql/streaming.py ---------------------------------------------------------------------- diff --git a/python/pyspark/sql/streaming.py b/python/pyspark/sql/streaming.py index 02b14ea..58ca7b8 100644 --- a/python/pyspark/sql/streaming.py +++ b/python/pyspark/sql/streaming.py @@ -404,7 +404,8 @@ class DataStreamReader(OptionUtils): allowComments=None, allowUnquotedFieldNames=None, allowSingleQuotes=None, allowNumericLeadingZero=None, allowBackslashEscapingAnyCharacter=None, mode=None, columnNameOfCorruptRecord=None, dateFormat=None, timestampFormat=None, - multiLine=None, allowUnquotedControlChars=None, lineSep=None, locale=None): + multiLine=None, allowUnquotedControlChars=None, lineSep=None, locale=None, + 
dropFieldIfAllNull=None, encoding=None): """ Loads a JSON file stream and returns the results as a :class:`DataFrame`. @@ -472,6 +473,13 @@ class DataStreamReader(OptionUtils): :param locale: sets a locale as language tag in IETF BCP 47 format. If None is set, it uses the default value, ``en-US``. For instance, ``locale`` is used while parsing dates and timestamps. + :param dropFieldIfAllNull: whether to ignore column of all null values or empty + array/struct during schema inference. If None is set, it + uses the default value, ``false``. + :param encoding: allows to forcibly set one of standard basic or extended encoding for + the JSON files. For example UTF-16BE, UTF-32LE. If None is set, + the encoding of input JSON will be detected automatically + when the multiLine option is set to ``true``. >>> json_sdf = spark.readStream.json(tempfile.mkdtemp(), schema = sdf_schema) >>> json_sdf.isStreaming @@ -486,7 +494,8 @@ class DataStreamReader(OptionUtils): allowBackslashEscapingAnyCharacter=allowBackslashEscapingAnyCharacter, mode=mode, columnNameOfCorruptRecord=columnNameOfCorruptRecord, dateFormat=dateFormat, timestampFormat=timestampFormat, multiLine=multiLine, - allowUnquotedControlChars=allowUnquotedControlChars, lineSep=lineSep, locale=locale) + allowUnquotedControlChars=allowUnquotedControlChars, lineSep=lineSep, locale=locale, + dropFieldIfAllNull=dropFieldIfAllNull, encoding=encoding) if isinstance(path, basestring): return self._df(self._jreader.json(path)) else: --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org