Github user HyukjinKwon commented on a diff in the pull request: https://github.com/apache/spark/pull/20937#discussion_r180112595 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala --- @@ -86,14 +85,34 @@ private[sql] class JSONOptions( val multiLine = parameters.get("multiLine").map(_.toBoolean).getOrElse(false) - val lineSeparator: Option[String] = parameters.get("lineSep").map { sep => - require(sep.nonEmpty, "'lineSep' cannot be an empty string.") - sep + /** + * A string between two consecutive JSON records. + */ + val lineSeparator: Option[String] = parameters.get("lineSep") + + /** + * Standard encoding (charset) name. For example UTF-8, UTF-16LE and UTF-32BE. + * If the encoding is not specified (None), it will be detected automatically. + */ + val encoding: Option[String] = parameters.get("encoding") + .orElse(parameters.get("charset")).map { enc => + val blacklist = List("UTF16", "UTF32") + val isBlacklisted = blacklist.contains(enc.toUpperCase.replaceAll("-|_", "")) + require(multiLine || !isBlacklisted, + s"""The ${enc} encoding must not be included in the blacklist: + | ${blacklist.mkString(", ")}""".stripMargin) + + val forcingLineSep = !(multiLine == false && enc != "UTF-8" && lineSeparator.isEmpty) + require(forcingLineSep, + s"""The lineSep option must be specified for the $enc encoding. + |Example: .option("lineSep", "|^|") --- End diff -- Yeah, but if you execute this in a SQL context, the example is irrelevant.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org