GitHub user HyukjinKwon commented on a diff in the pull request: https://github.com/apache/spark/pull/20937#discussion_r183227276 --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonDataSource.scala --- @@ -175,11 +187,15 @@ object MultiLineJsonDataSource extends JsonDataSource { .values } - private def createParser(jsonFactory: JsonFactory, record: PortableDataStream): JsonParser = { + private def createParser( + jsonFactory: JsonFactory, + record: PortableDataStream, + encoding: Option[String]): JsonParser = { val path = new Path(record.getPath()) - CreateJacksonParser.inputStream( - jsonFactory, - CodecStreams.createInputStreamWithCloseResource(record.getConfiguration, path)) + val is = CodecStreams.createInputStreamWithCloseResource(record.getConfiguration, path) + + encoding.map(enc => CreateJacksonParser.inputStream(enc, jsonFactory, is)) + .getOrElse(CreateJacksonParser.inputStream(jsonFactory, is)) --- End diff -- Hm, @MaxGekk, wouldn't this also do a per-record operation?
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org