[GitHub] spark pull request #20949: [SPARK-19018][SQL] Add support for custom encodin...
Github user HyukjinKwon commented on a diff in the pull request: https://github.com/apache/spark/pull/20949#discussion_r204988217 --- Diff: sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala --- @@ -514,6 +516,41 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils with Te } } + test("SPARK-19018: Save csv with custom charset") { + +// scalastyle:off nonascii +val content = "µß áâä 中文" +// scalastyle:on nonascii + +Seq("iso-8859-1", "utf-8", "utf-16", "utf-32", "windows-1250").foreach { encoding => + withTempPath { path => +val csvDir = new File(path, "csv") +Seq(content).toDF().write + .option("encoding", encoding) + .csv(csvDir.getCanonicalPath) + +csvDir.listFiles().filter(_.getName.endsWith("csv")).foreach({ csvFile => + val readback = Files.readAllBytes(csvFile.toPath) + val expected = (content + Properties.lineSeparator).getBytes(Charset.forName(encoding)) + assert(readback === expected) +}) + } +} + } + + test("SPARK-19018: error handling for unsupported charsets") { +val exception = intercept[SparkException] { + withTempPath { path => +val csvDir = new File(path, "csv").getCanonicalPath +Seq("a,A,c,A,b,B").toDF().write + .option("encoding", "1-9588-osi") + .csv(csvDir) --- End diff -- nit: you could use directly `path.getCanonicalPath` --- - To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark pull request #20949: [SPARK-19018][SQL] Add support for custom encodin...
Github user HyukjinKwon commented on a diff in the pull request: https://github.com/apache/spark/pull/20949#discussion_r204988168 --- Diff: sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala --- @@ -514,6 +516,41 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils with Te } } + test("SPARK-19018: Save csv with custom charset") { + +// scalastyle:off nonascii +val content = "µß áâä 中文" +// scalastyle:on nonascii + +Seq("iso-8859-1", "utf-8", "utf-16", "utf-32", "windows-1250").foreach { encoding => + withTempPath { path => +val csvDir = new File(path, "csv") +Seq(content).toDF().write + .option("encoding", encoding) + .csv(csvDir.getCanonicalPath) + +csvDir.listFiles().filter(_.getName.endsWith("csv")).foreach({ csvFile => --- End diff -- nit: `.foreach({` -> `.foreach {` per https://github.com/databricks/scala-style-guide#anonymous-methods --- - To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark pull request #20949: [SPARK-19018][SQL] Add support for custom encodin...
Github user HyukjinKwon commented on a diff in the pull request: https://github.com/apache/spark/pull/20949#discussion_r204988574 --- Diff: sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala --- @@ -514,6 +516,41 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils with Te } } + test("SPARK-19018: Save csv with custom charset") { + +// scalastyle:off nonascii +val content = "µß áâä 中文" +// scalastyle:on nonascii + +Seq("iso-8859-1", "utf-8", "utf-16", "utf-32", "windows-1250").foreach { encoding => + withTempPath { path => +val csvDir = new File(path, "csv") +Seq(content).toDF().write --- End diff -- nit: `.write.repartition(1)` to make sure we write only one file --- - To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For additional commands, e-mail: reviews-help@spark.apache.org