Github user WeichenXu123 commented on a diff in the pull request: https://github.com/apache/spark/pull/18581#discussion_r154594735 --- Diff: mllib/src/test/scala/org/apache/spark/ml/source/libsvm/LibSVMRelationSuite.scala --- @@ -184,4 +184,54 @@ class LibSVMRelationSuite extends SparkFunSuite with MLlibTestSparkContext { spark.sql("DROP TABLE IF EXISTS libsvmTable") } } + + def testLineSeparator(lineSep: String): Unit = { + test(s"SPARK-21289: Support line separator - lineSep: '$lineSep'") { + val data = Seq( + "1.0 1:1.0 3:2.0 5:3.0", "0.0", "0.0", "0.0 2:4.0 4:5.0 6:6.0").mkString(lineSep) + val dataWithTrailingLineSep = s"$data$lineSep" + + Seq(data, dataWithTrailingLineSep).foreach { lines => + val path0 = new File(tempDir.getCanonicalPath, "write0") + val path1 = new File(tempDir.getCanonicalPath, "write1") + try { + // Read + java.nio.file.Files.write(path0.toPath, lines.getBytes(StandardCharsets.UTF_8)) + val df = spark.read + .option("lineSep", lineSep) + .format("libsvm") + .load(path0.getAbsolutePath) + + assert(df.columns(0) == "label") + assert(df.columns(1) == "features") + val row1 = df.first() + assert(row1.getDouble(0) == 1.0) + val v = row1.getAs[SparseVector](1) + assert(v == Vectors.sparse(6, Seq((0, 1.0), (2, 2.0), (4, 3.0)))) --- End diff -- Use `===` instead of `==` in this assertion (the same applies to the other `assert(... == ...)` checks above).
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org