[GitHub] spark pull request #16254: [SPARK-18830][TESTS] Fix tests in PipedRDDSuite t...
Github user asfgit closed the pull request at: https://github.com/apache/spark/pull/16254 --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #16254: [SPARK-18830][TESTS] Fix tests in PipedRDDSuite t...
Github user HyukjinKwon commented on a diff in the pull request: https://github.com/apache/spark/pull/16254#discussion_r92137332 --- Diff: core/src/test/scala/org/apache/spark/rdd/PipedRDDSuite.scala --- @@ -32,177 +32,172 @@ import org.apache.spark._ import org.apache.spark.util.Utils class PipedRDDSuite extends SparkFunSuite with SharedSparkContext { + val envCommand = if (Utils.isWindows) { +"cmd.exe /C set" + } else { +"printenv" + } test("basic pipe") { -if (testCommandAvailable("cat")) { - val nums = sc.makeRDD(Array(1, 2, 3, 4), 2) +assume(testCommandAvailable("cat")) +val nums = sc.makeRDD(Array(1, 2, 3, 4), 2) - val piped = nums.pipe(Seq("cat")) +val piped = nums.pipe(Seq("cat")) - val c = piped.collect() - assert(c.size === 4) - assert(c(0) === "1") - assert(c(1) === "2") - assert(c(2) === "3") - assert(c(3) === "4") -} else { - assert(true) -} +val c = piped.collect() +assert(c.size === 4) +assert(c(0) === "1") +assert(c(1) === "2") +assert(c(2) === "3") +assert(c(3) === "4") } test("basic pipe with tokenization") { -if (testCommandAvailable("wc")) { - val nums = sc.makeRDD(Array(1, 2, 3, 4), 2) - - // verify that both RDD.pipe(command: String) and RDD.pipe(command: String, env) work good - for (piped <- Seq(nums.pipe("wc -l"), nums.pipe("wc -l", Map[String, String]( { -val c = piped.collect() -assert(c.size === 2) -assert(c(0).trim === "2") -assert(c(1).trim === "2") - } -} else { - assert(true) +assume(testCommandAvailable("wc")) +val nums = sc.makeRDD(Array(1, 2, 3, 4), 2) + +// verify that both RDD.pipe(command: String) and RDD.pipe(command: String, env) work good +for (piped <- Seq(nums.pipe("wc -l"), nums.pipe("wc -l", Map[String, String]( { + val c = piped.collect() + assert(c.size === 2) + assert(c(0).trim === "2") + assert(c(1).trim === "2") } } test("failure in iterating over pipe input") { -if (testCommandAvailable("cat")) { - val nums = -sc.makeRDD(Array(1, 2, 3, 4), 2) - .mapPartitionsWithIndex((index, iterator) => { -new Iterator[Int] { - def hasNext = true - def next() = { -throw new SparkException("Exception to simulate bad scenario") - } -} - }) - - val piped = nums.pipe(Seq("cat")) - - intercept[SparkException] { -piped.collect() - } +assume(testCommandAvailable("cat")) +val nums = + sc.makeRDD(Array(1, 2, 3, 4), 2) +.mapPartitionsWithIndex((index, iterator) => { +new Iterator[Int] { + def hasNext = true + def next() = { +throw new SparkException("Exception to simulate bad scenario") + } +} + }) + +val piped = nums.pipe(Seq("cat")) + +intercept[SparkException] { + piped.collect() } } test("advanced pipe") { -if (testCommandAvailable("cat")) { - val nums = sc.makeRDD(Array(1, 2, 3, 4), 2) - val bl = sc.broadcast(List("0")) - - val piped = nums.pipe(Seq("cat"), +assume(testCommandAvailable("cat")) +val nums = sc.makeRDD(Array(1, 2, 3, 4), 2) +val bl = sc.broadcast(List("0")) + +val piped = nums.pipe(Seq("cat"), + Map[String, String](), + (f: String => Unit) => { +bl.value.foreach(f); f("\u0001") + }, + (i: Int, f: String => Unit) => f(i + "_")) + +val c = piped.collect() + +assert(c.size === 8) +assert(c(0) === "0") +assert(c(1) === "\u0001") +assert(c(2) === "1_") +assert(c(3) === "2_") +assert(c(4) === "0") +assert(c(5) === "\u0001") +assert(c(6) === "3_") +assert(c(7) === "4_") + +val nums1 = sc.makeRDD(Array("a\t1", "b\t2", "a\t3", "b\t4"), 2) +val d = nums1.groupBy(str => str.split("\t")(0)). + pipe(Seq("cat"), Map[String, String](), (f: String => Unit) => { bl.value.foreach(f); f("\u0001") }, -(i: Int, f: String => Unit) => f(i + "_")) - - val c = piped.collect() - - assert(c.size === 8) - assert(c(0) === "0") - assert(c(1) === "\u0001") - assert(c(2) === "1_") - assert(c(3) === "2_") - assert(c(4) === "0") -
[GitHub] spark pull request #16254: [SPARK-18830][TESTS] Fix tests in PipedRDDSuite t...
Github user srowen commented on a diff in the pull request: https://github.com/apache/spark/pull/16254#discussion_r92136892 --- Diff: core/src/test/scala/org/apache/spark/rdd/PipedRDDSuite.scala --- @@ -32,177 +32,172 @@ import org.apache.spark._ import org.apache.spark.util.Utils class PipedRDDSuite extends SparkFunSuite with SharedSparkContext { + val envCommand = if (Utils.isWindows) { +"cmd.exe /C set" + } else { +"printenv" + } test("basic pipe") { -if (testCommandAvailable("cat")) { - val nums = sc.makeRDD(Array(1, 2, 3, 4), 2) +assume(testCommandAvailable("cat")) +val nums = sc.makeRDD(Array(1, 2, 3, 4), 2) - val piped = nums.pipe(Seq("cat")) +val piped = nums.pipe(Seq("cat")) - val c = piped.collect() - assert(c.size === 4) - assert(c(0) === "1") - assert(c(1) === "2") - assert(c(2) === "3") - assert(c(3) === "4") -} else { - assert(true) -} +val c = piped.collect() +assert(c.size === 4) +assert(c(0) === "1") +assert(c(1) === "2") +assert(c(2) === "3") +assert(c(3) === "4") } test("basic pipe with tokenization") { -if (testCommandAvailable("wc")) { - val nums = sc.makeRDD(Array(1, 2, 3, 4), 2) - - // verify that both RDD.pipe(command: String) and RDD.pipe(command: String, env) work good - for (piped <- Seq(nums.pipe("wc -l"), nums.pipe("wc -l", Map[String, String]( { -val c = piped.collect() -assert(c.size === 2) -assert(c(0).trim === "2") -assert(c(1).trim === "2") - } -} else { - assert(true) +assume(testCommandAvailable("wc")) +val nums = sc.makeRDD(Array(1, 2, 3, 4), 2) + +// verify that both RDD.pipe(command: String) and RDD.pipe(command: String, env) work good +for (piped <- Seq(nums.pipe("wc -l"), nums.pipe("wc -l", Map[String, String]( { + val c = piped.collect() + assert(c.size === 2) + assert(c(0).trim === "2") + assert(c(1).trim === "2") } } test("failure in iterating over pipe input") { -if (testCommandAvailable("cat")) { - val nums = -sc.makeRDD(Array(1, 2, 3, 4), 2) - .mapPartitionsWithIndex((index, iterator) => { -new Iterator[Int] { - def hasNext = true - def next() = { -throw new SparkException("Exception to simulate bad scenario") - } -} - }) - - val piped = nums.pipe(Seq("cat")) - - intercept[SparkException] { -piped.collect() - } +assume(testCommandAvailable("cat")) +val nums = + sc.makeRDD(Array(1, 2, 3, 4), 2) +.mapPartitionsWithIndex((index, iterator) => { +new Iterator[Int] { + def hasNext = true + def next() = { +throw new SparkException("Exception to simulate bad scenario") + } +} + }) + +val piped = nums.pipe(Seq("cat")) + +intercept[SparkException] { + piped.collect() } } test("advanced pipe") { -if (testCommandAvailable("cat")) { - val nums = sc.makeRDD(Array(1, 2, 3, 4), 2) - val bl = sc.broadcast(List("0")) - - val piped = nums.pipe(Seq("cat"), +assume(testCommandAvailable("cat")) +val nums = sc.makeRDD(Array(1, 2, 3, 4), 2) +val bl = sc.broadcast(List("0")) + +val piped = nums.pipe(Seq("cat"), + Map[String, String](), + (f: String => Unit) => { +bl.value.foreach(f); f("\u0001") + }, + (i: Int, f: String => Unit) => f(i + "_")) + +val c = piped.collect() + +assert(c.size === 8) +assert(c(0) === "0") +assert(c(1) === "\u0001") +assert(c(2) === "1_") +assert(c(3) === "2_") +assert(c(4) === "0") +assert(c(5) === "\u0001") +assert(c(6) === "3_") +assert(c(7) === "4_") + +val nums1 = sc.makeRDD(Array("a\t1", "b\t2", "a\t3", "b\t4"), 2) +val d = nums1.groupBy(str => str.split("\t")(0)). + pipe(Seq("cat"), Map[String, String](), (f: String => Unit) => { bl.value.foreach(f); f("\u0001") }, -(i: Int, f: String => Unit) => f(i + "_")) - - val c = piped.collect() - - assert(c.size === 8) - assert(c(0) === "0") - assert(c(1) === "\u0001") - assert(c(2) === "1_") - assert(c(3) === "2_") - assert(c(4) === "0") - asse
[GitHub] spark pull request #16254: [SPARK-18830][TESTS] Fix tests in PipedRDDSuite t...
Github user HyukjinKwon commented on a diff in the pull request: https://github.com/apache/spark/pull/16254#discussion_r92136208 --- Diff: core/src/test/scala/org/apache/spark/rdd/PipedRDDSuite.scala --- @@ -32,177 +32,172 @@ import org.apache.spark._ import org.apache.spark.util.Utils class PipedRDDSuite extends SparkFunSuite with SharedSparkContext { + val envCommand = if (Utils.isWindows) { +"cmd.exe /C set" + } else { +"printenv" + } test("basic pipe") { -if (testCommandAvailable("cat")) { - val nums = sc.makeRDD(Array(1, 2, 3, 4), 2) +assume(testCommandAvailable("cat")) +val nums = sc.makeRDD(Array(1, 2, 3, 4), 2) - val piped = nums.pipe(Seq("cat")) +val piped = nums.pipe(Seq("cat")) - val c = piped.collect() - assert(c.size === 4) - assert(c(0) === "1") - assert(c(1) === "2") - assert(c(2) === "3") - assert(c(3) === "4") -} else { - assert(true) -} +val c = piped.collect() +assert(c.size === 4) +assert(c(0) === "1") +assert(c(1) === "2") +assert(c(2) === "3") +assert(c(3) === "4") } test("basic pipe with tokenization") { -if (testCommandAvailable("wc")) { - val nums = sc.makeRDD(Array(1, 2, 3, 4), 2) - - // verify that both RDD.pipe(command: String) and RDD.pipe(command: String, env) work good - for (piped <- Seq(nums.pipe("wc -l"), nums.pipe("wc -l", Map[String, String]( { -val c = piped.collect() -assert(c.size === 2) -assert(c(0).trim === "2") -assert(c(1).trim === "2") - } -} else { - assert(true) +assume(testCommandAvailable("wc")) +val nums = sc.makeRDD(Array(1, 2, 3, 4), 2) + +// verify that both RDD.pipe(command: String) and RDD.pipe(command: String, env) work good +for (piped <- Seq(nums.pipe("wc -l"), nums.pipe("wc -l", Map[String, String]( { + val c = piped.collect() + assert(c.size === 2) + assert(c(0).trim === "2") + assert(c(1).trim === "2") } } test("failure in iterating over pipe input") { -if (testCommandAvailable("cat")) { - val nums = -sc.makeRDD(Array(1, 2, 3, 4), 2) - .mapPartitionsWithIndex((index, iterator) => { -new Iterator[Int] { - def hasNext = true - def next() = { -throw new SparkException("Exception to simulate bad scenario") - } -} - }) - - val piped = nums.pipe(Seq("cat")) - - intercept[SparkException] { -piped.collect() - } +assume(testCommandAvailable("cat")) +val nums = + sc.makeRDD(Array(1, 2, 3, 4), 2) +.mapPartitionsWithIndex((index, iterator) => { +new Iterator[Int] { + def hasNext = true + def next() = { +throw new SparkException("Exception to simulate bad scenario") + } +} + }) + +val piped = nums.pipe(Seq("cat")) + +intercept[SparkException] { + piped.collect() } } test("advanced pipe") { -if (testCommandAvailable("cat")) { - val nums = sc.makeRDD(Array(1, 2, 3, 4), 2) - val bl = sc.broadcast(List("0")) - - val piped = nums.pipe(Seq("cat"), +assume(testCommandAvailable("cat")) +val nums = sc.makeRDD(Array(1, 2, 3, 4), 2) +val bl = sc.broadcast(List("0")) + +val piped = nums.pipe(Seq("cat"), + Map[String, String](), + (f: String => Unit) => { +bl.value.foreach(f); f("\u0001") + }, + (i: Int, f: String => Unit) => f(i + "_")) + +val c = piped.collect() + +assert(c.size === 8) +assert(c(0) === "0") +assert(c(1) === "\u0001") +assert(c(2) === "1_") +assert(c(3) === "2_") +assert(c(4) === "0") +assert(c(5) === "\u0001") +assert(c(6) === "3_") +assert(c(7) === "4_") + +val nums1 = sc.makeRDD(Array("a\t1", "b\t2", "a\t3", "b\t4"), 2) +val d = nums1.groupBy(str => str.split("\t")(0)). + pipe(Seq("cat"), Map[String, String](), (f: String => Unit) => { bl.value.foreach(f); f("\u0001") }, -(i: Int, f: String => Unit) => f(i + "_")) - - val c = piped.collect() - - assert(c.size === 8) - assert(c(0) === "0") - assert(c(1) === "\u0001") - assert(c(2) === "1_") - assert(c(3) === "2_") - assert(c(4) === "0") -
[GitHub] spark pull request #16254: [SPARK-18830][TESTS] Fix tests in PipedRDDSuite t...
Github user srowen commented on a diff in the pull request: https://github.com/apache/spark/pull/16254#discussion_r92126915 --- Diff: core/src/test/scala/org/apache/spark/rdd/PipedRDDSuite.scala --- @@ -32,177 +32,172 @@ import org.apache.spark._ import org.apache.spark.util.Utils class PipedRDDSuite extends SparkFunSuite with SharedSparkContext { + val envCommand = if (Utils.isWindows) { +"cmd.exe /C set" + } else { +"printenv" + } test("basic pipe") { -if (testCommandAvailable("cat")) { - val nums = sc.makeRDD(Array(1, 2, 3, 4), 2) +assume(testCommandAvailable("cat")) +val nums = sc.makeRDD(Array(1, 2, 3, 4), 2) - val piped = nums.pipe(Seq("cat")) +val piped = nums.pipe(Seq("cat")) - val c = piped.collect() - assert(c.size === 4) - assert(c(0) === "1") - assert(c(1) === "2") - assert(c(2) === "3") - assert(c(3) === "4") -} else { - assert(true) -} +val c = piped.collect() +assert(c.size === 4) +assert(c(0) === "1") +assert(c(1) === "2") +assert(c(2) === "3") +assert(c(3) === "4") } test("basic pipe with tokenization") { -if (testCommandAvailable("wc")) { - val nums = sc.makeRDD(Array(1, 2, 3, 4), 2) - - // verify that both RDD.pipe(command: String) and RDD.pipe(command: String, env) work good - for (piped <- Seq(nums.pipe("wc -l"), nums.pipe("wc -l", Map[String, String]( { -val c = piped.collect() -assert(c.size === 2) -assert(c(0).trim === "2") -assert(c(1).trim === "2") - } -} else { - assert(true) +assume(testCommandAvailable("wc")) +val nums = sc.makeRDD(Array(1, 2, 3, 4), 2) + +// verify that both RDD.pipe(command: String) and RDD.pipe(command: String, env) work good +for (piped <- Seq(nums.pipe("wc -l"), nums.pipe("wc -l", Map[String, String]( { + val c = piped.collect() + assert(c.size === 2) + assert(c(0).trim === "2") + assert(c(1).trim === "2") } } test("failure in iterating over pipe input") { -if (testCommandAvailable("cat")) { - val nums = -sc.makeRDD(Array(1, 2, 3, 4), 2) - .mapPartitionsWithIndex((index, iterator) => { -new Iterator[Int] { - def hasNext = true - def next() = { -throw new SparkException("Exception to simulate bad scenario") - } -} - }) - - val piped = nums.pipe(Seq("cat")) - - intercept[SparkException] { -piped.collect() - } +assume(testCommandAvailable("cat")) +val nums = + sc.makeRDD(Array(1, 2, 3, 4), 2) +.mapPartitionsWithIndex((index, iterator) => { +new Iterator[Int] { + def hasNext = true + def next() = { +throw new SparkException("Exception to simulate bad scenario") + } +} + }) + +val piped = nums.pipe(Seq("cat")) + +intercept[SparkException] { + piped.collect() } } test("advanced pipe") { -if (testCommandAvailable("cat")) { - val nums = sc.makeRDD(Array(1, 2, 3, 4), 2) - val bl = sc.broadcast(List("0")) - - val piped = nums.pipe(Seq("cat"), +assume(testCommandAvailable("cat")) +val nums = sc.makeRDD(Array(1, 2, 3, 4), 2) +val bl = sc.broadcast(List("0")) + +val piped = nums.pipe(Seq("cat"), + Map[String, String](), + (f: String => Unit) => { +bl.value.foreach(f); f("\u0001") + }, + (i: Int, f: String => Unit) => f(i + "_")) + +val c = piped.collect() + +assert(c.size === 8) +assert(c(0) === "0") +assert(c(1) === "\u0001") +assert(c(2) === "1_") +assert(c(3) === "2_") +assert(c(4) === "0") +assert(c(5) === "\u0001") +assert(c(6) === "3_") +assert(c(7) === "4_") + +val nums1 = sc.makeRDD(Array("a\t1", "b\t2", "a\t3", "b\t4"), 2) +val d = nums1.groupBy(str => str.split("\t")(0)). + pipe(Seq("cat"), Map[String, String](), (f: String => Unit) => { bl.value.foreach(f); f("\u0001") }, -(i: Int, f: String => Unit) => f(i + "_")) - - val c = piped.collect() - - assert(c.size === 8) - assert(c(0) === "0") - assert(c(1) === "\u0001") - assert(c(2) === "1_") - assert(c(3) === "2_") - assert(c(4) === "0") - asse
[GitHub] spark pull request #16254: [SPARK-18830][TESTS] Fix tests in PipedRDDSuite t...
Github user HyukjinKwon commented on a diff in the pull request: https://github.com/apache/spark/pull/16254#discussion_r91950044 --- Diff: core/src/test/scala/org/apache/spark/rdd/PipedRDDSuite.scala --- @@ -142,17 +147,23 @@ class PipedRDDSuite extends SparkFunSuite with SharedSparkContext { val piped = data.pipe("wc -c") assert(piped.count == 8) val charCounts = piped.map(_.trim.toInt).collect().toSet -assert(Set(0, 4, 5) == charCounts) +val expected = if (Utils.isWindows) { + // Note that newline character on Windows is \r\n which are two. + Set(0, 5, 6) +} else { + Set(0, 4, 5) +} +assert(expected == charCounts) } test("pipe with env variable") { -if (testCommandAvailable("printenv")) { +if (testCommandAvailable(envCommand)) { val nums = sc.makeRDD(Array(1, 2, 3, 4), 2) - val piped = nums.pipe(Seq("printenv", "MY_TEST_ENV"), Map("MY_TEST_ENV" -> "LALALA")) + val piped = nums.pipe(s"$envCommand MY_TEST_ENV", Map("MY_TEST_ENV" -> "LALALA")) val c = piped.collect() - assert(c.size === 2) - assert(c(0) === "LALALA") - assert(c(1) === "LALALA") + assert(c.length === 2) + assert(c(0).contains("LALALA")) + assert(c(1).contains("LALALA")) --- End diff -- Sure. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #16254: [SPARK-18830][TESTS] Fix tests in PipedRDDSuite t...
Github user HyukjinKwon commented on a diff in the pull request: https://github.com/apache/spark/pull/16254#discussion_r91949608 --- Diff: core/src/test/scala/org/apache/spark/rdd/PipedRDDSuite.scala --- @@ -142,17 +147,23 @@ class PipedRDDSuite extends SparkFunSuite with SharedSparkContext { val piped = data.pipe("wc -c") assert(piped.count == 8) val charCounts = piped.map(_.trim.toInt).collect().toSet -assert(Set(0, 4, 5) == charCounts) +val expected = if (Utils.isWindows) { + // Note that newline character on Windows is \r\n which are two. + Set(0, 5, 6) +} else { + Set(0, 4, 5) +} +assert(expected == charCounts) } test("pipe with env variable") { -if (testCommandAvailable("printenv")) { +if (testCommandAvailable(envCommand)) { --- End diff -- `cmd.exe /C set [non-existing-varname]` exits with `-1` as expected - https://ci.appveyor.com/project/spark-test/spark/build/227-PipedRDDSuite. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #16254: [SPARK-18830][TESTS] Fix tests in PipedRDDSuite t...
Github user srowen commented on a diff in the pull request: https://github.com/apache/spark/pull/16254#discussion_r91949387 --- Diff: core/src/test/scala/org/apache/spark/rdd/PipedRDDSuite.scala --- @@ -142,17 +147,23 @@ class PipedRDDSuite extends SparkFunSuite with SharedSparkContext { val piped = data.pipe("wc -c") assert(piped.count == 8) val charCounts = piped.map(_.trim.toInt).collect().toSet -assert(Set(0, 4, 5) == charCounts) +val expected = if (Utils.isWindows) { + // Note that newline character on Windows is \r\n which are two. + Set(0, 5, 6) +} else { + Set(0, 4, 5) +} +assert(expected == charCounts) } test("pipe with env variable") { -if (testCommandAvailable("printenv")) { +if (testCommandAvailable(envCommand)) { val nums = sc.makeRDD(Array(1, 2, 3, 4), 2) - val piped = nums.pipe(Seq("printenv", "MY_TEST_ENV"), Map("MY_TEST_ENV" -> "LALALA")) + val piped = nums.pipe(s"$envCommand MY_TEST_ENV", Map("MY_TEST_ENV" -> "LALALA")) val c = piped.collect() - assert(c.size === 2) - assert(c(0) === "LALALA") - assert(c(1) === "LALALA") + assert(c.length === 2) + assert(c(0).contains("LALALA")) + assert(c(1).contains("LALALA")) --- End diff -- Can you check `c(0).stripPrefix("MY_TEST_ENV=") === "LALALA")`? --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #16254: [SPARK-18830][TESTS] Fix tests in PipedRDDSuite t...
Github user HyukjinKwon commented on a diff in the pull request: https://github.com/apache/spark/pull/16254#discussion_r91948938 --- Diff: core/src/test/scala/org/apache/spark/rdd/PipedRDDSuite.scala --- @@ -245,7 +256,7 @@ class PipedRDDSuite extends SparkFunSuite with SharedSparkContext { val tContext = TaskContext.empty() val rddIter = pipedRdd.compute(hadoopPart1, tContext) val arr = rddIter.toArray - assert(arr(0) == "/some/path") + assert(arr(0).contains("/some/path")) --- End diff -- Ditto. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #16254: [SPARK-18830][TESTS] Fix tests in PipedRDDSuite t...
Github user HyukjinKwon commented on a diff in the pull request: https://github.com/apache/spark/pull/16254#discussion_r91948877 --- Diff: core/src/test/scala/org/apache/spark/rdd/PipedRDDSuite.scala --- @@ -142,17 +147,23 @@ class PipedRDDSuite extends SparkFunSuite with SharedSparkContext { val piped = data.pipe("wc -c") assert(piped.count == 8) val charCounts = piped.map(_.trim.toInt).collect().toSet -assert(Set(0, 4, 5) == charCounts) +val expected = if (Utils.isWindows) { + // Note that newline character on Windows is \r\n which are two. + Set(0, 5, 6) +} else { + Set(0, 4, 5) +} +assert(expected == charCounts) } test("pipe with env variable") { -if (testCommandAvailable("printenv")) { +if (testCommandAvailable(envCommand)) { val nums = sc.makeRDD(Array(1, 2, 3, 4), 2) - val piped = nums.pipe(Seq("printenv", "MY_TEST_ENV"), Map("MY_TEST_ENV" -> "LALALA")) + val piped = nums.pipe(s"$envCommand MY_TEST_ENV", Map("MY_TEST_ENV" -> "LALALA")) val c = piped.collect() - assert(c.size === 2) - assert(c(0) === "LALALA") - assert(c(1) === "LALALA") + assert(c.length === 2) + assert(c(0).contains("LALALA")) + assert(c(1).contains("LALALA")) --- End diff -- This had to be contains because `cmd.exe /C set MY_TEST_ENV` prints ``` MY_TEST_ENV=LALALA ``` whereas `printenv` prints ``` LALALA ``` --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request #16254: [SPARK-18830][TESTS] Fix tests in PipedRDDSuite t...
GitHub user HyukjinKwon opened a pull request: https://github.com/apache/spark/pull/16254 [SPARK-18830][TESTS] Fix tests in PipedRDDSuite to pass on Winodws ## What changes were proposed in this pull request? This PR proposes to fix the tests failed on Windows as below: ``` [info] - pipe with empty partition *** FAILED *** (672 milliseconds) [info] Set(0, 4, 5) did not equal Set(0, 5, 6) (PipedRDDSuite.scala:145) [info] org.scalatest.exceptions.TestFailedException: ... ``` In this case, `wc -c` counts the characters on both Windows and Linux but the newlines characters on Windows are `\r\n` which are two. So, the counts ends up one more for each. ``` [info] - test pipe exports map_input_file *** FAILED *** (62 milliseconds) [info] java.lang.IllegalStateException: Subprocess exited with status 1. Command ran: printenv map_input_file [info] at org.apache.spark.rdd.PipedRDD$$anon$1.hasNext(PipedRDD.scala:178) ... ``` ``` [info] - test pipe exports mapreduce_map_input_file *** FAILED *** (172 milliseconds) [info] java.lang.IllegalStateException: Subprocess exited with status 1. Command ran: printenv mapreduce_map_input_file [info] at org.apache.spark.rdd.PipedRDD$$anon$1.hasNext(PipedRDD.scala:178) ... ``` This command prints the environment variables; however, when environment variables are set to `ProcessBuilder` as lower-cased keys, `printenv` in Windows ignores and does not print this although it is actually set and accessible. (this was tested in [here](https://ci.appveyor.com/project/spark-test/spark/build/208-PipedRDDSuite) for upper-cases with this [diff](https://github.com/apache/spark/compare/master...spark-test:74d39da) and [here](https://ci.appveyor.com/project/spark-test/spark/build/203-PipedRDDSuite) for lower-cases with this [diff](https://github.com/apache/spark/compare/master...spark-test:fde5e37f28032c15a8d8693ba033a8a779a26317). (Note that environment variables on Windows are case-insensitive). This is (I believe) a thirdparty tool on Windows that resembles `printenv` on Linux (installed in AppVeyor environment or Windows Server 2012 R2). This command does not exist, at least, for Windows 7 and 10. On Windows, we can use `cmd.exe /C set [varname]` officially for this purpose. We could fix the tests with this in order to test if the environment variable is set. ## How was this patch tested? Manually tested via AppVeyor. **Before** https://ci.appveyor.com/project/spark-test/spark/build/194-PipedRDDSuite **After** https://ci.appveyor.com/project/spark-test/spark/build/226-PipedRDDSuite You can merge this pull request into a Git repository by running: $ git pull https://github.com/HyukjinKwon/spark pipe-errors Alternatively you can review and apply these changes as the patch at: https://github.com/apache/spark/pull/16254.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #16254 commit 1cd63b2dedb5c34f66bb370a7b4ce278d9f4ea70 Author: hyukjinkwon Date: 2016-12-12T13:15:01Z Fix tests in PipedRDDSuite to pass on Winodws --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org