[GitHub] spark pull request #16254: [SPARK-18830][TESTS] Fix tests in PipedRDDSuite t...

2016-12-14 Thread asfgit
Github user asfgit closed the pull request at:

https://github.com/apache/spark/pull/16254





[GitHub] spark pull request #16254: [SPARK-18830][TESTS] Fix tests in PipedRDDSuite t...

2016-12-13 Thread HyukjinKwon
Github user HyukjinKwon commented on a diff in the pull request:

https://github.com/apache/spark/pull/16254#discussion_r92137332
  
--- Diff: core/src/test/scala/org/apache/spark/rdd/PipedRDDSuite.scala ---
@@ -32,177 +32,172 @@ import org.apache.spark._
 import org.apache.spark.util.Utils
 
 class PipedRDDSuite extends SparkFunSuite with SharedSparkContext {
+  val envCommand = if (Utils.isWindows) {
+    "cmd.exe /C set"
+  } else {
+    "printenv"
+  }
 
   test("basic pipe") {
-    if (testCommandAvailable("cat")) {
-      val nums = sc.makeRDD(Array(1, 2, 3, 4), 2)
+    assume(testCommandAvailable("cat"))
+    val nums = sc.makeRDD(Array(1, 2, 3, 4), 2)
 
-      val piped = nums.pipe(Seq("cat"))
+    val piped = nums.pipe(Seq("cat"))
 
-      val c = piped.collect()
-      assert(c.size === 4)
-      assert(c(0) === "1")
-      assert(c(1) === "2")
-      assert(c(2) === "3")
-      assert(c(3) === "4")
-    } else {
-      assert(true)
-    }
+    val c = piped.collect()
+    assert(c.size === 4)
+    assert(c(0) === "1")
+    assert(c(1) === "2")
+    assert(c(2) === "3")
+    assert(c(3) === "4")
   }
 
   test("basic pipe with tokenization") {
-    if (testCommandAvailable("wc")) {
-      val nums = sc.makeRDD(Array(1, 2, 3, 4), 2)
-
-      // verify that both RDD.pipe(command: String) and RDD.pipe(command: String, env) work good
-      for (piped <- Seq(nums.pipe("wc -l"), nums.pipe("wc -l", Map[String, String]()))) {
-        val c = piped.collect()
-        assert(c.size === 2)
-        assert(c(0).trim === "2")
-        assert(c(1).trim === "2")
-      }
-    } else {
-      assert(true)
+    assume(testCommandAvailable("wc"))
+    val nums = sc.makeRDD(Array(1, 2, 3, 4), 2)
+
+    // verify that both RDD.pipe(command: String) and RDD.pipe(command: String, env) work good
+    for (piped <- Seq(nums.pipe("wc -l"), nums.pipe("wc -l", Map[String, String]()))) {
+      val c = piped.collect()
+      assert(c.size === 2)
+      assert(c(0).trim === "2")
+      assert(c(1).trim === "2")
     }
   }
 
   test("failure in iterating over pipe input") {
-    if (testCommandAvailable("cat")) {
-      val nums =
-        sc.makeRDD(Array(1, 2, 3, 4), 2)
-          .mapPartitionsWithIndex((index, iterator) => {
-            new Iterator[Int] {
-              def hasNext = true
-              def next() = {
-                throw new SparkException("Exception to simulate bad scenario")
-              }
-            }
-          })
-
-      val piped = nums.pipe(Seq("cat"))
-
-      intercept[SparkException] {
-        piped.collect()
-      }
+    assume(testCommandAvailable("cat"))
+    val nums =
+      sc.makeRDD(Array(1, 2, 3, 4), 2)
+        .mapPartitionsWithIndex((index, iterator) => {
+          new Iterator[Int] {
+            def hasNext = true
+            def next() = {
+              throw new SparkException("Exception to simulate bad scenario")
+            }
+          }
+        })
+
+    val piped = nums.pipe(Seq("cat"))
+
+    intercept[SparkException] {
+      piped.collect()
    }
  }
 
   test("advanced pipe") {
-    if (testCommandAvailable("cat")) {
-      val nums = sc.makeRDD(Array(1, 2, 3, 4), 2)
-      val bl = sc.broadcast(List("0"))
-
-      val piped = nums.pipe(Seq("cat"),
+    assume(testCommandAvailable("cat"))
+    val nums = sc.makeRDD(Array(1, 2, 3, 4), 2)
+    val bl = sc.broadcast(List("0"))
+
+    val piped = nums.pipe(Seq("cat"),
+      Map[String, String](),
+      (f: String => Unit) => {
+        bl.value.foreach(f); f("\u0001")
+      },
+      (i: Int, f: String => Unit) => f(i + "_"))
+
+    val c = piped.collect()
+
+    assert(c.size === 8)
+    assert(c(0) === "0")
+    assert(c(1) === "\u0001")
+    assert(c(2) === "1_")
+    assert(c(3) === "2_")
+    assert(c(4) === "0")
+    assert(c(5) === "\u0001")
+    assert(c(6) === "3_")
+    assert(c(7) === "4_")
+
+    val nums1 = sc.makeRDD(Array("a\t1", "b\t2", "a\t3", "b\t4"), 2)
+    val d = nums1.groupBy(str => str.split("\t")(0)).
+      pipe(Seq("cat"),
         Map[String, String](),
         (f: String => Unit) => {
           bl.value.foreach(f); f("\u0001")
         },
-        (i: Int, f: String => Unit) => f(i + "_"))
-
-      val c = piped.collect()
-
-      assert(c.size === 8)
-      assert(c(0) === "0")
-      assert(c(1) === "\u0001")
-      assert(c(2) === "1_")
-      assert(c(3) === "2_")
-      assert(c(4) === "0")
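The key change throughout this hunk is replacing `if (testCommandAvailable(...)) { ... } else { assert(true) }` with ScalaTest's `assume`, so a missing command cancels the test instead of letting it pass vacuously. A minimal standalone sketch of that behavior (a hypothetical suite, not the real one; `testCommandAvailable` is written out roughly the way the Spark test helper does it):

```
import scala.sys.process._
import scala.util.Try

import org.scalatest.FunSuite

class AssumeSketchSuite extends FunSuite {

  // A command counts as available only if it can be launched and exits with 0.
  private def testCommandAvailable(command: String): Boolean = {
    val attempt = Try(Process(command).run(ProcessLogger(_ => ())).exitValue())
    attempt.isSuccess && attempt.get == 0
  }

  test("basic pipe (sketch)") {
    // `assume` throws TestCanceledException when the condition is false, so
    // the test is reported as canceled rather than silently passing.
    assume(testCommandAvailable("cat"))
    // ... the real assertions would only run when `cat` exists ...
  }
}
```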

[GitHub] spark pull request #16254: [SPARK-18830][TESTS] Fix tests in PipedRDDSuite t...

2016-12-13 Thread srowen
Github user srowen commented on a diff in the pull request:

https://github.com/apache/spark/pull/16254#discussion_r92136892
  

[GitHub] spark pull request #16254: [SPARK-18830][TESTS] Fix tests in PipedRDDSuite t...

2016-12-13 Thread HyukjinKwon
Github user HyukjinKwon commented on a diff in the pull request:

https://github.com/apache/spark/pull/16254#discussion_r92136208
  

[GitHub] spark pull request #16254: [SPARK-18830][TESTS] Fix tests in PipedRDDSuite t...

2016-12-13 Thread srowen
Github user srowen commented on a diff in the pull request:

https://github.com/apache/spark/pull/16254#discussion_r92126915
  

[GitHub] spark pull request #16254: [SPARK-18830][TESTS] Fix tests in PipedRDDSuite t...

2016-12-12 Thread HyukjinKwon
Github user HyukjinKwon commented on a diff in the pull request:

https://github.com/apache/spark/pull/16254#discussion_r91950044
  
--- Diff: core/src/test/scala/org/apache/spark/rdd/PipedRDDSuite.scala ---
@@ -142,17 +147,23 @@ class PipedRDDSuite extends SparkFunSuite with SharedSparkContext {
     val piped = data.pipe("wc -c")
     assert(piped.count == 8)
     val charCounts = piped.map(_.trim.toInt).collect().toSet
-    assert(Set(0, 4, 5) == charCounts)
+    val expected = if (Utils.isWindows) {
+      // Note that newline character on Windows is \r\n which are two.
+      Set(0, 5, 6)
+    } else {
+      Set(0, 4, 5)
+    }
+    assert(expected == charCounts)
   }
 
   test("pipe with env variable") {
-    if (testCommandAvailable("printenv")) {
+    if (testCommandAvailable(envCommand)) {
       val nums = sc.makeRDD(Array(1, 2, 3, 4), 2)
-      val piped = nums.pipe(Seq("printenv", "MY_TEST_ENV"), Map("MY_TEST_ENV" -> "LALALA"))
+      val piped = nums.pipe(s"$envCommand MY_TEST_ENV", Map("MY_TEST_ENV" -> "LALALA"))
       val c = piped.collect()
-      assert(c.size === 2)
-      assert(c(0) === "LALALA")
-      assert(c(1) === "LALALA")
+      assert(c.length === 2)
+      assert(c(0).contains("LALALA"))
+      assert(c(1).contains("LALALA"))
--- End diff --

Sure.





[GitHub] spark pull request #16254: [SPARK-18830][TESTS] Fix tests in PipedRDDSuite t...

2016-12-12 Thread HyukjinKwon
Github user HyukjinKwon commented on a diff in the pull request:

https://github.com/apache/spark/pull/16254#discussion_r91949608
  
--- Diff: core/src/test/scala/org/apache/spark/rdd/PipedRDDSuite.scala ---
@@ -142,17 +147,23 @@ class PipedRDDSuite extends SparkFunSuite with SharedSparkContext {
     val piped = data.pipe("wc -c")
     assert(piped.count == 8)
     val charCounts = piped.map(_.trim.toInt).collect().toSet
-    assert(Set(0, 4, 5) == charCounts)
+    val expected = if (Utils.isWindows) {
+      // Note that newline character on Windows is \r\n which are two.
+      Set(0, 5, 6)
+    } else {
+      Set(0, 4, 5)
+    }
+    assert(expected == charCounts)
   }
 
   test("pipe with env variable") {
-    if (testCommandAvailable("printenv")) {
+    if (testCommandAvailable(envCommand)) {
--- End diff --

`cmd.exe /C set [non-existing-varname]` exits with `-1` as expected - https://ci.appveyor.com/project/spark-test/spark/build/227-PipedRDDSuite.
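For reference, this exit-code behavior can be checked directly with `scala.sys.process` (illustrative snippet; assumes a Windows host, and `NO_SUCH_VAR_X` is a made-up name):

```
import scala.sys.process._

// `set NAME` succeeds (exit 0) when NAME is defined and fails otherwise,
// which is what makes testCommandAvailable(envCommand) a usable probe.
val defined   = Process(Seq("cmd.exe", "/C", "set", "PATH")).!
val undefined = Process(Seq("cmd.exe", "/C", "set", "NO_SUCH_VAR_X")).!
println(s"defined -> $defined, undefined -> $undefined") // e.g. 0 and a nonzero code
```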





[GitHub] spark pull request #16254: [SPARK-18830][TESTS] Fix tests in PipedRDDSuite t...

2016-12-12 Thread srowen
Github user srowen commented on a diff in the pull request:

https://github.com/apache/spark/pull/16254#discussion_r91949387
  
--- Diff: core/src/test/scala/org/apache/spark/rdd/PipedRDDSuite.scala ---
@@ -142,17 +147,23 @@ class PipedRDDSuite extends SparkFunSuite with SharedSparkContext {
     val piped = data.pipe("wc -c")
     assert(piped.count == 8)
     val charCounts = piped.map(_.trim.toInt).collect().toSet
-    assert(Set(0, 4, 5) == charCounts)
+    val expected = if (Utils.isWindows) {
+      // Note that newline character on Windows is \r\n which are two.
+      Set(0, 5, 6)
+    } else {
+      Set(0, 4, 5)
+    }
+    assert(expected == charCounts)
   }
 
   test("pipe with env variable") {
-    if (testCommandAvailable("printenv")) {
+    if (testCommandAvailable(envCommand)) {
       val nums = sc.makeRDD(Array(1, 2, 3, 4), 2)
-      val piped = nums.pipe(Seq("printenv", "MY_TEST_ENV"), Map("MY_TEST_ENV" -> "LALALA"))
+      val piped = nums.pipe(s"$envCommand MY_TEST_ENV", Map("MY_TEST_ENV" -> "LALALA"))
       val c = piped.collect()
-      assert(c.size === 2)
-      assert(c(0) === "LALALA")
-      assert(c(1) === "LALALA")
+      assert(c.length === 2)
+      assert(c(0).contains("LALALA"))
+      assert(c(1).contains("LALALA"))
--- End diff --

Can you check `c(0).stripPrefix("MY_TEST_ENV=") === "LALALA"`?





[GitHub] spark pull request #16254: [SPARK-18830][TESTS] Fix tests in PipedRDDSuite t...

2016-12-12 Thread HyukjinKwon
Github user HyukjinKwon commented on a diff in the pull request:

https://github.com/apache/spark/pull/16254#discussion_r91948938
  
--- Diff: core/src/test/scala/org/apache/spark/rdd/PipedRDDSuite.scala ---
@@ -245,7 +256,7 @@ class PipedRDDSuite extends SparkFunSuite with SharedSparkContext {
       val tContext = TaskContext.empty()
       val rddIter = pipedRdd.compute(hadoopPart1, tContext)
       val arr = rddIter.toArray
-      assert(arr(0) == "/some/path")
+      assert(arr(0).contains("/some/path"))
--- End diff --

Ditto.





[GitHub] spark pull request #16254: [SPARK-18830][TESTS] Fix tests in PipedRDDSuite t...

2016-12-12 Thread HyukjinKwon
Github user HyukjinKwon commented on a diff in the pull request:

https://github.com/apache/spark/pull/16254#discussion_r91948877
  
--- Diff: core/src/test/scala/org/apache/spark/rdd/PipedRDDSuite.scala ---
@@ -142,17 +147,23 @@ class PipedRDDSuite extends SparkFunSuite with SharedSparkContext {
     val piped = data.pipe("wc -c")
     assert(piped.count == 8)
     val charCounts = piped.map(_.trim.toInt).collect().toSet
-    assert(Set(0, 4, 5) == charCounts)
+    val expected = if (Utils.isWindows) {
+      // Note that newline character on Windows is \r\n which are two.
+      Set(0, 5, 6)
+    } else {
+      Set(0, 4, 5)
+    }
+    assert(expected == charCounts)
   }
 
   test("pipe with env variable") {
-    if (testCommandAvailable("printenv")) {
+    if (testCommandAvailable(envCommand)) {
       val nums = sc.makeRDD(Array(1, 2, 3, 4), 2)
-      val piped = nums.pipe(Seq("printenv", "MY_TEST_ENV"), Map("MY_TEST_ENV" -> "LALALA"))
+      val piped = nums.pipe(s"$envCommand MY_TEST_ENV", Map("MY_TEST_ENV" -> "LALALA"))
       val c = piped.collect()
-      assert(c.size === 2)
-      assert(c(0) === "LALALA")
-      assert(c(1) === "LALALA")
+      assert(c.length === 2)
+      assert(c(0).contains("LALALA"))
+      assert(c(1).contains("LALALA"))
--- End diff --

This had to be `contains` because `cmd.exe /C set MY_TEST_ENV` prints

```
MY_TEST_ENV=LALALA
```

whereas `printenv` prints

```
LALALA
```
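
If exact equality were ever wanted, the two output shapes could be normalized first; a hypothetical helper along the lines of the `stripPrefix` suggestion above (not part of this PR):

```
// `cmd.exe /C set NAME` echoes "NAME=value"; `printenv NAME` echoes "value".
// Stripping an optional "NAME=" prefix reduces both to the bare value.
def envValue(line: String, varName: String): String =
  line.stripPrefix(varName + "=")

assert(envValue("MY_TEST_ENV=LALALA", "MY_TEST_ENV") == "LALALA") // Windows shape
assert(envValue("LALALA", "MY_TEST_ENV") == "LALALA")             // printenv shape
```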





[GitHub] spark pull request #16254: [SPARK-18830][TESTS] Fix tests in PipedRDDSuite t...

2016-12-12 Thread HyukjinKwon
GitHub user HyukjinKwon opened a pull request:

https://github.com/apache/spark/pull/16254

[SPARK-18830][TESTS] Fix tests in PipedRDDSuite to pass on Windows

## What changes were proposed in this pull request?

This PR proposes to fix the tests that fail on Windows, as below:

```
[info] - pipe with empty partition *** FAILED *** (672 milliseconds)
[info]   Set(0, 4, 5) did not equal Set(0, 5, 6) (PipedRDDSuite.scala:145)
[info]   org.scalatest.exceptions.TestFailedException:
...
```

In this case, `wc -c` counts characters on both Windows and Linux, but a newline on Windows is `\r\n`, which is two characters. So each count ends up one higher on Windows.
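
As a worked example (illustrative values, not taken from the test data):

```
// Why each nonempty line counts one byte more on Windows under `wc -c`:
val unixLine    = "1234\n"    // what pipe() writes per element on Linux
val windowsLine = "1234\r\n"  // what it writes on Windows
assert(unixLine.getBytes("UTF-8").length == 5)    // 4 chars + '\n'
assert(windowsLine.getBytes("UTF-8").length == 6) // 4 chars + '\r' + '\n'
```

This is exactly the shift from `Set(0, 4, 5)` to `Set(0, 5, 6)` above; the `0` comes from empty partitions and is unaffected.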

```
[info] - test pipe exports map_input_file *** FAILED *** (62 milliseconds)
[info]   java.lang.IllegalStateException: Subprocess exited with status 1. Command ran: printenv map_input_file
[info]   at org.apache.spark.rdd.PipedRDD$$anon$1.hasNext(PipedRDD.scala:178)
...
```

```
[info] - test pipe exports mapreduce_map_input_file *** FAILED *** (172 milliseconds)
[info]   java.lang.IllegalStateException: Subprocess exited with status 1. Command ran: printenv mapreduce_map_input_file
[info]   at org.apache.spark.rdd.PipedRDD$$anon$1.hasNext(PipedRDD.scala:178)
...
```

This command prints environment variables; however, when environment variables are passed to `ProcessBuilder` with lower-cased keys, `printenv` on Windows ignores them and prints nothing, even though the variables are actually set and accessible. (This was tested [here](https://ci.appveyor.com/project/spark-test/spark/build/208-PipedRDDSuite) for upper-cased keys with this [diff](https://github.com/apache/spark/compare/master...spark-test:74d39da), and [here](https://ci.appveyor.com/project/spark-test/spark/build/203-PipedRDDSuite) for lower-cased keys with this [diff](https://github.com/apache/spark/compare/master...spark-test:fde5e37f28032c15a8d8693ba033a8a779a26317). Note that environment variables on Windows are case-insensitive.)

`printenv` on Windows is (I believe) a third-party tool that resembles `printenv` on Linux; it happens to be installed in the AppVeyor environment (Windows Server 2012 R2), but the command does not exist on, at least, Windows 7 and 10.

On Windows, the officially supported way to do this is `cmd.exe /C set [varname]`, so we can fix the tests to use it when checking whether an environment variable is set.
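
Concretely, the patch picks the command once per platform and reuses it everywhere (excerpted from the diff above; `sc` and `Utils` come from the suite):

```
// `printenv NAME` on POSIX; `cmd.exe /C set NAME` on Windows, which prints
// "NAME=value" and exits nonzero when NAME is undefined.
val envCommand = if (Utils.isWindows) "cmd.exe /C set" else "printenv"

// Usage as in the updated test:
val nums  = sc.makeRDD(Array(1, 2, 3, 4), 2)
val piped = nums.pipe(s"$envCommand MY_TEST_ENV", Map("MY_TEST_ENV" -> "LALALA"))
// Linux lines read "LALALA"; Windows lines read "MY_TEST_ENV=LALALA",
// hence the assertions check `contains("LALALA")` instead of equality.
```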

## How was this patch tested?

Manually tested via AppVeyor.

**Before**
https://ci.appveyor.com/project/spark-test/spark/build/194-PipedRDDSuite

**After**
https://ci.appveyor.com/project/spark-test/spark/build/226-PipedRDDSuite

You can merge this pull request into a Git repository by running:

$ git pull https://github.com/HyukjinKwon/spark pipe-errors

Alternatively you can review and apply these changes as the patch at:

https://github.com/apache/spark/pull/16254.patch

To close this pull request, make a commit to your master/trunk branch
with (at least) the following in the commit message:

This closes #16254


commit 1cd63b2dedb5c34f66bb370a7b4ce278d9f4ea70
Author: hyukjinkwon 
Date:   2016-12-12T13:15:01Z

Fix tests in PipedRDDSuite to pass on Windows



