Repository: spark Updated Branches: refs/heads/branch-1.6 b8da77ef7 -> 1fbcb6e7b
[SPARK-12517] add default RDD name for one created via sc.textFile The feature was first added at commit: 7b877b27053bfb7092e250e01a3b887e1b50a109 but was later removed (probably by mistake) at commit: fc8b58195afa67fbb75b4c8303e022f703cbf007. This change sets the default name of RDDs created via sc.textFile(...) to the path argument. Here is the symptom: * Using spark-1.5.2-bin-hadoop2.6: scala> sc.textFile("/home/root/.bashrc").name res5: String = null scala> sc.binaryFiles("/home/root/.bashrc").name res6: String = /home/root/.bashrc * while using Spark 1.3.1: scala> sc.textFile("/home/root/.bashrc").name res0: String = /home/root/.bashrc scala> sc.binaryFiles("/home/root/.bashrc").name res1: String = /home/root/.bashrc Author: Yaron Weinsberg <wya...@gmail.com> Author: yaron <ya...@il.ibm.com> Closes #10456 from wyaron/master. (cherry picked from commit 73b70f076d4e22396b7e145f2ce5974fbf788048) Signed-off-by: Kousuke Saruta <saru...@oss.nttdata.co.jp> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1fbcb6e7 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1fbcb6e7 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1fbcb6e7 Branch: refs/heads/branch-1.6 Commit: 1fbcb6e7be9cd9fa5255837cfc5358f2283f4aaf Parents: b8da77e Author: Yaron Weinsberg <wya...@gmail.com> Authored: Tue Dec 29 05:19:11 2015 +0900 Committer: Kousuke Saruta <saru...@oss.nttdata.co.jp> Committed: Tue Dec 29 05:19:29 2015 +0900 ---------------------------------------------------------------------- .../scala/org/apache/spark/SparkContext.scala | 4 ++-- .../org/apache/spark/SparkContextSuite.scala | 25 ++++++++++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/1fbcb6e7/core/src/main/scala/org/apache/spark/SparkContext.scala 
---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 8a62b71..add3f04 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -830,7 +830,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli minPartitions: Int = defaultMinPartitions): RDD[String] = withScope { assertNotStopped() hadoopFile(path, classOf[TextInputFormat], classOf[LongWritable], classOf[Text], - minPartitions).map(pair => pair._2.toString) + minPartitions).map(pair => pair._2.toString).setName(path) } /** @@ -879,7 +879,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli classOf[Text], classOf[Text], updateConf, - minPartitions).setName(path).map(record => (record._1.toString, record._2.toString)) + minPartitions).map(record => (record._1.toString, record._2.toString)).setName(path) } /** http://git-wip-us.apache.org/repos/asf/spark/blob/1fbcb6e7/core/src/test/scala/org/apache/spark/SparkContextSuite.scala ---------------------------------------------------------------------- diff --git a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala index d4f2ea8..172ef05 100644 --- a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala +++ b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala @@ -274,6 +274,31 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext { } } + test("Default path for file based RDDs is properly set (SPARK-12517)") { + sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local")) + + // Test filetextFile, wholeTextFiles, binaryFiles, hadoopFile and + // newAPIHadoopFile for setting the default path as the RDD name + val mockPath = "default/path/for/" + + var targetPath = 
mockPath + "textFile" + assert(sc.textFile(targetPath).name === targetPath) + + targetPath = mockPath + "wholeTextFiles" + assert(sc.wholeTextFiles(targetPath).name === targetPath) + + targetPath = mockPath + "binaryFiles" + assert(sc.binaryFiles(targetPath).name === targetPath) + + targetPath = mockPath + "hadoopFile" + assert(sc.hadoopFile(targetPath).name === targetPath) + + targetPath = mockPath + "newAPIHadoopFile" + assert(sc.newAPIHadoopFile(targetPath).name === targetPath) + + sc.stop() + } + test("calling multiple sc.stop() must not throw any exception") { noException should be thrownBy { sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local")) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org