This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new db0f6b4  [SPARK-27961][SQL] DataSourceV2Relation should not have refresh method
db0f6b4 is described below

commit db0f6b46749dc5cddd54c45e0dc5f95574c9e4f1
Author: Gengliang Wang <gengliang.w...@databricks.com>
AuthorDate: Sat Jun 8 10:59:10 2019 -0700

    [SPARK-27961][SQL] DataSourceV2Relation should not have refresh method
    
    ## What changes were proposed in this pull request?
    
    The `refresh` method newly added in PR #24401 blocks the work of moving `DataSourceV2Relation` into catalyst: it pattern-matches `case table: FileTable => table.fileIndex.refresh()`, while `FileTable` belongs to sql/core.
    
    More importantly, Ryan Blue pointed out that `DataSourceV2Relation` is immutable by design, so it should not have a refresh method.
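
    Since the relation is immutable, a stale file listing is now fixed by recreating the Dataset/DataFrame, as the updated error message in `FilePartitionReader` suggests. A minimal sketch (illustration only, not part of this patch), assuming the `MetadataCacheSuite` fixtures (`spark`, `location: File`, `deleteOneFileInDirectory`):

    ```scala
    // Read an ORC directory; the file listing is captured when the
    // DataFrame is created.
    val df = spark.read.orc(location.getAbsolutePath)
    df.count()

    // Simulate an external change by deleting one data file, as the
    // suite's deleteOneFileInDirectory helper does.
    deleteOneFileInDirectory(location)

    // The stale DataFrame fails with a FileNotFoundException whose
    // message now suggests "recreating the Dataset/DataFrame involved".
    intercept[SparkException] { df.count() }

    // Recreating the DataFrame picks up the current file listing.
    spark.read.orc(location.getAbsolutePath).count()
    ```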
    
    ## How was this patch tested?
    
    Unit test
    
    Closes #24815 from gengliangwang/removeRefreshTable.
    
    Authored-by: Gengliang Wang <gengliang.w...@databricks.com>
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
 .../datasources/v2/DataSourceV2Relation.scala      |  5 ----
 .../datasources/v2/FilePartitionReader.scala       |  3 +--
 .../org/apache/spark/sql/MetadataCacheSuite.scala  | 27 +++++++++++-----------
 3 files changed, 14 insertions(+), 21 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala
index 27875b3..fc91943 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala
@@ -68,11 +68,6 @@ case class DataSourceV2Relation(
   override def newInstance(): DataSourceV2Relation = {
     copy(output = output.map(_.newInstance()))
   }
-
-  override def refresh(): Unit = table match {
-    case table: FileTable => table.fileIndex.refresh()
-    case _ => // Do nothing.
-  }
 }
 
 /**
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FilePartitionReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FilePartitionReader.scala
index d4bad29..e88c751 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FilePartitionReader.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FilePartitionReader.scala
@@ -47,8 +47,7 @@ class FilePartitionReader[T](readers: Iterator[PartitionedFileReader[T]])
               e.getMessage + "\n" +
                 "It is possible the underlying files have been updated. " +
                 "You can explicitly invalidate the cache in Spark by " +
-                "running 'REFRESH TABLE tableName' command in SQL or " +
-                "by recreating the Dataset/DataFrame involved.")
+                "recreating the Dataset/DataFrame involved.")
           case e @ (_: RuntimeException | _: IOException) if ignoreCorruptFiles =>
             logWarning(
               s"Skipped the rest of the content in the corrupted file: 
$currentReader", e)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/MetadataCacheSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/MetadataCacheSuite.scala
index 664d59c..602951b 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/MetadataCacheSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/MetadataCacheSuite.scala
@@ -29,7 +29,7 @@ import org.apache.spark.sql.test.SharedSQLContext
 abstract class MetadataCacheSuite extends QueryTest with SharedSQLContext {
 
   /** Removes one data file in the given directory. */
-  private def deleteOneFileInDirectory(dir: File): Unit = {
+  protected def deleteOneFileInDirectory(dir: File): Unit = {
     assert(dir.isDirectory)
     val oneFile = dir.listFiles().find { file =>
       !file.getName.startsWith("_") && !file.getName.startsWith(".")
@@ -38,10 +38,9 @@ abstract class MetadataCacheSuite extends QueryTest with SharedSQLContext {
     oneFile.foreach(_.delete())
   }
 
-  test("SPARK-16336,SPARK-27504 Suggest doing table refresh " +
-    "when encountering FileNotFoundException") {
+  test("SPARK-16336,SPARK-27961 Suggest fixing FileNotFoundException") {
     withTempPath { (location: File) =>
-      // Create a Parquet directory
+      // Create an ORC directory
       spark.range(start = 0, end = 100, step = 1, numPartitions = 3)
         .write.orc(location.getAbsolutePath)
 
@@ -57,13 +56,20 @@ abstract class MetadataCacheSuite extends QueryTest with SharedSQLContext {
         df.count()
       }
       assert(e.getMessage.contains("FileNotFoundException"))
-      assert(e.getMessage.contains("REFRESH"))
+      assert(e.getMessage.contains("recreating the Dataset/DataFrame involved"))
     }
   }
+}
+
+class MetadataCacheV1Suite extends MetadataCacheSuite {
+  override protected def sparkConf: SparkConf =
+    super
+      .sparkConf
+      .set(SQLConf.USE_V1_SOURCE_READER_LIST, "orc")
 
-  test("SPARK-16337,SPARK-27504 temporary view refresh") {
+  test("SPARK-16337 temporary view refresh") {
     withTempView("view_refresh") { withTempPath { (location: File) =>
-      // Create a Parquet directory
+      // Create an ORC directory
       spark.range(start = 0, end = 100, step = 1, numPartitions = 3)
         .write.orc(location.getAbsolutePath)
 
@@ -113,13 +119,6 @@ abstract class MetadataCacheSuite extends QueryTest with SharedSQLContext {
   }
 }
 
-class MetadataCacheV1Suite extends MetadataCacheSuite {
-  override protected def sparkConf: SparkConf =
-    super
-      .sparkConf
-      .set(SQLConf.USE_V1_SOURCE_READER_LIST, "orc")
-}
-
 class MetadataCacheV2Suite extends MetadataCacheSuite {
   override protected def sparkConf: SparkConf =
     super

