Github user viirya commented on a diff in the pull request:

    https://github.com/apache/spark/pull/12412#discussion_r61033530
  
    --- Diff: 
sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala
 ---
    @@ -122,4 +122,129 @@ class HiveCommandSuite extends QueryTest with 
SQLTestUtils with TestHiveSingleto
           checkAnswer(sql("SHOW TBLPROPERTIES parquet_temp"), Nil)
         }
       }
    +
    +  test("LOAD DATA") {
    +    withTable("non_part_table", "part_table") {
    +      sql(
    +        """
    +          |CREATE TABLE non_part_table (employeeID INT, employeeName 
STRING)
    +          |ROW FORMAT DELIMITED
    +          |FIELDS TERMINATED BY '|'
    +          |LINES TERMINATED BY '\n'
    +        """.stripMargin)
    +
    +      // employee.dat has two columns separated by '|', the first is an 
int, the second is a string.
    +      // Its content looks like:
    +      // 16|john
    +      // 17|robert
    +      val testData = 
hiveContext.getHiveFile("data/files/employee.dat").getCanonicalPath
    +
    +      // LOAD DATA INTO non-partitioned table can't specify partition
    +      intercept[AnalysisException] {
    +        sql(s"""LOAD DATA LOCAL INPATH "$testData" INTO TABLE 
non_part_table PARTITION(ds="1")""")
    +      }
    +
    +      sql(s"""LOAD DATA LOCAL INPATH "$testData" INTO TABLE 
non_part_table""")
    +      checkAnswer(
    +        sql("SELECT * FROM non_part_table WHERE employeeID = 16"),
    +        Row(16, "john") :: Nil)
    +
    +      sql(
    +        """
    +          |CREATE TABLE part_table (employeeID INT, employeeName STRING)
    +          |PARTITIONED BY (c STRING, d STRING)
    +          |ROW FORMAT DELIMITED
    +          |FIELDS TERMINATED BY '|'
    +          |LINES TERMINATED BY '\n'
    +        """.stripMargin)
    +
    +      // LOAD DATA INTO partitioned table must specify partition
    +      intercept[AnalysisException] {
    +        sql(s"""LOAD DATA LOCAL INPATH "$testData" INTO TABLE 
part_table""")
    +      }
    +
    +      intercept[AnalysisException] {
    +        sql(s"""LOAD DATA LOCAL INPATH "$testData" INTO TABLE part_table 
PARTITION(c="1")""")
    +      }
    +      intercept[AnalysisException] {
    +        sql(s"""LOAD DATA LOCAL INPATH "$testData" INTO TABLE part_table 
PARTITION(d="1")""")
    +      }
    +      intercept[AnalysisException] {
    +        sql(s"""LOAD DATA LOCAL INPATH "$testData" INTO TABLE part_table 
PARTITION(c="1", k="2")""")
    +      }
    +
    +      sql(s"""LOAD DATA LOCAL INPATH "$testData" INTO TABLE part_table 
PARTITION(c="1", d="2")""")
    +      checkAnswer(
    +        sql("SELECT employeeID, employeeName FROM part_table WHERE c = '1' 
AND d = '2'"),
    +        sql("SELECT * FROM non_part_table").collect())
    +
    +      // Different order of partition columns.
    +      sql(s"""LOAD DATA LOCAL INPATH "$testData" INTO TABLE part_table 
PARTITION(d="1", c="2")""")
    +      checkAnswer(
    +        sql("SELECT employeeID, employeeName FROM part_table WHERE c = '2' 
AND d = '1'"),
    +        sql("SELECT * FROM non_part_table").collect())
    +    }
    +  }
    +
    +  test("LOAD DATA: input path") {
    +    withTable("non_part_table") {
    +      sql(
    +        """
    +          |CREATE TABLE non_part_table (employeeID INT, employeeName 
STRING)
    +          |ROW FORMAT DELIMITED
    +          |FIELDS TERMINATED BY '|'
    +          |LINES TERMINATED BY '\n'
    +        """.stripMargin)
    +
    +      // Non-existing inpath
    +      intercept[AnalysisException] {
    +        sql("""LOAD DATA LOCAL INPATH "/non-existing/data.txt" INTO TABLE 
non_part_table""")
    +      }
    +
    +      val testData = 
hiveContext.getHiveFile("data/files/employee.dat").getCanonicalPath
    +
    +      // Non-local inpath: without URI Scheme and Authority
    +      sql(s"""LOAD DATA INPATH "$testData" INTO TABLE non_part_table""")
    +      checkAnswer(
    +        sql("SELECT * FROM non_part_table WHERE employeeID = 16"),
    +        Row(16, "john") :: Nil)
    +
    +      // Use URI as LOCAL inpath:
    +      // file:/path/to/data/files/employee.dat
    +      val uri = "file:" + testData
    +      sql(s"""LOAD DATA LOCAL INPATH "$uri" INTO TABLE non_part_table""")
    +
    +      checkAnswer(
    +        sql("SELECT * FROM non_part_table WHERE employeeID = 16"),
    +        Row(16, "john") :: Row(16, "john") :: Nil)
    +
    +      // Use URI as non-LOCAL inpath
    +      sql(s"""LOAD DATA INPATH "$uri" INTO TABLE non_part_table""")
    +
    +      checkAnswer(
    +        sql("SELECT * FROM non_part_table WHERE employeeID = 16"),
    +        Row(16, "john") :: Row(16, "john") :: Row(16, "john") :: Nil)
    +
    +      sql(s"""LOAD DATA INPATH "$uri" OVERWRITE INTO TABLE 
non_part_table""")
    +
    +      checkAnswer(
    +        sql("SELECT * FROM non_part_table WHERE employeeID = 16"),
    +        Row(16, "john") :: Nil)
    +
    +      // Incorrect URI:
    +      // file://path/to/data/files/employee.dat
    +      val incorrectUri = "file:/" + testData
    +      intercept[AnalysisException] {
    +        sql(s"""LOAD DATA LOCAL INPATH "$incorrectUri" INTO TABLE 
non_part_table""")
    +      }
    +
    +      // Unset default URI Scheme and Authority: throw exception
    --- End diff --
    
    When there is no fs.default.name setting and no scheme or authority is
given by the user, an exception will be thrown, because we don't know the default.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org

Reply via email to