carbondata git commit: [HOTFIX] Add partition usage code

ravipesala Fri, 02 Mar 2018 05:35:04 -0800

Repository: carbondata
Updated Branches:
  refs/heads/master 51353f5bc -> f74d1efac



[HOTFIX] Add partition usage code

This closes #1956


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/f74d1efa
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/f74d1efa
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/f74d1efa

Branch: refs/heads/master
Commit: f74d1efac952d550566597b748f9f0d78be59efe
Parents: 51353f5
Author: chenliang613 <chenliang...@huawei.com>
Authored: Fri Feb 9 01:32:38 2018 +0800
Committer: ravipesala <ravi.pes...@gmail.com>
Committed: Fri Mar 2 19:04:40 2018 +0530

----------------------------------------------------------------------
 .../examples/StandardPartitionExample.scala     | 125 ++++++++++++++-----
 .../test/resources/partition_data_example.csv   |  27 ++++
 2 files changed, 119 insertions(+), 33 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/f74d1efa/examples/spark2/src/main/scala/org/apache/carbondata/examples/StandardPartitionExample.scala
----------------------------------------------------------------------
diff --git a/examples/spark2/src/main/scala/org/apache/carbondata/examples/StandardPartitionExample.scala b/examples/spark2/src/main/scala/org/apache/carbondata/examples/StandardPartitionExample.scala
index 20570a2..485eb89 100644
--- a/examples/spark2/src/main/scala/org/apache/carbondata/examples/StandardPartitionExample.scala
+++ b/examples/spark2/src/main/scala/org/apache/carbondata/examples/StandardPartitionExample.scala
@@ -21,8 +21,11 @@ import java.io.File
 
 import org.apache.spark.sql.{SaveMode, SparkSession}
 
+import org.apache.carbondata.core.constants.CarbonCommonConstants
+import org.apache.carbondata.core.util.CarbonProperties
+
 /**
- * This example is dynamic partition, same as spark partition.
+ * This example is for standard partition, same as hive and spark partition
  */
 
 object StandardPartitionExample {
@@ -31,43 +34,98 @@ object StandardPartitionExample {
 
     val rootPath = new File(this.getClass.getResource("/").getPath
                             + "../../../..").getCanonicalPath
-    val testData = s"$rootPath/integration/spark-common-test/src/test/resources/partition_data.csv"
+    val testData = s"$rootPath/integration/spark-common-test/src/test/resources/" +
+                   s"partition_data_example.csv"
     val spark = ExampleUtils.createCarbonSession("StandardPartitionExample")
 
     spark.sparkContext.setLogLevel("ERROR")
 
-    // 1. simple usage for StandardPartition
+    CarbonProperties.getInstance()
+      .addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT, "yyyy/MM/dd")
+
+    /**
+     * 1. Partition basic usages
+     */
+
+    spark.sql("DROP TABLE IF EXISTS origintable")
+    spark.sql(
+      """
+        | CREATE TABLE origintable
+        | (id Int,
+        | vin String,
+        | logdate Date,
+        | phonenumber Long,
+        | country String,
+        | area String,
+        | salary Int)
+        | STORED BY 'org.apache.carbondata.format'
+      """.stripMargin)
+
+    spark.sql(
+      s"""
+       LOAD DATA LOCAL INPATH '$testData' into table origintable
+       """)
+
+    spark.sql("select * from origintable").show(false)
+
+    // create partition table with logdate as partition column
+
     spark.sql("DROP TABLE IF EXISTS partitiontable0")
-    spark.sql("""
-                | CREATE TABLE partitiontable0
-                | (id Int,
-                | vin String,
-                | phonenumber Long,
-                | area String,
-                | salary Int)
-                | PARTITIONED BY (country String)
-                | STORED BY 'org.apache.carbondata.format'
-                | TBLPROPERTIES('SORT_COLUMNS'='id,vin')
-              """.stripMargin)
-
-    spark.sql(s"""
+    spark.sql(
+      """
+        | CREATE TABLE partitiontable0
+        | (id Int,
+        | vin String,
+        | phonenumber Long,
+        | country String,
+        | area String,
+        | salary Int)
+        | PARTITIONED BY (logdate Date)
+        | STORED BY 'org.apache.carbondata.format'
+        | TBLPROPERTIES('SORT_COLUMNS'='id,vin')
+      """.stripMargin)
+
+    // load data and build partition with logdate value
+
+    spark.sql(
+      s"""
        LOAD DATA LOCAL INPATH '$testData' into table partitiontable0
        """)
 
     spark.sql(
       s"""
-         | SELECT country,id,vin,phonenumber,area,salary
+         | SELECT logdate,id,vin,phonenumber,country,area,salary
+         | FROM partitiontable0 where logdate = cast('2016-02-12' as date)
+      """.stripMargin).show(100, false)
+
+    spark.sql("show partitions default.partitiontable0").show()
+
+    // insert data to table partitiontable0 and build partition with static value '2018-02-15'
+    spark.sql("insert into table partitiontable0 partition(logdate='2018-02-15') " +
+              "select id,vin,phonenumber,country,area,salary from origintable")
+
+    spark.sql(
+      s"""
+         | SELECT logdate,id,vin,phonenumber,country,area,salary
          | FROM partitiontable0
-      """.stripMargin).show()
+      """.stripMargin).show(100, false)
+
+    // insert overwrite data to table partitiontable0
+
+    spark.sql("UPDATE origintable SET (salary) = (88888)").show()
+
+    spark.sql("insert overwrite table partitiontable0 partition(logdate='2018-02-15') " +
+              "select id,vin,phonenumber,country,area,salary from origintable")
 
-    spark.sql("UPDATE partitiontable0 SET (salary) = (88888) WHERE country='UK'").show()
     spark.sql(
       s"""
-         | SELECT country,id,vin,phonenumber,area,salary
+         | SELECT logdate,id,vin,phonenumber,country,area,salary
          | FROM partitiontable0
-      """.stripMargin).show()
+      """.stripMargin).show(100, false)
 
-    // 2.compare the performance : with partition VS without partition
+    /**
+     * 2.Compare the performance : with partition VS without partition
+     */
 
     // build test data, if set the data is larger than 100M, it will take 10+ mins.
     import scala.util.Random
@@ -85,14 +143,15 @@ object StandardPartitionExample {
 
     // Create table with partition
     spark.sql("DROP TABLE IF EXISTS withpartition")
-    spark.sql("""
-                | CREATE TABLE withpartition
-                | (ID String,
-                | city String,
-                | population Int)
-                | PARTITIONED BY (country String)
-                | STORED BY 'org.apache.carbondata.format'
-              """.stripMargin)
+    spark.sql(
+      """
+        | CREATE TABLE withpartition
+        | (ID String,
+        | city String,
+        | population Int)
+        | PARTITIONED BY (country String)
+        | STORED BY 'org.apache.carbondata.format'
+      """.stripMargin)
 
     df.write.format("carbondata")
       .option("tableName", "withpartition")
@@ -123,16 +182,16 @@ object StandardPartitionExample {
       """.stripMargin).count()
     }
     // scalastyle:off
-    println("time of without partition:" + time_without_partition.toString)
-    println("time of with partition:" + time_with_partition.toString)
+    println("----time of without partition----:" + time_without_partition.toString)
+    println("----time of with partition----:" + time_with_partition.toString)
     // scalastyle:on
 
     spark.sql("DROP TABLE IF EXISTS partitiontable0")
     spark.sql("DROP TABLE IF EXISTS withoutpartition")
     spark.sql("DROP TABLE IF EXISTS withpartition")
+    spark.sql("DROP TABLE IF EXISTS origintable")
 
     spark.close()
 
   }
-
 }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/f74d1efa/integration/spark-common-test/src/test/resources/partition_data_example.csv
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/resources/partition_data_example.csv b/integration/spark-common-test/src/test/resources/partition_data_example.csv
new file mode 100644
index 0000000..a70bc32
--- /dev/null
+++ b/integration/spark-common-test/src/test/resources/partition_data_example.csv
@@ -0,0 +1,27 @@
+id,vin,logdate,phonenumber,country,area,salary
+1,A42158424831,2016/2/12,125371341,China,Asia,10000
+2,A42158473831,2016/2/12,125371342,China,Asia,10001
+3,A42152474832,2016/2/12,125371343,US,America,10002
+4,A42151477823,2016/2/12,125371344,China,OutSpace,10003
+5,A42158474135,2016/2/12,125371345,Japan,OutSpace,10004
+6,A42258434831,2016/12/13,125371346,China,Asia,10005
+7,A42158475831,2016/12/13,125371347,UK,OutSpace,10006
+8,A41158494830,2016/12/13,225371348,China,Asia,10007
+9,A42158424831,2016/2/12,225371349,China,OutSpace,10008
+10,A42158473830,2016/2/12,225371310,China,Asia,10009
+11,A42152474830,2016/2/12,325371311,US,America,10010
+12,A42151477823,2016/12/13,425371312,China,Asia,10011
+13,A42158474133,2016/2/12,325371313,Japan,Asia,10012
+14,A42258434835,2016/12/13,525371314,China,Asia,10013
+15,A42158475836,2014/5/15,625371315,UK,OutSpace,10014
+16,A41158494838,2014/5/15,525371316,China,Asia,10015
+17,A42158424833,2016/2/12,425371317,China,Asia,10016
+18,A42158473832,2014/5/15,325371318,China,Asia,10017
+19,A42152474834,2014/5/15,225371319,US,America,10018
+20,A42151477824,2016/12/13,225371320,China,Asia,10019
+21,A42158474137,2014/5/15,325371321,Japan,Asia,10020
+22,A42258434837,2016/12/13,25371322,China,Asia,10021
+23,A42158475838,2014/5/15,425371323,UK,OutSpace,10022
+24,A41158494839,2016/12/13,625371324,China,Asia,10023
+25,A41158494840,2014/5/15,626381324,Good,OutSpace,10024
+26,A41158494843,2014/5/15,625378824,NotGood,OutSpace,10025
\ No newline at end of file

carbondata git commit: [HOTFIX] Add partition usage code

Reply via email to