Repository: carbondata Updated Branches: refs/heads/master 51353f5bc -> f74d1efac
[HOTFIX] Add partition usage code This closes #1956 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/f74d1efa Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/f74d1efa Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/f74d1efa Branch: refs/heads/master Commit: f74d1efac952d550566597b748f9f0d78be59efe Parents: 51353f5 Author: chenliang613 <chenliang...@huawei.com> Authored: Fri Feb 9 01:32:38 2018 +0800 Committer: ravipesala <ravi.pes...@gmail.com> Committed: Fri Mar 2 19:04:40 2018 +0530 ---------------------------------------------------------------------- .../examples/StandardPartitionExample.scala | 125 ++++++++++++++----- .../test/resources/partition_data_example.csv | 27 ++++ 2 files changed, 119 insertions(+), 33 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/carbondata/blob/f74d1efa/examples/spark2/src/main/scala/org/apache/carbondata/examples/StandardPartitionExample.scala ---------------------------------------------------------------------- diff --git a/examples/spark2/src/main/scala/org/apache/carbondata/examples/StandardPartitionExample.scala b/examples/spark2/src/main/scala/org/apache/carbondata/examples/StandardPartitionExample.scala index 20570a2..485eb89 100644 --- a/examples/spark2/src/main/scala/org/apache/carbondata/examples/StandardPartitionExample.scala +++ b/examples/spark2/src/main/scala/org/apache/carbondata/examples/StandardPartitionExample.scala @@ -21,8 +21,11 @@ import java.io.File import org.apache.spark.sql.{SaveMode, SparkSession} +import org.apache.carbondata.core.constants.CarbonCommonConstants +import org.apache.carbondata.core.util.CarbonProperties + /** - * This example is dynamic partition, same as spark partition. + * This example is for standard partition, same as hive and spark partition */ object StandardPartitionExample { @@ -31,43 +34,98 @@ object StandardPartitionExample { val rootPath = new File(this.getClass.getResource("/").getPath + "../../../..").getCanonicalPath - val testData = s"$rootPath/integration/spark-common-test/src/test/resources/partition_data.csv" + val testData = s"$rootPath/integration/spark-common-test/src/test/resources/" + + s"partition_data_example.csv" val spark = ExampleUtils.createCarbonSession("StandardPartitionExample") spark.sparkContext.setLogLevel("ERROR") - // 1. simple usage for StandardPartition + CarbonProperties.getInstance() + .addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT, "yyyy/MM/dd") + + /** + * 1. Partition basic usages + */ + + spark.sql("DROP TABLE IF EXISTS origintable") + spark.sql( + """ + | CREATE TABLE origintable + | (id Int, + | vin String, + | logdate Date, + | phonenumber Long, + | country String, + | area String, + | salary Int) + | STORED BY 'org.apache.carbondata.format' + """.stripMargin) + + spark.sql( + s""" + LOAD DATA LOCAL INPATH '$testData' into table origintable + """) + + spark.sql("select * from origintable").show(false) + + // create partition table with logdate as partition column + spark.sql("DROP TABLE IF EXISTS partitiontable0") - spark.sql(""" - | CREATE TABLE partitiontable0 - | (id Int, - | vin String, - | phonenumber Long, - | area String, - | salary Int) - | PARTITIONED BY (country String) - | STORED BY 'org.apache.carbondata.format' - | TBLPROPERTIES('SORT_COLUMNS'='id,vin') - """.stripMargin) - - spark.sql(s""" + spark.sql( + """ + | CREATE TABLE partitiontable0 + | (id Int, + | vin String, + | phonenumber Long, + | country String, + | area String, + | salary Int) + | PARTITIONED BY (logdate Date) + | STORED BY 'org.apache.carbondata.format' + | TBLPROPERTIES('SORT_COLUMNS'='id,vin') + """.stripMargin) + + // load data and build partition with logdate value + + spark.sql( + s""" LOAD DATA LOCAL INPATH '$testData' into table partitiontable0 """) spark.sql( s""" - | SELECT country,id,vin,phonenumber,area,salary + | SELECT logdate,id,vin,phonenumber,country,area,salary + | FROM partitiontable0 where logdate = cast('2016-02-12' as date) + """.stripMargin).show(100, false) + + spark.sql("show partitions default.partitiontable0").show() + + // insert data to table partitiontable0 and build partition with static value '2018-02-15' + spark.sql("insert into table partitiontable0 partition(logdate='2018-02-15') " + + "select id,vin,phonenumber,country,area,salary from origintable") + + spark.sql( + s""" + | SELECT logdate,id,vin,phonenumber,country,area,salary | FROM partitiontable0 - """.stripMargin).show() + """.stripMargin).show(100, false) + + // insert overwrite data to table partitiontable0 + + spark.sql("UPDATE origintable SET (salary) = (88888)").show() + + spark.sql("insert overwrite table partitiontable0 partition(logdate='2018-02-15') " + + "select id,vin,phonenumber,country,area,salary from origintable") - spark.sql("UPDATE partitiontable0 SET (salary) = (88888) WHERE country='UK'").show() spark.sql( s""" - | SELECT country,id,vin,phonenumber,area,salary + | SELECT logdate,id,vin,phonenumber,country,area,salary | FROM partitiontable0 - """.stripMargin).show() + """.stripMargin).show(100, false) - // 2.compare the performance : with partition VS without partition + /** + * 2.Compare the performance : with partition VS without partition + */ // build test data, if set the data is larger than 100M, it will take 10+ mins. import scala.util.Random @@ -85,14 +143,15 @@ object StandardPartitionExample { // Create table with partition spark.sql("DROP TABLE IF EXISTS withpartition") - spark.sql(""" - | CREATE TABLE withpartition - | (ID String, - | city String, - | population Int) - | PARTITIONED BY (country String) - | STORED BY 'org.apache.carbondata.format' - """.stripMargin) + spark.sql( + """ + | CREATE TABLE withpartition + | (ID String, + | city String, + | population Int) + | PARTITIONED BY (country String) + | STORED BY 'org.apache.carbondata.format' + """.stripMargin) df.write.format("carbondata") .option("tableName", "withpartition") @@ -123,16 +182,16 @@ object StandardPartitionExample { """.stripMargin).count() } // scalastyle:off - println("time of without partition:" + time_without_partition.toString) - println("time of with partition:" + time_with_partition.toString) + println("----time of without partition----:" + time_without_partition.toString) + println("----time of with partition----:" + time_with_partition.toString) // scalastyle:on spark.sql("DROP TABLE IF EXISTS partitiontable0") spark.sql("DROP TABLE IF EXISTS withoutpartition") spark.sql("DROP TABLE IF EXISTS withpartition") + spark.sql("DROP TABLE IF EXISTS origintable") spark.close() } - } http://git-wip-us.apache.org/repos/asf/carbondata/blob/f74d1efa/integration/spark-common-test/src/test/resources/partition_data_example.csv ---------------------------------------------------------------------- diff --git a/integration/spark-common-test/src/test/resources/partition_data_example.csv b/integration/spark-common-test/src/test/resources/partition_data_example.csv new file mode 100644 index 0000000..a70bc32 --- /dev/null +++ b/integration/spark-common-test/src/test/resources/partition_data_example.csv @@ -0,0 +1,27 @@ +id,vin,logdate,phonenumber,country,area,salary +1,A42158424831,2016/2/12,125371341,China,Asia,10000 +2,A42158473831,2016/2/12,125371342,China,Asia,10001 +3,A42152474832,2016/2/12,125371343,US,America,10002 +4,A42151477823,2016/2/12,125371344,China,OutSpace,10003 +5,A42158474135,2016/2/12,125371345,Japan,OutSpace,10004 +6,A42258434831,2016/12/13,125371346,China,Asia,10005 +7,A42158475831,2016/12/13,125371347,UK,OutSpace,10006 +8,A41158494830,2016/12/13,225371348,China,Asia,10007 +9,A42158424831,2016/2/12,225371349,China,OutSpace,10008 +10,A42158473830,2016/2/12,225371310,China,Asia,10009 +11,A42152474830,2016/2/12,325371311,US,America,10010 +12,A42151477823,2016/12/13,425371312,China,Asia,10011 +13,A42158474133,2016/2/12,325371313,Japan,Asia,10012 +14,A42258434835,2016/12/13,525371314,China,Asia,10013 +15,A42158475836,2014/5/15,625371315,UK,OutSpace,10014 +16,A41158494838,2014/5/15,525371316,China,Asia,10015 +17,A42158424833,2016/2/12,425371317,China,Asia,10016 +18,A42158473832,2014/5/15,325371318,China,Asia,10017 +19,A42152474834,2014/5/15,225371319,US,America,10018 +20,A42151477824,2016/12/13,225371320,China,Asia,10019 +21,A42158474137,2014/5/15,325371321,Japan,Asia,10020 +22,A42258434837,2016/12/13,25371322,China,Asia,10021 +23,A42158475838,2014/5/15,425371323,UK,OutSpace,10022 +24,A41158494839,2016/12/13,625371324,China,Asia,10023 +25,A41158494840,2014/5/15,626381324,Good,OutSpace,10024 +26,A41158494843,2014/5/15,625378824,NotGood,OutSpace,10025 \ No newline at end of file