Github user kunal642 commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2042#discussion_r173367821 --- Diff: integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/StandardPartitionTestCase.scala --- @@ -0,0 +1,436 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.cluster.sdv.generated + +import org.apache.carbondata.core.constants.CarbonCommonConstants +import org.apache.carbondata.core.util.CarbonProperties +import org.apache.spark.sql.Row +import org.apache.spark.sql.common.util._ +import org.scalatest.BeforeAndAfterAll + +/** + * Test Class for partitionTestCase to verify all scenerios + */ + +class StandardPartitionTestCase extends QueryTest with BeforeAndAfterAll { + + override def beforeAll = { + CarbonProperties.getInstance() + .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy/MM/dd HH:mm:ss") + .addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT, "yyyy/MM/dd") + } + //Verify exception if column in partitioned by is already specified in table schema + + test("Standard-Partition_TC001", Include) { + intercept[Exception] { + sql(s"""drop table if exists uniqdata""") + sql(s"""CREATE TABLE uniqdata (CUST_ID int,CUST_NAME String,ACTIVE_EMUI_VERSION string, DOB timestamp, BIGINT_COLUMN1 bigint,BIGINT_COLUMN2 bigint,DECIMAL_COLUMN1 decimal(30,10), DECIMAL_COLUMN2 decimal(36,10),Double_COLUMN1 double, Double_COLUMN2 double,INTEGER_COLUMN1 int) PARTITIONED BY (INTEGER_COLUMN1 int)STORED BY 'org.apache.carbondata.format' TBLPROPERTIES('PARTITION_TYPE'='List','LIST_INFO'='1,3')""") + } + sql(s"""drop table if exists uniqdata""") + } + + //Verify table is created with Partition + test("Standard-Partition_TC002", Include) { + sql(s"""drop table if exists uniqdata""") + sql(s"""CREATE TABLE uniqdata (CUST_ID int,CUST_NAME String,ACTIVE_EMUI_VERSION string, DOB timestamp, BIGINT_COLUMN1 bigint,BIGINT_COLUMN2 bigint,DECIMAL_COLUMN1 decimal(30,10), DECIMAL_COLUMN2 decimal(36,10),Double_COLUMN1 double, Double_COLUMN2 double) PARTITIONED BY (INTEGER_COLUMN1 int)STORED BY 'org.apache.carbondata.format' TBLPROPERTIES('PARTITION_TYPE'='List','LIST_INFO'='1,3')""") + val df = sql(s"""DESC uniqdata""") + 
assert(df.collect().reverse.head.get(0).toString.toUpperCase.contains("INTEGER_COLUMN1")) + sql(s"""drop table if exists uniqdata""") + } + + //Verify table is created with Partition with table comment + + test("Standard-Partition_TC003",Include) { + sql(s"""drop table if exists partition_table""") + sql(s"""CREATE TABLE partition_table(shortField SHORT, intField INT, bigintField LONG, doubleField DOUBLE, decimalField DECIMAL(18,2), charField CHAR(5), floatField FLOAT ) COMMENT 'partition_table' PARTITIONED BY (stringField STRING) STORED BY 'carbondata'""") + val df = sql(s"""DESC formatted partition_table""") + checkExistence(df, true, "partition_table") + } + + //Verify WHEN partitioned by is not specified in the DDL, but partition type,number of partitions and list info are given + test("Standard-Partition_TC004", Include) { + sql(s"""drop table if exists uniqdata""") + sql(s"""CREATE TABLE uniqdata (CUST_ID int,CUST_NAME String,ACTIVE_EMUI_VERSION string, DOB timestamp, BIGINT_COLUMN1 bigint,BIGINT_COLUMN2 bigint,DECIMAL_COLUMN1 decimal(30,10), DECIMAL_COLUMN2 decimal(36,10),Double_COLUMN1 double, Double_COLUMN2 double,INTEGER_COLUMN1 int) STORED BY 'org.apache.carbondata.format' TBLPROPERTIES('PARTITION_TYPE'='List','NUM_PARTITIONS'='3','LIST_INFO'='1,3')""") + assert(true) + sql(s"""drop table if exists uniqdata""") + } + + //Creating a partition table, with ARRAY type partitioned column + test("Standard-Partition_TC005", Include) { + intercept[Exception] { + sql(s"""drop table if exists partition_table_array""") + sql(s"""CREATE TABLE partition_table_array(shortField SHORT, intField INT, bigintField LONG, doubleField DOUBLE, stringField STRING, timestampField TIMESTAMP, decimalField DECIMAL(18,2), dateField DATE, charField CHAR(5), floatField FLOAT ) PARTITIONED BY (complexData ARRAY<STRING>) STORED BY 'carbondata'""") + sql(s"""drop table if exists partition_table_array""") + } + } + + //Verify exception while Creating a partition table with 
DICTIONARY_INCLUDE + test("Standard-Partition_TC006", Include) { + intercept[Exception] { + sql(s"""drop table if exists partition_table_string""") + sql(s"""CREATE TABLE partition_table_string(shortField SHORT, intField INT, bigintField LONG, doubleField DOUBLE, timestampField TIMESTAMP, decimalField DECIMAL(18,2), dateField DATE, charField CHAR(5), floatField FLOAT, complexData ARRAY<STRING> ) PARTITIONED BY (stringField STRING) STORED BY 'carbondata' TBLPROPERTIES('DICTIONARY_INCLUDE'='stringField')""") + sql(s"""drop table if exists partition_table_string""") + } + } + + //Creating a partition table with DICTIONARY_EXCLUDE + test("Standard-Partition_TC007", Include) { + sql(s"""drop table if exists partition_table_string""") + sql(s"""CREATE TABLE partition_table_string(shortField SHORT, intField INT, bigintField LONG, doubleField DOUBLE, timestampField TIMESTAMP, decimalField DECIMAL(18,2), dateField DATE, charField CHAR(5), floatField FLOAT, complexData ARRAY<STRING> ) PARTITIONED BY (stringField STRING) STORED BY 'carbondata' TBLPROPERTIES('DICTIONARY_EXCLUDE'='stringField')""") + sql(s"""drop table if exists partition_table_string""") + } + + //Verify exception if datatype is not provided with partition column + test("Standard-Partition_TC008", Include) { + intercept[Exception] { + sql(s"""drop table if exists uniqdata""") + sql(s"""CREATE TABLE uniqdata (CUST_ID int,CUST_NAME String,ACTIVE_EMUI_VERSION string, DOB timestamp, BIGINT_COLUMN1 bigint,BIGINT_COLUMN2 bigint,DECIMAL_COLUMN1 decimal(30,10), DECIMAL_COLUMN2 decimal(36,10),Double_COLUMN1 double, Double_COLUMN2 double) PARTITIONED BY (DOJ)STORED BY 'org.apache.carbondata.format'""") + } + sql(s"""drop table if exists uniqdata""") + } + + //Verify exception if non existent file header is provided in partition + test("Standard-Partition_TC009", Include) { + intercept[Exception] { + sql(s"""drop table if exists uniqdata""") + sql(s"""CREATE TABLE uniqdata (CUST_ID int,CUST_NAME 
String,ACTIVE_EMUI_VERSION string, DOB timestamp, BIGINT_COLUMN1 bigint,BIGINT_COLUMN2 bigint,DECIMAL_COLUMN1 decimal(30,10), DECIMAL_COLUMN2 decimal(36,10),Double_COLUMN1 double, Double_COLUMN2 double) PARTITIONED BY (DOJ timestamp)STORED BY 'org.apache.carbondata.format'""") + sql(s"""LOAD DATA INPATH '$resourcesPath/Data/partition/2000_UniqData_partition.csv' into table uniqdata OPTIONS('DELIMITER'=',','QUOTECHAR'='"','FILEHEADER'='CUST_NAME,ACTIVE_EMUI_VERSION,DOJ,,BIGINT_COLUMN1,BIGINT_COLUMN2,DECIMAL_COLUMN1,DECIMAL_COLUMN2,Double_COLUMN1,Double_COLUMN2,INTEGER_COLUMN1,DOJ,CUST_ID')""") + } + sql(s"""drop table if exists uniqdata""") + } + + //Verify exception if PARTITION BY is empty + test("Standard-Partition_TC010", Include) { + intercept[Exception] { + sql(s"""drop table if exists uniqdata""") + sql(s"""CREATE TABLE uniqdata (CUST_ID int,CUST_NAME String,ACTIVE_EMUI_VERSION string, DOB timestamp, BIGINT_COLUMN1 bigint,BIGINT_COLUMN2 bigint,DECIMAL_COLUMN1 decimal(30,10), DECIMAL_COLUMN2 decimal(36,10),Double_COLUMN1 double, Double_COLUMN2 double) PARTITIONED BY ()STORED BY 'org.apache.carbondata.format'""") + } + sql(s"""drop table if exists uniqdata""") + } + + //Loading data into partitioned table with SORT_SCOPE=LOCAL_SORT + test("Standard-Partition_TC011", Include) { + sql(s"""drop table if exists partition_table""") + sql(s"""CREATE TABLE partition_table(shortField SHORT, intField INT, bigintField LONG, doubleField DOUBLE, timestamp TIMESTAMP, decimalField DECIMAL(18,2),dateField DATE, charField CHAR(5), floatField FLOAT ) PARTITIONED BY (stringField STRING) STORED BY 'carbondata' TBLPROPERTIES('SORT_SCOPE'='LOCAL_SORT')""") + sql(s"""load data inpath '$resourcesPath/Data/partition/list_partition_table.csv' into table partition_table options('FILEHEADER'='shortfield,intfield,bigintfield,doublefield,stringfield,timestamp,decimalfield,datefield,charfield,floatfield')""") + checkAnswer(sql(s"""select count(*) from partition_table"""), Seq(Row(11))) + 
sql(s"""drop table if exists partition_table""") + } + + //Loading data into partitioned table with SORT_SCOPE=GLOBAL_SORT + test("Standard-Partition_TC012", Include) { + sql(s"""drop table if exists partition_table""") + sql(s"""CREATE TABLE partition_table(shortField SHORT, intField INT, bigintField LONG, doubleField DOUBLE, timestamp TIMESTAMP, decimalField DECIMAL(18,2),dateField DATE, charField CHAR(5), floatField FLOAT ) PARTITIONED BY (stringField STRING) STORED BY 'carbondata' TBLPROPERTIES('SORT_SCOPE'='GLOBAL_SORT')""") + sql(s"""load data inpath '$resourcesPath/Data/partition/list_partition_table.csv' into table partition_table options('FILEHEADER'='shortfield,intfield,bigintfield,doublefield,stringfield,timestamp,decimalfield,datefield,charfield,floatfield')""") + checkAnswer(sql(s"""select count(*) from partition_table"""), Seq(Row(11))) + sql(s"""drop table if exists partition_table""") + } + + //Loading data into partitioned table with SORT_SCOPE=BATCH_SORT + test("Standard-Partition_TC013", Include) { + sql(s"""drop table if exists partition_table""") + sql(s"""CREATE TABLE partition_table(shortField SHORT, intField INT, bigintField LONG, doubleField DOUBLE, timestamp TIMESTAMP, decimalField DECIMAL(18,2),dateField DATE, charField CHAR(5), floatField FLOAT ) PARTITIONED BY (stringField STRING) STORED BY 'carbondata' TBLPROPERTIES('SORT_SCOPE'='BATCH_SORT')""") + sql(s"""load data inpath '$resourcesPath/Data/partition/list_partition_table.csv' into table partition_table options('FILEHEADER'='shortfield,intfield,bigintfield,doublefield,stringfield,timestamp,decimalfield,datefield,charfield,floatfield')""") + checkAnswer(sql(s"""select count(*) from partition_table"""), Seq(Row(11))) + sql(s"""drop table if exists partition_table""") + } + + //Loading data into partitioned table with SORT_SCOPE=NO_SORT + test("Standard-Partition_TC014", Include) { + sql(s"""drop table if exists partition_table""") + sql(s"""CREATE TABLE partition_table(shortField 
SHORT, intField INT, bigintField LONG, doubleField DOUBLE, timestamp TIMESTAMP, decimalField DECIMAL(18,2),dateField DATE, charField CHAR(5), floatField FLOAT ) PARTITIONED BY (stringField STRING) STORED BY 'carbondata' TBLPROPERTIES('SORT_SCOPE'='NO_SORT')""") + sql(s"""load data inpath '$resourcesPath/Data/partition/list_partition_table.csv' into table partition_table options('FILEHEADER'='shortfield,intfield,bigintfield,doublefield,stringfield,timestamp,decimalfield,datefield,charfield,floatfield')""") + checkAnswer(sql(s"""select count(*) from partition_table"""), Seq(Row(11))) + sql(s"""drop table if exists partition_table""") + } + + //Loading data into a partitioned table with Bad Records Action = FORCE + test("Standard-Partition_TC015", Include) { + sql(s"""drop table if exists partition_table""") + sql(s"""CREATE TABLE partition_table(shortField SHORT, intField INT, bigintField LONG, doubleField DOUBLE, timestamp TIMESTAMP, decimalField DECIMAL(18,2),dateField DATE, charField CHAR(5), floatField FLOAT ) PARTITIONED BY (stringField STRING) STORED BY 'carbondata'""") + sql(s"""load data inpath '$resourcesPath/Data/partition/list_partition_table.csv' into table partition_table options('FILEHEADER'='shortfield,intfield,bigintfield,doublefield,stringfield,timestamp,decimalfield,datefield,charfield,floatfield','BAD_RECORDS_ACTION'='FORCE')""") + checkAnswer(sql(s"""select count(*) from partition_table"""), Seq(Row(11))) + sql(s"""select count(*) from partition_table""").show() --- End diff -- Remove this line: the `.show()` call only prints the count and asserts nothing, and the `checkAnswer` immediately before it already verifies the same query.
---