akkio-97 commented on a change in pull request #4115: URL: https://github.com/apache/carbondata/pull/4115#discussion_r613786506
########## File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchemaCommon.scala ########## @@ -275,6 +275,64 @@ class AlterTableColumnSchemaGenerator( allColumns ++= Seq(columnSchema) } newCols ++= Seq(columnSchema) + if (DataTypes.isArrayType(columnSchema.getDataType)) { + columnSchema.setNumberOfChild(field.children.size) + val childField = field.children.get(0) + // TODO: should support adding multi-level complex columns: CARBONDATA-4164 + if (childField.children != Some(null)) { + throw new UnsupportedOperationException("Only single-level complex types are allowed") + } + val childSchema: ColumnSchema = TableNewProcessor.createColumnSchema( Review comment: done ########## File path: integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/alterTable/TestAlterTableAddColumns.scala ########## @@ -136,6 +140,221 @@ class TestAlterTableAddColumns extends QueryTest with BeforeAndAfterAll { sql(s"""DROP TABLE IF EXISTS ${ tableName }""") } + test("Test adding of array of all primitive datatypes") { + sql("DROP TABLE IF EXISTS alter_com") + sql("CREATE TABLE alter_com(intfield int) STORED AS carbondata") + sql( + "ALTER TABLE alter_com ADD COLUMNS(arr1 array<short>, arr2 array<int>, arr3 " + + "array<long>, arr4 array<double>, arr5 array<decimal(8,2)>, arr6 array<string>, arr7 " + + "array<char(5)>, arr8 array<varchar(50)>, arr9 array<boolean>, arr10 array<date>, arr11 " + + "array<timestamp> )") + sql("desc table alter_com").show(false) + sql( + "insert into alter_com values(1,array(1,5),array(1,2),array(1,2,3),array(1.2d,2.3d),array" + + "(4.5,6.7),array('hello','world'),array('a','bcd'),array('abcd','efg'),array(true,false)," + + "array('2017-02-01','2018-09-11'),array('2017-02-01 00:01:00','2018-02-01 02:21:00') )") + checkAnswer(sql( + "select * from alter_com"), + Seq(Row(1, make(Array(1, 5)), + make(Array(1, 2)), + make(Array(1, 2, 3)), + make(Array(1.2, 2.3)), + make(Array(java.math.BigDecimal.valueOf(4.5).setScale(2), + java.math.BigDecimal.valueOf(6.7).setScale(2))), + make(Array("hello", "world")), + make(Array("a", "bcd")), + make(Array("abcd", "efg")), + make(Array(true, false)), + make(Array(Date.valueOf("2017-02-01"), Date.valueOf("2018-09-11"))), + make(Array(Timestamp.valueOf("2017-02-01 00:01:00"), + Timestamp.valueOf("2018-02-01 02:21:00"))) + ))) + sql("DROP TABLE IF EXISTS alter_com") + } + + test("Test adding of struct of all primitive datatypes") { + sql("DROP TABLE IF EXISTS alter_com") + sql( + "CREATE TABLE alter_com(intField INT, structField struct<a:short,b:int,c:long,d:double," + + "e:decimal(8,2),f:string,g:char(5),h:varchar(50),i:boolean,j:date,k:timestamp>) STORED AS " + + "carbondata") + sql( + "insert into alter_com values(1, named_struct('a',1,'b',2,'c',3,'d',1.23,'e',2.34,'f'," + + "'hello','g','abc','h','def','i',true,'j','2017-02-01','k','2018-02-01 02:00:00.0') ) ") + sql("DROP TABLE IF EXISTS alter_com") + } + + def insertIntoTableForArrayType(): Unit = { + sql("insert into alter_com values(4,array(2),array(1,2,3,4),array('abc','def'))") + sql("insert into alter_com values(5,array(1,2),array(1), array('Hulk','Thor'))") + sql( + "insert into alter_com values(6,array(1,2,3),array(1234,8,33333),array('Iron','Man'," + + "'Jarvis'))") + } + + def checkRestulForArrayType(): Unit = { + val totalRows = sql("select * from alter_com").collect() + val a = sql("select * from alter_com where array_contains(arr2,2)").collect + val b = sql("select * from alter_com where array_contains(arr3,'Thor')").collect + val c = sql("select * from alter_com where intField = 1").collect + assert(totalRows.size == 6) + assert(a.size == 1) + assert(b.size == 1) + // check default value for newly added array column that is index - 3 and 4 + assert(c(0)(2) == null && c(0)(3) == null) + } + + test("Test alter add for arrays enabling local dictionary") { + sql("DROP TABLE IF EXISTS alter_com") + sql( + "CREATE TABLE alter_com(intField INT, arr1 array<int>) " + + "STORED AS carbondata TBLPROPERTIES('local_dictionary_enable'='true')") + + sql("insert into alter_com values(1,array(1) )") + sql("insert into alter_com values(2,array(9,0) )") + sql("insert into alter_com values(3,array(11,12,13) )") + + sql( + "ALTER TABLE alter_com ADD COLUMNS(arr2 array<int>, arr3 array<string>) TBLPROPERTIES" + + "('LOCAL_DICTIONARY_INCLUDE'='arr3')") + val schema = sql("describe alter_com").collect() + assert(schema.size == 4) + + // For the previous segments the default value for newly added array column is null + insertIntoTableForArrayType + checkRestulForArrayType + sql("DROP TABLE IF EXISTS alter_com") + + } + + test("Test alter add for arrays disabling local dictionary") { + sql("DROP TABLE IF EXISTS alter_com") + sql( + "CREATE TABLE alter_com(intField INT, arr1 array<int>) " + + "STORED AS carbondata TBLPROPERTIES('local_dictionary_enable'='false')") + + sql("insert into alter_com values(1,array(1) )") + sql("insert into alter_com values(2,array(9,0) )") + sql("insert into alter_com values(3,array(11,12,13) )") + sql( + "ALTER TABLE alter_com ADD COLUMNS(arr2 array<int>, arr3 array<string>) TBLPROPERTIES" + + "('LOCAL_DICTIONARY_EXCLUDE'='arr3')") + val schema = sql("describe alter_com").collect() + assert(schema.size == 4) + + // For the previous segments the default value for newly added array column is null + insertIntoTableForArrayType + checkRestulForArrayType + sql("DROP TABLE IF EXISTS alter_com") + + } + + def insertIntoTableForStructType(): Unit = { + sql( + "insert into alter_struct values(2, named_struct('id1', 'id2','name1','name2'), " + + "named_struct('a','id2','b', 'abc2'), 'hello world', 5, named_struct('c','id3'," + + "'d', 'abc3','e', 22), array(1,2,3) )") + sql( + "insert into alter_struct values(3, named_struct('id1', 'id3','name1','name3'), " + + "named_struct('a','id2.1','b', 'abc2.1'), 'India', 5, named_struct('c','id3.1'," + + "'d', 'abc3.1','e', 25), array(4,5) )") + + } + + def checkResultForStructType(): Unit = { + val totalRows = sql("select * from alter_struct").collect() + val a = sql("select * from alter_struct where struct1.a = 'id2' ").collect() + val b = sql( + "select * from alter_struct where struct1.a = 'id2' or struct2.c = 'id3.1' or " + + "array_contains(arr,5) ").collect() + val c = sql("select * from alter_struct where roll = 1").collect() + + assert(totalRows.size == 3) + assert(a.size == 1) + assert(b.size == 2) + // check default value for newly added struct column + assert(c(0)(2) == null) + } + + test("Test alter add for structs enabling local dictionary") { Review comment: done ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchemaCommon.scala ########## @@ -275,6 +275,64 @@ class AlterTableColumnSchemaGenerator( allColumns ++= Seq(columnSchema) } newCols ++= Seq(columnSchema) + if (DataTypes.isArrayType(columnSchema.getDataType)) { + columnSchema.setNumberOfChild(field.children.size) + val childField = field.children.get(0) + // TODO: should support adding multi-level complex columns: CARBONDATA-4164 + if (childField.children != Some(null)) { + throw new UnsupportedOperationException("Only single-level complex types are allowed") + } + val childSchema: ColumnSchema = TableNewProcessor.createColumnSchema( + childField, + new java.util.ArrayList[Encoding](), + isDimensionCol = true, + childField.precision, + childField.scale, + childField.schemaOrdinal + currentSchemaOrdinal, + alterTableModel.highCardinalityDims, + alterTableModel.databaseName.getOrElse(dbName), + isSortColumn(childField.name.getOrElse(childField.column)), + isVarcharColumn(childField.name.getOrElse(childField.column))) + + if (childSchema.getDataType == DataTypes.VARCHAR) { + // put the new long string columns in 'longStringCols' + // and add them after old long string columns + longStringCols ++= Seq(childSchema) + } else { + allColumns ++= Seq(childSchema) + } + newCols ++= Seq(childSchema) + } else if (DataTypes.isStructType(columnSchema.getDataType)) { + val noOfChildren = field.children.get.size + columnSchema.setNumberOfChild(noOfChildren) + for (i <- 0 to noOfChildren - 1) { + val childField = field.children.get(i) + // TODO: should support adding multi-level complex columns: CARBONDATA-4164 + if (childField.children != Some(null)) { + throw new UnsupportedOperationException("Only single-level complex types are allowed") Review comment: done ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchemaCommon.scala ########## @@ -275,6 +275,64 @@ class AlterTableColumnSchemaGenerator( allColumns ++= Seq(columnSchema) } newCols ++= Seq(columnSchema) + if (DataTypes.isArrayType(columnSchema.getDataType)) { + columnSchema.setNumberOfChild(field.children.size) + val childField = field.children.get(0) + // TODO: should support adding multi-level complex columns: CARBONDATA-4164 + if (childField.children != Some(null)) { + throw new UnsupportedOperationException("Only single-level complex types are allowed") Review comment: done ########## File path: index/secondary-index/src/test/scala/org/apache/carbondata/spark/testsuite/secondaryindex/TestSIWithComplexArrayType.scala ########## @@ -43,6 +43,83 @@ class TestSIWithComplexArrayType extends QueryTest with BeforeAndAfterEach { sql("drop table if exists complextable5") } + test("Test alter add array column before creating SI") { + sql("drop table if exists complextable") + sql("create table complextable (id string, country array<string>, name string) stored as carbondata") + sql("insert into complextable select 1,array('china', 'us'), 'b'") + sql("insert into complextable select 2,array('pak'), 'v'") + + sql("drop index if exists index_11 on complextable") + sql( + "ALTER TABLE complextable ADD COLUMNS(arr2 array<string>)") + sql("insert into complextable select 3,array('china'), 'f',array('hello','world')") + sql("insert into complextable select 4,array('india'),'g',array('iron','man','jarvis')") + + val result1 = sql("select * from complextable where array_contains(arr2,'iron')") + val result2 = sql("select * from complextable where arr2[0]='iron'") + sql("create index index_11 on table complextable(arr2) as 'carbondata'") + val df1 = sql(" select * from complextable where array_contains(arr2,'iron')") + val df2 = sql(" select * from complextable where arr2[0]='iron'") + if (!isFilterPushedDownToSI(df1.queryExecution.sparkPlan)) { + assert(false) + } else { + assert(true) + } + if (!isFilterPushedDownToSI(df2.queryExecution.sparkPlan)) { + assert(false) + } else { + assert(true) + } + val doNotHitSIDf = sql(" select * from complextable where array_contains(arr2,'iron') and array_contains(arr2,'man')") + if (isFilterPushedDownToSI(doNotHitSIDf.queryExecution.sparkPlan)) { + assert(false) + } else { + assert(true) + } + checkAnswer(result1, df1) + checkAnswer(result2, df2) + } + + test("Test alter add array column after creating SI") { Review comment: done ########## File path: integration/spark/src/test/scala/org/apache/carbondata/view/rewrite/TestAllOperationsOnMV.scala ########## @@ -79,6 +90,12 @@ class TestAllOperationsOnMV extends QueryTest with BeforeAndAfterEach { }.getMessage.contains("Cannot add columns in a materialized view table default.dm1") } + test("test alter add complex column on MV table") { + intercept[ProcessMetaDataException] { Review comment: done ########## File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchemaCommon.scala ########## @@ -473,6 +528,11 @@ class AlterTableColumnSchemaGenerator( for (elem <- alterTableModel.tableProperties) { if (elem._1.toLowerCase.startsWith(defaultValueString)) { if (col.getColumnName.equalsIgnoreCase(elem._1.substring(defaultValueString.length))) { + if (DataTypes.isArrayType(col.getDataType) || DataTypes.isStructType(col.getDataType) || + DataTypes.isStructType(col.getDataType)) { Review comment: done -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org