[carbondata] branch master updated: [HOTFIX][DataLoad]fix task assignment issue using NODE_MIN_SIZE_FIRST block assignment strategy
This is an automated email from the ASF dual-hosted git repository. manishgupta88 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/carbondata.git The following commit(s) were added to refs/heads/master by this push: new 39cd2f4 [HOTFIX][DataLoad]fix task assignment issue using NODE_MIN_SIZE_FIRST block assignment strategy 39cd2f4 is described below commit 39cd2f44cf17d3817d27a11a8ca871fac2500794 Author: ndwangsen AuthorDate: Wed Jan 9 18:44:08 2019 +0800 [HOTFIX][DataLoad]fix task assignment issue using NODE_MIN_SIZE_FIRST block assignment strategy This PR solves the problem of incorrect task assignment when the specified minimum data size to load is less than the average size for each node. This closes #3059 --- .../org/apache/carbondata/processing/util/CarbonLoaderUtil.java | 8 1 file changed, 8 insertions(+) diff --git a/processing/src/main/java/org/apache/carbondata/processing/util/CarbonLoaderUtil.java b/processing/src/main/java/org/apache/carbondata/processing/util/CarbonLoaderUtil.java index 23e4a8f..0ff3eb6 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/util/CarbonLoaderUtil.java +++ b/processing/src/main/java/org/apache/carbondata/processing/util/CarbonLoaderUtil.java @@ -609,6 +609,14 @@ public final class CarbonLoaderUtil { blockAssignmentStrategy = BlockAssignmentStrategy.BLOCK_SIZE_FIRST; } else { blockAssignmentStrategy = BlockAssignmentStrategy.BLOCK_NUM_FIRST; + // fall back to BLOCK_NUM_FIRST strategy need to reset + // the average expected size for each node + if (numOfNodes == 0) { +sizePerNode = 1; + } else { +sizePerNode = blockInfos.size() / numOfNodes; +sizePerNode = sizePerNode <= 0 ? 1 : sizePerNode; + } } LOGGER.info("Specified minimum data size to load is less than the average size " + "for each node, fallback to default strategy" + blockAssignmentStrategy);
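For illustration, here is a minimal self-contained sketch of the fallback computation in the diff above (the method name is hypothetical; only the arithmetic mirrors the patch):

  // Sketch: re-derive the expected block count per node when falling back
  // to BLOCK_NUM_FIRST, guarding against division by zero and a zero average.
  static int expectedBlocksPerNode(int totalBlocks, int numOfNodes) {
      if (numOfNodes == 0) {
          return 1;                                  // no known nodes: assume one block
      }
      int sizePerNode = totalBlocks / numOfNodes;    // integer division
      return sizePerNode <= 0 ? 1 : sizePerNode;     // at least one block per node
  }

For example, 10 blocks across 4 nodes gives 2 per node, while 3 blocks across 5 nodes is clamped to 1.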
[carbondata] branch master updated: [CARBONDATA-3233]Fix JVM crash issue in snappy compressor and update the pagesize correctly
This is an automated email from the ASF dual-hosted git repository. manishgupta88 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/carbondata.git The following commit(s) were added to refs/heads/master by this push: new 92c9ce3 [CARBONDATA-3233]Fix JVM crash issue in snappy compressor and update the pagesize correctly 92c9ce3 is described below commit 92c9ce3ff0da23f207376e4f8861717e2e3de1e5 Author: akashrn5 AuthorDate: Mon Jan 7 16:34:48 2019 +0530 [CARBONDATA-3233]Fix JVM crash issue in snappy compressor and update the pagesize correctly Problem: 1. During data load the JVM sometimes crashes during off-heap snappy compression. We get the maximum compressed size from the compressor, allocate that much memory, and then call rawCompress with the base offset of the page and the base offset of the newly created memory block. During this call the JVM sometimes crashes inside Snappy. This issue is random and fails only sometimes. 2. PageSize was getting updated wrongly: the actual pageSize is the number of rows in the page, but we were updating it with the rowId, not the row count. Solution: Remove the method implementation and let the super class handle the compression based on the datatype, which fixes this random JVM crash issue. This closes #3053 --- .../datastore/page/UnsafeFixLengthColumnPage.java | 29 +++--- 1 file changed, 4 insertions(+), 25 deletions(-) diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/UnsafeFixLengthColumnPage.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/UnsafeFixLengthColumnPage.java index da0e487..2e576bc 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/UnsafeFixLengthColumnPage.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/UnsafeFixLengthColumnPage.java @@ -17,10 +17,8 @@ package org.apache.carbondata.core.datastore.page; -import java.io.IOException; import java.math.BigDecimal; -import org.apache.carbondata.core.datastore.compression.Compressor; import org.apache.carbondata.core.datastore.page.encoding.ColumnPageEncoderMeta; import org.apache.carbondata.core.memory.CarbonUnsafe; import org.apache.carbondata.core.memory.MemoryBlock; @@ -112,7 +110,8 @@ public class UnsafeFixLengthColumnPage extends ColumnPage { private void updatePageSize(int rowId) { if (pageSize < rowId) { - pageSize = rowId; + // update the actual number of rows + pageSize = rowId + 1; } } @@ -359,7 +358,7 @@ public class UnsafeFixLengthColumnPage extends ColumnPage { @Override public float[] getFloatPage() { -float[] data = new float[getPageSize()]; +float[] data = new float[getEndLoop()]; for (long i = 0; i < data.length; i++) { long offset = i << floatBits; data[(int)i] = CarbonUnsafe.getUnsafe().getFloat(baseAddress, baseOffset + offset); @@ -369,7 +368,7 @@ public class UnsafeFixLengthColumnPage extends ColumnPage { @Override public double[] getDoublePage() { -double[] data = new double[getPageSize()]; +double[] data = new double[getEndLoop()]; for (long i = 0; i < data.length; i++) { long offset = i << doubleBits; data[(int)i] = CarbonUnsafe.getUnsafe().getDouble(baseAddress, baseOffset + offset); @@ -541,26 +540,6 @@ public class UnsafeFixLengthColumnPage extends ColumnPage { return totalLength; } - @Override public byte[] compress(Compressor compressor) throws MemoryException, IOException { -if (UnsafeMemoryManager.isOffHeap() && compressor.supportUnsafe()) { - // use raw compression and copy to byte[] - int inputSize = totalLength; - long compressedMaxSize = compressor.maxCompressedLength(inputSize); -
MemoryBlock compressed = - UnsafeMemoryManager.allocateMemoryWithRetry(taskId, compressedMaxSize); - long outSize = compressor.rawCompress(baseOffset, inputSize, compressed.getBaseOffset()); - assert outSize < Integer.MAX_VALUE; - byte[] output = new byte[(int) outSize]; - CarbonUnsafe.getUnsafe() - .copyMemory(compressed.getBaseObject(), compressed.getBaseOffset(), output, - CarbonUnsafe.BYTE_ARRAY_OFFSET, outSize); - UnsafeMemoryManager.INSTANCE.freeMemory(taskId, compressed); - return output; -} else { - return super.compress(compressor); -} - } - /** * reallocate memory if capacity length than current size + request size */
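To make the off-by-one fixed above concrete, here is a tiny hypothetical walk over 0-based row ids:

  // Illustration only: rowId is a 0-based index, so the row count of the
  // page is rowId + 1. Tracking the raw rowId under-counts by one.
  int pageSize = 0;
  for (int rowId = 0; rowId < 5; rowId++) {       // write rows 0..4
      pageSize = Math.max(pageSize, rowId + 1);   // row count = last index + 1
  }
  // pageSize == 5, the actual number of rows; the old code would report 4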
[carbondata] branch master updated: [CARBONDATA-3241] Refactor the requested scan columns and the projection columns
This is an automated email from the ASF dual-hosted git repository. manishgupta88 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/carbondata.git The following commit(s) were added to refs/heads/master by this push: new 86713f5 [CARBONDATA-3241] Refactor the requested scan columns and the projection columns 86713f5 is described below commit 86713f505a80d2a22912b15d65aa008324ad29e4 Author: dhatchayani AuthorDate: Thu Jan 10 15:00:51 2019 +0530 [CARBONDATA-3241] Refactor the requested scan columns and the projection columns Refactor the requested columns methods by changing both the scan list and the projection list together. This closes #3062 --- .../execution/strategy/CarbonLateDecodeStrategy.scala | 19 +-- 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonLateDecodeStrategy.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonLateDecodeStrategy.scala index a23a191..0f706af 100644 --- a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonLateDecodeStrategy.scala +++ b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonLateDecodeStrategy.scala @@ -367,7 +367,7 @@ private[sql] class CarbonLateDecodeStrategy extends SparkStrategy { // In case of implicit exist we should disable vectorPushRowFilters as it goes in IUD flow // to get the positionId or tupleID var implicitExisted = false - val updatedProjects = projects.map { + var updatedProjects = projects.map { case a@Alias(s: ScalaUDF, name) if name.equalsIgnoreCase(CarbonCommonConstants.POSITION_ID) || name.equalsIgnoreCase(CarbonCommonConstants.CARBON_IMPLICIT_COLUMN_TUPLEID) => @@ -388,9 +388,15 @@ private[sql] class CarbonLateDecodeStrategy extends SparkStrategy { } case other => other } + val updatedColumns: (Seq[Attribute], Seq[Expression]) = getRequestedColumns(relation, +projectsAttr, +filterSet, +handledSet, +newProjectList, +updatedProjects) // Don't request columns that are only referenced by pushed filters. - val requestedColumns = -getRequestedColumns(relation, projectsAttr, filterSet, handledSet, newProjectList) + val requestedColumns = updatedColumns._1 + updatedProjects = updatedColumns._2 var updateRequestedColumns = if (!vectorPushRowFilters && !implicitExisted && !hasDictionaryFilterCols @@ -449,9 +455,10 @@ private[sql] class CarbonLateDecodeStrategy extends SparkStrategy { projectsAttr: Seq[Attribute], filterSet: AttributeSet, handledSet: AttributeSet, - newProjectList: Seq[Attribute]) = { -(projectsAttr.to[mutable.LinkedHashSet] ++ filterSet -- handledSet) - .map(relation.attributeMap).toSeq ++ newProjectList + newProjectList: Seq[Attribute], + updatedProjects: Seq[Expression]): (Seq[Attribute], Seq[Expression]) = { +((projectsAttr.to[mutable.LinkedHashSet] ++ filterSet -- handledSet) + .map(relation.attributeMap).toSeq ++ newProjectList, updatedProjects) } private def getDataSourceScan(relation: LogicalRelation,
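As a rough Java analogue of the Scala refactor above (hypothetical code; the real change returns a Scala tuple), the key idea is to compute the scan list and hand back the updated projection list together, so callers cannot update one without the other:

  // Hypothetical Java analogue of getRequestedColumns returning both lists.
  static java.util.Map.Entry<java.util.List<String>, java.util.List<String>> requestedColumns(
      java.util.List<String> projectAttrs, java.util.Set<String> filterSet,
      java.util.Set<String> handledSet, java.util.List<String> updatedProjects) {
    java.util.LinkedHashSet<String> requested = new java.util.LinkedHashSet<>(projectAttrs);
    requested.addAll(filterSet);      // also scan columns referenced by filters...
    requested.removeAll(handledSet);  // ...minus columns only pushed filters touch
    return new java.util.AbstractMap.SimpleEntry<>(
        new java.util.ArrayList<>(requested), updatedProjects);
  }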
carbondata git commit: [CARBONDATA-3223] Fixed Wrong Datasize and Indexsize calculation for old store using Show Segments
Repository: carbondata Updated Branches: refs/heads/master 923dab1b5 -> 72da33495 [CARBONDATA-3223] Fixed Wrong Datasize and Indexsize calculation for old store using Show Segments Problem: Tables created and loaded on an older version (1.1) showed data-size and index-size as 0B when refreshed on a new version. This was because when the data-size came back as "null" we did not compute it, and directly assigned it the value 0. Solution: Show the old datasize and indexsize as NA. Also refactored SetQuerySegment code for better understandability. This closes #3047 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/72da3349 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/72da3349 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/72da3349 Branch: refs/heads/master Commit: 72da33495362fdbf4cd0e24331ca77a1fab470f6 Parents: 923dab1 Author: manishnalla1994 Authored: Wed Jan 2 18:00:36 2019 +0530 Committer: manishgupta88 Committed: Mon Jan 7 11:33:06 2019 +0530 -- .../hadoop/api/CarbonInputFormat.java | 25 +++- .../org/apache/carbondata/api/CarbonStore.scala | 4 ++-- .../org/apache/spark/sql/CarbonCountStar.scala | 2 +- 3 files changed, 22 insertions(+), 9 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/72da3349/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java -- diff --git a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java index 24691f2..26144e2 100644 --- a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java +++ b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java @@ -277,12 +277,7 @@ m filterExpression public static void setQuerySegment(Configuration conf, AbsoluteTableIdentifier identifier) { String dbName = identifier.getCarbonTableIdentifier().getDatabaseName().toLowerCase(); String tbName = identifier.getCarbonTableIdentifier().getTableName().toLowerCase(); -String segmentNumbersFromProperty = CarbonProperties.getInstance() -.getProperty(CarbonCommonConstants.CARBON_INPUT_SEGMENTS + dbName + "." + tbName, "*"); -if (!segmentNumbersFromProperty.trim().equals("*")) { - CarbonInputFormat.setSegmentsToAccess(conf, - Segment.toSegmentList(segmentNumbersFromProperty.split(","), null)); -} +getQuerySegmentToAccess(conf, dbName, tbName); } /** @@ -827,4 +822,22 @@ m filterExpression } return projectColumns.toArray(new String[projectColumns.size()]); } + + private static void getQuerySegmentToAccess(Configuration conf, String dbName, String tableName) { +String segmentNumbersFromProperty = CarbonProperties.getInstance() +.getProperty(CarbonCommonConstants.CARBON_INPUT_SEGMENTS + dbName + "."
+ tableName, "*"); +if (!segmentNumbersFromProperty.trim().equals("*")) { + CarbonInputFormat.setSegmentsToAccess(conf, + Segment.toSegmentList(segmentNumbersFromProperty.split(","), null)); +} + } + + /** + * Set `CARBON_INPUT_SEGMENTS` from property to configuration + */ + public static void setQuerySegment(Configuration conf, CarbonTable carbonTable) { +String tableName = carbonTable.getTableName(); +getQuerySegmentToAccess(conf, carbonTable.getDatabaseName(), tableName); + } + } http://git-wip-us.apache.org/repos/asf/carbondata/blob/72da3349/integration/spark-common/src/main/scala/org/apache/carbondata/api/CarbonStore.scala -- diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/api/CarbonStore.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/api/CarbonStore.scala index da9d4c2..11db430 100644 --- a/integration/spark-common/src/main/scala/org/apache/carbondata/api/CarbonStore.scala +++ b/integration/spark-common/src/main/scala/org/apache/carbondata/api/CarbonStore.scala @@ -107,8 +107,8 @@ object CarbonStore { (indices.asScala.map(_.getFile_size).sum, FileFactory.getCarbonFile(indexPath).getSize) } else { // for batch segment, we can get the data size from table status file directly -(if (load.getDataSize == null) 0L else load.getDataSize.toLong, - if (load.getIndexSize == null) 0L else load.getIndexSize.toLong) +(if (load.getDataSize == null) -1L else load.getDataSize.toLong, + if (load.getIndexSize == null) -1L else load.getIndexSize
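A small hedged sketch of how the -1L sentinel introduced above can be rendered by SHOW SEGMENTS (the helper is hypothetical, not code from this commit):

  // Legacy (1.1) stores never recorded these sizes, so a negative sentinel
  // prints as "NA" instead of a misleading "0B".
  static String formatSize(long sizeInBytes) {
      return sizeInBytes < 0 ? "NA" : sizeInBytes + "B";
  }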
carbondata git commit: [CARBONDATA-3149] Documentation for alter table column rename
Repository: carbondata Updated Branches: refs/heads/master bc1e94472 -> fc4d51176 [CARBONDATA-3149] Documentation for alter table column rename Added documentation for alter table column rename This closes #3044 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/fc4d5117 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/fc4d5117 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/fc4d5117 Branch: refs/heads/master Commit: fc4d51176399f6c22a745a415ecb9096a633dbc9 Parents: bc1e944 Author: akashrn5 Authored: Wed Jan 2 12:09:05 2019 +0530 Committer: manishgupta88 Committed: Fri Jan 4 16:52:21 2019 +0530 -- docs/ddl-of-carbondata.md | 20 ++-- 1 file changed, 14 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/fc4d5117/docs/ddl-of-carbondata.md -- diff --git a/docs/ddl-of-carbondata.md b/docs/ddl-of-carbondata.md index d1a4794..aaa2eda 100644 --- a/docs/ddl-of-carbondata.md +++ b/docs/ddl-of-carbondata.md @@ -47,7 +47,8 @@ CarbonData DDL statements are documented here,which includes: * [RENAME TABLE](#rename-table) * [ADD COLUMNS](#add-columns) * [DROP COLUMNS](#drop-columns) -* [CHANGE DATA TYPE](#change-data-type) +* [RENAME COLUMN](#change-column-nametype) +* [CHANGE COLUMN NAME/TYPE](#change-column-nametype) * [MERGE INDEXES](#merge-index) * [SET/UNSET Local Dictionary Properties](#set-and-unset-for-local-dictionary-properties) * [DROP TABLE](#drop-table) @@ -681,13 +682,13 @@ Users can specify which columns to include and exclude for local dictionary gene **NOTE:** Drop Complex child column is not supported. - - # CHANGE DATA TYPE + - # CHANGE COLUMN NAME/TYPE - This command is used to change the data type from INT to BIGINT or decimal precision from lower to higher. + This command is used to change column name and the data type from INT to BIGINT or decimal precision from lower to higher. Change of decimal data type from lower precision to higher precision will only be supported for cases where there is no data loss. ``` - ALTER TABLE [db_name.]table_name CHANGE col_name col_name changed_column_type + ALTER TABLE [db_name.]table_name CHANGE col_old_name col_new_name column_type ``` Valid Scenarios @@ -695,10 +696,10 @@ Users can specify which columns to include and exclude for local dictionary gene - Valid scenario - Change of decimal precision from (10,2) to (12,3) is valid as the total number of digits are increased by 2 but scale is increased only by 1 which will not lead to any data loss. - **NOTE:** The allowed range is 38,38 (precision, scale) and is a valid upper case scenario which is not resulting in data loss. - Example1:Changing data type of column a1 from INT to BIGINT. + Example1:Change column a1's name to a2 and its data type from INT to BIGINT. ``` - ALTER TABLE test_db.carbon CHANGE a1 a1 BIGINT + ALTER TABLE test_db.carbon CHANGE a1 a2 BIGINT ``` Example2:Changing decimal precision of column a1 from 10 to 18. @@ -707,6 +708,13 @@ Users can specify which columns to include and exclude for local dictionary gene ALTER TABLE test_db.carbon CHANGE a1 a1 DECIMAL(18,2) ``` + Example3:Change column a3's name to a4. + + ``` + ALTER TABLE test_db.carbon CHANGE a3 a4 STRING + ``` + + **NOTE:** Once the column is renamed, user has to take care about replacing the fileheader with the new name or changing the column header in csv file. 
- # MERGE INDEX This command is used to merge all the CarbonData index files (.carbonindex) inside a segment to a single CarbonData index merge file (.carbonindexmerge). This enhances the first query performance.
carbondata git commit: [CARBONDATA-3202]update the schema to session catalog after add column, drop column and column rename
Repository: carbondata Updated Branches: refs/heads/master 3e4638b33 -> b0733ecbf [CARBONDATA-3202]update the schema to session catalog after add column, drop column and column rename Problem: 1. For alter table rename, once we change the table name in carbon, we fire an alter table rename DDL using the hive client. But add, drop and column rename are features Spark does not support while Hive does, so after a column rename, add or drop, the updated schema is not reflected in the catalog. 2. After a column rename, the column comment is not copied to the renamed column. Solution: 1. We can directly call the Spark API alterTableDataSchema, passing the updated schema, which in turn updates the schema in the session catalog. Since this API is supported from Spark 2.1 onward, code changes are for Spark 2.2 and Spark 2.3; behavior with Spark 2.1 remains the same. 2. While updating the catalog schema, if the column has a comment, put it in the column metadata. This closes #3027 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/b0733ecb Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/b0733ecb Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/b0733ecb Branch: refs/heads/master Commit: b0733ecbf380d7956dee57a9048dd7537620744e Parents: 3e4638b Author: akashrn5 Authored: Thu Dec 27 11:31:44 2018 +0530 Committer: manishgupta88 Committed: Mon Dec 31 08:50:22 2018 +0530 -- .../ThriftWrapperSchemaConverterImpl.java | 1 + .../sql/hive/CarbonInMemorySessionState.scala | 15 +++--- .../spark/sql/hive/CarbonSessionState.scala | 55 +--- .../spark/sql/hive/CarbonSessionUtil.scala | 55 +++- .../CarbonAlterTableAddColumnCommand.scala | 12 +++-- ...terTableColRenameDataTypeChangeCommand.scala | 24 ++--- .../CarbonAlterTableDropColumnCommand.scala | 9 ++-- .../spark/sql/hive/CarbonSessionCatalog.scala | 26 ++--- .../org/apache/spark/util/AlterTableUtil.scala | 16 ++ .../spark/sql/hive/CarbonSessionState.scala | 31 ++- .../restructure/AlterTableRevertTestCase.scala | 1 - .../AlterTableColumnRenameTestCase.scala| 11 +++- 12 files changed, 158 insertions(+), 98 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/b0733ecb/core/src/main/java/org/apache/carbondata/core/metadata/converter/ThriftWrapperSchemaConverterImpl.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/converter/ThriftWrapperSchemaConverterImpl.java b/core/src/main/java/org/apache/carbondata/core/metadata/converter/ThriftWrapperSchemaConverterImpl.java index 13f592f..dca7fa2 100644 --- a/core/src/main/java/org/apache/carbondata/core/metadata/converter/ThriftWrapperSchemaConverterImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/metadata/converter/ThriftWrapperSchemaConverterImpl.java @@ -548,6 +548,7 @@ public class ThriftWrapperSchemaConverterImpl implements SchemaConverter { if (sortColumns != null) { wrapperColumnSchema.setSortColumn(true); } + wrapperColumnSchema.setColumnProperties(externalColumnSchema.getColumnProperties()); } wrapperColumnSchema.setFunction(externalColumnSchema.getAggregate_function()); List parentColumnTableRelation = http://git-wip-us.apache.org/repos/asf/carbondata/blob/b0733ecb/integration/spark2/src/main/commonTo2.2And2.3/org/apache/spark/sql/hive/CarbonInMemorySessionState.scala -- diff --git a/integration/spark2/src/main/commonTo2.2And2.3/org/apache/spark/sql/hive/CarbonInMemorySessionState.scala
b/integration/spark2/src/main/commonTo2.2And2.3/org/apache/spark/sql/hive/CarbonInMemorySessionState.scala index ba6aae5..da60fb0 100644 --- a/integration/spark2/src/main/commonTo2.2And2.3/org/apache/spark/sql/hive/CarbonInMemorySessionState.scala +++ b/integration/spark2/src/main/commonTo2.2And2.3/org/apache/spark/sql/hive/CarbonInMemorySessionState.scala @@ -35,6 +35,7 @@ import org.apache.spark.sql.parser.CarbonSparkSqlParser import org.apache.spark.sql.types.{StructField, StructType} import org.apache.spark.sql.{CarbonEnv, SparkSession} +import org.apache.carbondata.core.metadata.schema.table.column.{ColumnSchema => ColumnSchema} import org.apache.carbondata.core.util.CarbonUtil import org.apache.carbondata.core.util.path.CarbonTablePath import org.apache.carbondata.format.TableInfo @@ -79,15 +80,13 @@ class InMemorySessionCatalog( override def alterTable(tableIdentifier: TableIdentifier, schemaParts: String, - cols: Option[Seq[org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema]]) - : Unit = { +
carbondata git commit: [CARBONDATA-3196] [CARBONDATA-3203]Fixed Compaction for Complex types with Dictionary Include and also supported Compaction for restructured table
Repository: carbondata Updated Branches: refs/heads/master f5c1b7bbd -> 7c4e79fca [CARBONDATA-3196] [CARBONDATA-3203]Fixed Compaction for Complex types with Dictionary Include and also supported Compaction for restructured table Problem1: Compaction failed for complex datatypes with dictionary include, as the KeyGenerator was not being set in the model for dictionary-include complex columns, and dictionary-include complex columns were not handled when finding cardinality. Solution: Handled both issues by setting the KeyGenerator and storing the cardinality of complex dictionary-include columns. Problem2: Compaction was failing for a restructured table containing dictionary-include complex columns. Solution: Handled complex columns for this case by inserting the correct column indices. This closes #3022 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/7c4e79fc Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/7c4e79fc Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/7c4e79fc Branch: refs/heads/master Commit: 7c4e79fca8e6aac044bbadaf1210b1be2f3b8a8e Parents: f5c1b7b Author: manishnalla1994 Authored: Mon Dec 24 17:37:36 2018 +0530 Committer: manishgupta88 Committed: Fri Dec 28 17:27:13 2018 +0530 -- .../core/scan/wrappers/ByteArrayWrapper.java| 4 ++ .../src/test/resources/structofarray.csv| 10 +++ .../complexType/TestCompactionComplexType.scala | 65 .../loading/CarbonDataLoadConfiguration.java| 32 +- .../merger/CompactionResultSortProcessor.java | 14 - .../sort/sortdata/SortParameters.java | 2 +- .../sort/sortdata/TableFieldStat.java | 34 +- .../store/CarbonFactDataHandlerModel.java | 31 +++--- .../store/CarbonFactHandlerFactory.java | 1 - .../util/CarbonDataProcessorUtil.java | 33 ++ 10 files changed, 171 insertions(+), 55 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/7c4e79fc/core/src/main/java/org/apache/carbondata/core/scan/wrappers/ByteArrayWrapper.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/wrappers/ByteArrayWrapper.java b/core/src/main/java/org/apache/carbondata/core/scan/wrappers/ByteArrayWrapper.java index 65f29d4..1b903f7 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/wrappers/ByteArrayWrapper.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/wrappers/ByteArrayWrapper.java @@ -81,6 +81,10 @@ public class ByteArrayWrapper implements Comparable, Serializa return this.noDictionaryKeys[index]; } + public byte[] getComplexKeyByIndex(int index) { +return this.complexTypesKeys[index]; + } + /** * to get the no dictionary column data * http://git-wip-us.apache.org/repos/asf/carbondata/blob/7c4e79fc/integration/spark-common-test/src/test/resources/structofarray.csv -- diff --git a/integration/spark-common-test/src/test/resources/structofarray.csv b/integration/spark-common-test/src/test/resources/structofarray.csv new file mode 100644 index 000..ef21b44 --- /dev/null +++ b/integration/spark-common-test/src/test/resources/structofarray.csv @@ -0,0 +1,10 @@ +Cust,2015,1,20,M,SSC,Y,123456789$2015-01-01 00:00:00$100&3000$100.123&3000.234$United Kingdom&England$2015-01-01 00:00:00&2014-01-01 00:00:00,42,104,160,325046028.8,859616748.6 +Cust0001,2015,1,30,F,Degree,N,123456790$2015-01-02 00:00:00$101&3000$101.123&3001.234$United States&MO$2015-01-02 00:00:00&2014-01-02 00:00:00,141,181,54,378476092.1,818599132.6 +Cust0002,2015,1,40,M,graduation,D,123456791$2015-01-03 00:00:00$102&3000$102.123&3002.234$United
States&OR$2015-01-03 00:00:00&2014-01-03 00:00:00,138,43,175,408335001.4,906020942.6 +Cust0003,2015,1,50,F,PG,Y,123456792$2015-01-04 00:00:00$103&3000$103.123&3003.234$Australia&Victoria$2015-01-04 00:00:00&2014-01-04 00:00:00,96,63,184,493146274.5,556184083.3 +Cust0004,2015,1,60,M,MS,N,123456793$2015-01-05 00:00:00$104&3000$104.123&3004.234$United States&AL$2015-01-05 00:00:00&2014-01-05 00:00:00,115,172,165,457941392.3,641744932.5 +Cust0005,2015,1,70,F,Doctor,D,123456794$2015-01-06 00:00:00$105&3000$105.123&3005.234$United States&NJ$2015-01-06 00:00:00&2014-01-06 00:00:00,178,192,178,112452170.2,502438883.3 +Cust0006,2015,1,80,M,Layer,Y,123456795$2015-01-07 00:00:00$106&3000$106.123&3006.234$United States&IL$2015-01-07
carbondata git commit: [CARBONDATA-3017] Map DDL Support
Repository: carbondata Updated Branches: refs/heads/master ebdd5486e -> 90f63a0cc [CARBONDATA-3017] Map DDL Support Support Create DDL for Map type. This closes #2980 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/90f63a0c Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/90f63a0c Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/90f63a0c Branch: refs/heads/master Commit: 90f63a0cc7e0ba11b9a850cbd19a1a0dd3212e4e Parents: ebdd548 Author: manishnalla1994 Authored: Tue Oct 16 15:18:08 2018 +0530 Committer: manishgupta88 Committed: Fri Dec 14 22:37:42 2018 +0530 -- .../apache/carbondata/core/util/CarbonUtil.java | 4 + .../hadoop/api/CarbonTableOutputFormat.java | 17 +- .../TestCreateDDLForComplexMapType.scala| 445 +++ .../LocalDictionarySupportLoadTableTest.scala | 17 + .../spark/rdd/NewCarbonDataLoadRDD.scala| 10 +- .../carbondata/spark/util/CarbonScalaUtil.scala | 1 + .../spark/sql/catalyst/CarbonDDLSqlParser.scala | 43 +- .../streaming/CarbonAppendableStreamSink.scala | 9 +- .../spark/rdd/CarbonDataRDDFactory.scala| 4 +- .../CarbonAlterTableCompactionCommand.scala | 6 + .../management/CarbonLoadDataCommand.scala | 6 +- .../table/CarbonCreateTableCommand.scala| 2 +- .../spark/util/AllDictionaryTestCase.scala | 4 +- .../util/ExternalColumnDictionaryTestCase.scala | 4 +- .../TestStreamingTableWithRowParser.scala | 3 +- .../loading/ComplexDelimitersEnum.java | 39 ++ .../loading/DataLoadProcessBuilder.java | 7 +- .../loading/model/CarbonLoadModel.java | 38 +- .../loading/model/CarbonLoadModelBuilder.java | 11 +- .../processing/loading/model/LoadOption.java| 17 +- .../loading/parser/CarbonParserFactory.java | 25 +- .../loading/parser/impl/ArrayParserImpl.java| 6 +- .../loading/parser/impl/MapParserImpl.java | 60 +++ .../loading/parser/impl/RowParserImpl.java | 8 +- .../sdk/file/CarbonWriterBuilder.java | 1 + .../streaming/parser/RowStreamParserImp.scala | 2 + 26 files changed, 715 insertions(+), 74 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/90f63a0c/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java index ac52728..fc4704e 100644 --- a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java @@ -597,6 +597,10 @@ public final class CarbonUtil { */ public static String delimiterConverter(String delimiter) { switch (delimiter) { + case "\\001": + case "\\002": + case "\\003": + case "\\004": case "|": case "*": case ".": http://git-wip-us.apache.org/repos/asf/carbondata/blob/90f63a0c/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableOutputFormat.java -- diff --git a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableOutputFormat.java b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableOutputFormat.java index dbd2f0e..16486d0 100644 --- a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableOutputFormat.java +++ b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableOutputFormat.java @@ -37,6 +37,7 @@ import org.apache.carbondata.core.util.CarbonThreadFactory; import org.apache.carbondata.core.util.ObjectSerializationUtil; import org.apache.carbondata.core.util.ThreadLocalSessionInfo; import org.apache.carbondata.hadoop.internal.ObjectArrayWritable; +import 
org.apache.carbondata.processing.loading.ComplexDelimitersEnum; import org.apache.carbondata.processing.loading.DataLoadExecutor; import org.apache.carbondata.processing.loading.TableProcessingOperations; import org.apache.carbondata.processing.loading.iterator.CarbonOutputIteratorWrapper; @@ -338,11 +339,19 @@ public class CarbonTableOutputFormat extends FileOutputFormat 1) { - model.setComplexDelimiterLevel2(split[1]); +model.setComplexDelimiter(split[0]); +if (split.length > 2) { + model.setComplexDelimiter(split[1]); + model.setComplexDelimiter(split[2]); +} else if (split.length > 1) { + model.setComplexDelimiter(split[1]); } model.setDateFormat( conf.get( http://git-wip-us.apache.org/repos/asf/carbondata/blob/90f63a0c/integration/spa
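A rough, self-contained sketch of the multi-level delimiter parsing idea behind the Map DDL support (the delimiters '\001' between entries and '\002' between key and value are examples; the real MapParserImpl added by this commit differs):

  // Hypothetical two-level parse of a map column value.
  static java.util.Map<String, String> parseMap(String raw, char entryDelim, char kvDelim) {
      java.util.Map<String, String> map = new java.util.LinkedHashMap<>();
      for (String entry : raw.split(java.util.regex.Pattern.quote(String.valueOf(entryDelim)))) {
          String[] kv = entry.split(java.util.regex.Pattern.quote(String.valueOf(kvDelim)), 2);
          if (kv.length == 2) {
              map.put(kv[0], kv[1]);  // in this sketch, later duplicates overwrite earlier keys
          }
      }
      return map;
  }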
carbondata git commit: [CARBONDATA-3134] fixed null values when cachelevel is set as blocklet
Repository: carbondata Updated Branches: refs/heads/master 295734cc8 -> a5f080b67 [CARBONDATA-3134] fixed null values when cachelevel is set as blocklet Problem: For each blocklet an object of SegmentPropertiesAndSchemaHolder is created to store the schema used for query. This object is created only if no other blocklet has the same schema. To check the schema we compare List<ColumnSchema>; because the equals method in ColumnSchema does not check columnUniqueId, this check fails and the new restructured blocklet uses the schema of the old blocklet. Due to this the newly added column is ignored, as the old blocklet schema specifies that the column is deleted (alter drop). Solution: Instead of checking the equality through equals and hashcode, write a new implementation for both and check based on columnUniqueId. This closes #2956 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/a5f080b6 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/a5f080b6 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/a5f080b6 Branch: refs/heads/master Commit: a5f080b6752a7a789463455edbbe4b888f6694e3 Parents: 295734c Author: kunal642 Authored: Tue Nov 27 14:13:27 2018 +0530 Committer: manishgupta88 Committed: Wed Nov 28 15:43:24 2018 +0530 -- .../block/SegmentPropertiesAndSchemaHolder.java | 40 ++-- .../schema/table/column/ColumnSchema.java | 4 ++ .../StandardPartitionTableQueryTestCase.scala | 2 +- 3 files changed, 42 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/a5f080b6/core/src/main/java/org/apache/carbondata/core/datastore/block/SegmentPropertiesAndSchemaHolder.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/block/SegmentPropertiesAndSchemaHolder.java b/core/src/main/java/org/apache/carbondata/core/datastore/block/SegmentPropertiesAndSchemaHolder.java index 1b7e1f8..6f9a93d 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/block/SegmentPropertiesAndSchemaHolder.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/block/SegmentPropertiesAndSchemaHolder.java @@ -18,6 +18,8 @@ package org.apache.carbondata.core.datastore.block; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; +import java.util.Comparator; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -332,13 +334,45 @@ public class SegmentPropertiesAndSchemaHolder { } SegmentPropertiesAndSchemaHolder.SegmentPropertiesWrapper other = (SegmentPropertiesAndSchemaHolder.SegmentPropertiesWrapper) obj; - return tableIdentifier.equals(other.tableIdentifier) && columnsInTable .equals(other.columnsInTable) && Arrays + return tableIdentifier.equals(other.tableIdentifier) && checkColumnSchemaEquality( + columnsInTable, other.columnsInTable) && Arrays .equals(columnCardinality, other.columnCardinality); } +private boolean checkColumnSchemaEquality(List<ColumnSchema> obj1, List<ColumnSchema> obj2) { + if (obj1 == null || obj2 == null || (obj1.size() != obj2.size())) { +return false; + } + List<ColumnSchema> clonedObj1 = new ArrayList<>(obj1); + List<ColumnSchema> clonedObj2 = new ArrayList<>(obj2); + clonedObj1.addAll(obj1); + clonedObj2.addAll(obj2); + sortList(clonedObj1); + sortList(clonedObj2); + boolean exists = true; + for (int i = 0; i < obj1.size(); i++) { +if (!clonedObj1.get(i).equalsWithStrictCheck(clonedObj2.get(i))) { + exists = false; + break; +} + } + return exists; +} + +private void sortList(List<ColumnSchema> columnSchemas) { +
Collections.sort(columnSchemas, new Comparator<ColumnSchema>() { +@Override public int compare(ColumnSchema o1, ColumnSchema o2) { + return o1.getColumnUniqueId().compareTo(o2.getColumnUniqueId()); +} + }); +} + @Override public int hashCode() { - return tableIdentifier.hashCode() + columnsInTable.hashCode() + Arrays + int allColumnsHashCode = 0; + for (ColumnSchema columnSchema: columnsInTable) { +allColumnsHashCode = allColumnsHashCode + columnSchema.strictHashCode(); + } + return tableIdentifier.hashCode() + allColumnsHashCode + Arrays .hashCode(columnCardinality); } http://git-wip-us.apache.org/repos/asf/carbondata/blob/a5f080b6/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/column/ColumnSchema.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/
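For reference, a self-contained sketch of the comparison strategy this fix uses: sort both column lists by a unique id, then compare element-wise with a strict equality that includes the id (class and field names below are illustrative only):

  static final class Col {
      final String name; final String uniqueId;
      Col(String name, String uniqueId) { this.name = name; this.uniqueId = uniqueId; }
      boolean strictEquals(Col o) { return name.equals(o.name) && uniqueId.equals(o.uniqueId); }
  }

  static boolean sameSchema(java.util.List<Col> a, java.util.List<Col> b) {
      if (a == null || b == null || a.size() != b.size()) return false;
      java.util.Comparator<Col> byId = (x, y) -> x.uniqueId.compareTo(y.uniqueId);
      java.util.List<Col> l = new java.util.ArrayList<>(a); l.sort(byId);
      java.util.List<Col> r = new java.util.ArrayList<>(b); r.sort(byId);
      for (int i = 0; i < l.size(); i++) {
          if (!l.get(i).strictEquals(r.get(i))) return false;  // the unique id must match too
      }
      return true;
  }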
[1/2] carbondata git commit: [CARBONDATA-3113] Fixed Local Dictionary Query Performance and Added reusable buffer for direct flow
Repository: carbondata Updated Branches: refs/heads/master 647bfbaea -> d79ba999f http://git-wip-us.apache.org/repos/asf/carbondata/blob/d79ba999/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java index e5312f3..51dfbf2 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java @@ -308,7 +308,8 @@ public abstract class ColumnPage { private static ColumnPage newDecimalPage(ColumnPageEncoderMeta meta, byte[] lvEncodedByteArray) throws MemoryException { -return VarLengthColumnPageBase.newDecimalColumnPage(meta, lvEncodedByteArray); +return VarLengthColumnPageBase +.newDecimalColumnPage(meta, lvEncodedByteArray, lvEncodedByteArray.length); } private static ColumnPage newLVBytesPage(TableSpec.ColumnSpec columnSpec, http://git-wip-us.apache.org/repos/asf/carbondata/blob/d79ba999/core/src/main/java/org/apache/carbondata/core/datastore/page/DecoderBasedFallbackEncoder.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/DecoderBasedFallbackEncoder.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/DecoderBasedFallbackEncoder.java index 9bed89f..1de8201 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/DecoderBasedFallbackEncoder.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/DecoderBasedFallbackEncoder.java @@ -88,7 +88,8 @@ public class DecoderBasedFallbackEncoder implements Callablehttp://git-wip-us.apache.org/repos/asf/carbondata/blob/d79ba999/core/src/main/java/org/apache/carbondata/core/datastore/page/VarLengthColumnPageBase.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/VarLengthColumnPageBase.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/VarLengthColumnPageBase.java index 81bb1b5..0f409f6 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/VarLengthColumnPageBase.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/VarLengthColumnPageBase.java @@ -125,7 +125,7 @@ public abstract class VarLengthColumnPageBase extends ColumnPage { * Create a new column page for decimal page */ public static ColumnPage newDecimalColumnPage(ColumnPageEncoderMeta meta, - byte[] lvEncodedBytes) throws MemoryException { + byte[] lvEncodedBytes, int actualDataLength) throws MemoryException { TableSpec.ColumnSpec columnSpec = meta.getColumnSpec(); DecimalConverterFactory.DecimalConverter decimalConverter = DecimalConverterFactory.INSTANCE.getDecimalConverter(columnSpec.getPrecision(), @@ -137,7 +137,7 @@ public abstract class VarLengthColumnPageBase extends ColumnPage { CarbonCommonConstants.INT_SIZE_IN_BYTE, meta.getCompressorName()); } else { // Here the size is always fixed. 
- return getDecimalColumnPage(meta, lvEncodedBytes, size); + return getDecimalColumnPage(meta, lvEncodedBytes, size, actualDataLength); } } @@ -160,7 +160,7 @@ public abstract class VarLengthColumnPageBase extends ColumnPage { } private static ColumnPage getDecimalColumnPage(ColumnPageEncoderMeta meta, - byte[] lvEncodedBytes, int size) throws MemoryException { + byte[] lvEncodedBytes, int size, int actualDataLength) throws MemoryException { TableSpec.ColumnSpec columnSpec = meta.getColumnSpec(); String compressorName = meta.getCompressorName(); TableSpec.ColumnSpec spec = TableSpec.ColumnSpec @@ -171,7 +171,7 @@ public abstract class VarLengthColumnPageBase extends ColumnPage { int offset; int rowId = 0; int counter = 0; -for (offset = 0; offset < lvEncodedBytes.length; offset += size) { +for (offset = 0; offset < actualDataLength; offset += size) { rowOffset.putInt(counter, offset); rowId++; counter++; http://git-wip-us.apache.org/repos/asf/carbondata/blob/d79ba999/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageDecoder.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageDecoder.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageDecoder.java index 6f36c08..b5dc502 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageDecoder.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageD
[2/2] carbondata git commit: [CARBONDATA-3113] Fixed Local Dictionary Query Performance and Added reusable buffer for direct flow
[CARBONDATA-3113] Fixed Local Dictionary Query Performance and Added reusable buffer for direct flow Following optimizations done in the PR. 1. Added reusable buffer for direct flow. In a query, a byte array is created for each page of each column; when the number of columns is high this causes lots of minor GC and degrades query performance. Since pages are uncompressed one by one, we can use the same buffer for all the columns, resizing it based on the requested size. 2. Fixed Local Dictionary performance issue. Reverted #2895 and fixed the NPE issue by setting null for the local dictionary on the vector in the safe and unsafe VariableLengthDataChunkStore. This closes #2872 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/d79ba999 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/d79ba999 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/d79ba999 Branch: refs/heads/master Commit: d79ba999f2f683da4be7554a8aba67b9dac01975 Parents: 647bfba Author: kumarvishal09 Authored: Sun Oct 28 20:39:05 2018 +0530 Committer: manishgupta88 Committed: Wed Nov 21 14:45:00 2018 +0530 -- .../core/datastore/ReusableDataBuffer.java | 55 .../chunk/impl/DimensionRawColumnChunk.java | 15 +++--- .../impl/FixedLengthDimensionColumnPage.java| 16 +++--- .../chunk/impl/MeasureRawColumnChunk.java | 15 +++--- .../impl/VariableLengthDimensionColumnPage.java | 16 +++--- .../reader/DimensionColumnChunkReader.java | 6 ++- .../chunk/reader/MeasureColumnChunkReader.java | 10 ++-- .../reader/dimension/AbstractChunkReader.java | 4 +- ...mpressedDimensionChunkFileBasedReaderV1.java | 28 ++ ...mpressedDimensionChunkFileBasedReaderV2.java | 26 ++--- ...essedDimChunkFileBasedPageLevelReaderV3.java | 7 ++- ...mpressedDimensionChunkFileBasedReaderV3.java | 54 --- .../measure/AbstractMeasureChunkReader.java | 7 +-- ...CompressedMeasureChunkFileBasedReaderV1.java | 5 +- ...CompressedMeasureChunkFileBasedReaderV2.java | 12 +++-- ...CompressedMeasureChunkFileBasedReaderV3.java | 34 ++-- ...essedMsrChunkFileBasedPageLevelReaderV3.java | 6 ++- .../chunk/store/DimensionChunkStoreFactory.java | 18 --- .../impl/LocalDictDimensionDataChunkStore.java | 17 -- .../safe/AbstractNonDictionaryVectorFiller.java | 23 +--- ...ariableIntLengthDimensionDataChunkStore.java | 5 +- ...feVariableLengthDimensionDataChunkStore.java | 19 --- ...iableShortLengthDimensionDataChunkStore.java | 5 +- .../UnsafeAbstractDimensionDataChunkStore.java | 4 +- ...nsafeFixedLengthDimensionDataChunkStore.java | 4 +- ...ariableIntLengthDimensionDataChunkStore.java | 4 +- ...feVariableLengthDimensionDataChunkStore.java | 5 +- ...iableShortLengthDimensionDataChunkStore.java | 4 +- .../core/datastore/columnar/UnBlockIndexer.java | 4 +- .../compression/AbstractCompressor.java | 3 ++ .../core/datastore/compression/Compressor.java | 6 +++ .../datastore/compression/SnappyCompressor.java | 20 +++ .../datastore/compression/ZstdCompressor.java | 8 +++ .../core/datastore/page/ColumnPage.java | 3 +- .../page/DecoderBasedFallbackEncoder.java | 3 +- .../datastore/page/VarLengthColumnPageBase.java | 8 +-- .../page/encoding/ColumnPageDecoder.java| 7 ++- .../adaptive/AdaptiveDeltaFloatingCodec.java| 19 --- .../adaptive/AdaptiveDeltaIntegralCodec.java| 22 +--- .../adaptive/AdaptiveFloatingCodec.java | 22 +--- .../adaptive/AdaptiveIntegralCodec.java | 20 --- .../encoding/compress/DirectCompressCodec.java | 23 +--- .../datastore/page/encoding/rle/RLECodec.java | 10 ++--
.../executor/impl/AbstractQueryExecutor.java| 45 +--- .../scan/executor/infos/BlockExecutionInfo.java | 21 .../core/scan/result/BlockletScannedResult.java | 23 +--- .../scan/result/vector/CarbonDictionary.java| 4 ++ .../vector/impl/CarbonDictionaryImpl.java | 10 .../core/scan/scanner/LazyPageLoader.java | 10 ++-- .../impl/FixedLengthDimensionDataChunkTest.java | 3 +- .../executer/IncludeFilterExecuterImplTest.java | 4 +- .../carbondata/core/util/CarbonUtilTest.java| 32 ++-- .../dataload/TestLoadDataWithCompression.scala | 15 ++ .../VectorizedCarbonRecordReader.java | 1 - .../apache/carbondata/tool/ScanBenchmark.java | 4 +- 55 files changed, 546 insertions(+), 228 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/d79ba999/core/src/main/java/org/apache/carbondata/core/datastore/ReusableDataBuffer.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore
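A minimal sketch of the reusable-buffer idea described above: a single byte[] shared across pages and columns that only grows when a larger size is requested, avoiding a fresh allocation (and the resulting minor GC) per page. The class name mirrors the diff's ReusableDataBuffer, but the growth policy here is illustrative:

  public class ReusableDataBuffer {
      private byte[] data;

      public byte[] getDataBuffer(int requestedSize) {
          if (data == null || data.length < requestedSize) {
              // grow with ~50% headroom so steadily larger requests
              // don't trigger a reallocation every time
              data = new byte[requestedSize + requestedSize / 2];
          }
          return data;
      }
  }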
[2/3] carbondata git commit: [CARBONDATA-3112] Optimise decompressing while filling the vector during conversion of primitive types
http://git-wip-us.apache.org/repos/asf/carbondata/blob/bed51ba7/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/compress/DirectCompressCodec.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/compress/DirectCompressCodec.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/compress/DirectCompressCodec.java index fd94344..7b7c0b6 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/compress/DirectCompressCodec.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/compress/DirectCompressCodec.java @@ -23,12 +23,13 @@ import java.util.BitSet; import java.util.List; import java.util.Map; +import org.apache.carbondata.core.datastore.TableSpec; import org.apache.carbondata.core.datastore.compression.Compressor; import org.apache.carbondata.core.datastore.compression.CompressorFactory; import org.apache.carbondata.core.datastore.page.ColumnPage; import org.apache.carbondata.core.datastore.page.ColumnPageValueConverter; -import org.apache.carbondata.core.datastore.page.DecimalColumnPage; import org.apache.carbondata.core.datastore.page.LazyColumnPage; +import org.apache.carbondata.core.datastore.page.VarLengthColumnPageBase; import org.apache.carbondata.core.datastore.page.encoding.ColumnPageCodec; import org.apache.carbondata.core.datastore.page.encoding.ColumnPageDecoder; import org.apache.carbondata.core.datastore.page.encoding.ColumnPageEncoder; @@ -105,17 +106,32 @@ public class DirectCompressCodec implements ColumnPageCodec { @Override public void decodeAndFillVector(byte[] input, int offset, int length, - ColumnVectorInfo vectorInfo, BitSet nullBits, boolean isLVEncoded) + ColumnVectorInfo vectorInfo, BitSet nullBits, boolean isLVEncoded, int pageSize) throws MemoryException, IOException { -ColumnPage decodedPage; +Compressor compressor = + CompressorFactory.getInstance().getCompressor(meta.getCompressorName()); +byte[] unCompressData = compressor.unCompressByte(input, offset, length); if (DataTypes.isDecimal(dataType)) { - decodedPage = ColumnPage.decompressDecimalPage(meta, input, offset, length); - vectorInfo.decimalConverter = ((DecimalColumnPage) decodedPage).getDecimalConverter(); + TableSpec.ColumnSpec columnSpec = meta.getColumnSpec(); + DecimalConverterFactory.DecimalConverter decimalConverter = + DecimalConverterFactory.INSTANCE + .getDecimalConverter(columnSpec.getPrecision(), columnSpec.getScale()); + vectorInfo.decimalConverter = decimalConverter; + if (DataTypes.isDecimal(meta.getStoreDataType())) { +ColumnPage decimalColumnPage = +VarLengthColumnPageBase.newDecimalColumnPage(meta, unCompressData); +decimalConverter.fillVector(decimalColumnPage.getByteArrayPage(), pageSize, vectorInfo, +nullBits, meta.getStoreDataType()); + } else { +converter +.decodeAndFillVector(unCompressData, vectorInfo, nullBits, meta.getStoreDataType(), +pageSize); + } } else { - decodedPage = ColumnPage.decompress(meta, input, offset, length, isLVEncoded); + converter + .decodeAndFillVector(unCompressData, vectorInfo, nullBits, meta.getStoreDataType(), + pageSize); } -decodedPage.setNullBits(nullBits); -converter.decodeAndFillVector(decodedPage, vectorInfo); } @Override public ColumnPage decode(byte[] input, int offset, int length, boolean isLVEncoded) @@ -203,17 +219,15 @@ public class DirectCompressCodec implements ColumnPageCodec { } @Override -public void decodeAndFillVector(ColumnPage columnPage, ColumnVectorInfo vectorInfo) { +public void 
decodeAndFillVector(byte[] pageData, ColumnVectorInfo vectorInfo, BitSet nullBits, +DataType pageDataType, int pageSize) { CarbonColumnVector vector = vectorInfo.vector; - BitSet nullBits = columnPage.getNullBits(); DataType vectorDataType = vector.getType(); - DataType pageDataType = columnPage.getDataType(); - int pageSize = columnPage.getPageSize(); BitSet deletedRows = vectorInfo.deletedRows; vector = ColumnarVectorWrapperDirectFactory .getDirectVectorWrapperFactory(vector, vectorInfo.invertedIndex, nullBits, deletedRows, true, false); - fillVector(columnPage, vector, vectorDataType, pageDataType, pageSize, vectorInfo); + fillVector(pageData, vector, vectorDataType, pageDataType, pageSize, vectorInfo, nullBits); if (deletedRows == null || deletedRows.isEmpty()) { for (int i = nullBits.nextSetBit(0); i >= 0; i = nullBits.nextSe
[1/3] carbondata git commit: [CARBONDATA-3112] Optimise decompressing while filling the vector during conversion of primitive types
Repository: carbondata Updated Branches: refs/heads/master 51b10ba70 -> bed51ba77 http://git-wip-us.apache.org/repos/asf/carbondata/blob/bed51ba7/integration/spark2/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonLateDecodeStrategy.scala -- diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonLateDecodeStrategy.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonLateDecodeStrategy.scala index b4dd1b1..16763d3 100644 --- a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonLateDecodeStrategy.scala +++ b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonLateDecodeStrategy.scala @@ -303,6 +303,10 @@ private[sql] class CarbonLateDecodeStrategy extends SparkStrategy { // applying the filter in spark's side. So we should disable vectorPushRowFilters option // in case of filters on global dictionary. val hasDictionaryFilterCols = hasFilterOnDictionaryColumn(filterSet, table) + +// In case of more dictionary columns spark code gen needs generate lot of code and that slows +// down the query, so we limit the direct fill in case of more dictionary columns. +val hasMoreDictionaryCols = hasMoreDictionaryColumnsOnProjection(projectSet, table) val vectorPushRowFilters = CarbonProperties.getInstance().isPushRowFiltersForVector if (projects.map(_.toAttribute) == projects && projectSet.size == projects.size && @@ -342,7 +346,7 @@ private[sql] class CarbonLateDecodeStrategy extends SparkStrategy { updateRequestedColumns.asInstanceOf[Seq[Attribute]]) // Check whether spark should handle row filters in case of vector flow. if (!vectorPushRowFilters && scan.isInstanceOf[CarbonDataSourceScan] - && !hasDictionaryFilterCols) { + && !hasDictionaryFilterCols && !hasMoreDictionaryCols) { // Here carbon only do page pruning and row level pruning will be done by spark. scan.inputRDDs().head match { case rdd: CarbonScanRDD[InternalRow] => @@ -386,7 +390,8 @@ private[sql] class CarbonLateDecodeStrategy extends SparkStrategy { (projectSet ++ filterSet -- handledSet).map(relation.attributeMap).toSeq ++ newProjectList var updateRequestedColumns = -if (!vectorPushRowFilters && !implictsExisted && !hasDictionaryFilterCols) { +if (!vectorPushRowFilters && !implictsExisted && !hasDictionaryFilterCols +&& !hasMoreDictionaryCols) { updateRequestedColumnsFunc( (projectSet ++ filterSet).map(relation.attributeMap).toSeq, table, @@ -398,7 +403,8 @@ private[sql] class CarbonLateDecodeStrategy extends SparkStrategy { supportBatchedDataSource(relation.relation.sqlContext, updateRequestedColumns.asInstanceOf[Seq[Attribute]]) && needDecoder.isEmpty - if (!vectorPushRowFilters && !supportBatch && !implictsExisted && !hasDictionaryFilterCols) { + if (!vectorPushRowFilters && !supportBatch && !implictsExisted && !hasDictionaryFilterCols + && !hasMoreDictionaryCols) { // revert for row scan updateRequestedColumns = updateRequestedColumnsFunc(requestedColumns, table, needDecoder) } @@ -414,7 +420,7 @@ private[sql] class CarbonLateDecodeStrategy extends SparkStrategy { updateRequestedColumns.asInstanceOf[Seq[Attribute]]) // Check whether spark should handle row filters in case of vector flow. if (!vectorPushRowFilters && scan.isInstanceOf[CarbonDataSourceScan] - && !implictsExisted && !hasDictionaryFilterCols) { + && !implictsExisted && !hasDictionaryFilterCols && !hasMoreDictionaryCols) { // Here carbon only do page pruning and row level pruning will be done by spark. 
scan.inputRDDs().head match { case rdd: CarbonScanRDD[InternalRow] => @@ -518,6 +524,18 @@ private[sql] class CarbonLateDecodeStrategy extends SparkStrategy { filterColumns.exists(c => map.get(c.name).getOrElse(false)) } + private def hasMoreDictionaryColumnsOnProjection(projectColumns: AttributeSet, + relation: CarbonDatasourceHadoopRelation): Boolean = { +val map = relation.carbonRelation.metaData.dictionaryMap +var count = 0 +projectColumns.foreach{c => + if (map.get(c.name).getOrElse(false)) { +count += 1 + } +} +count > CarbonCommonConstants.CARBON_ALLOW_DIRECT_FILL_DICT_COLS_LIMIT + } + private def getPartitioning(carbonTable: CarbonTable, output: Seq[Attribute]): Partitioning = { val info: BucketingInfo = carbonTable.getBucketingInfo(carbonTable.getTableName)
[3/3] carbondata git commit: [CARBONDATA-3112] Optimise decompressing while filling the vector during conversion of primitive types
[CARBONDATA-3112] Optimise decompressing while filling the vector during conversion of primitive types Following optimizations done in the PR. 1. Optimise decompressing while filling the vector during conversion of primitive types. It avoids creating an intermediate buffer during decompression. 2. Refactor the global dictionary decoder codegen to minimise the amount of code generated to reduce the time. 3. Disable lazy load for full scan queries as it is unnecessary. 4. Refactored the compressor interface and created an abstract class. All primitive datatype conversions now happen in little-endian, as snappy does that conversion while compressing; this might break ZSTD compatibility with the previous version. This closes #2863 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/bed51ba7 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/bed51ba7 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/bed51ba7 Branch: refs/heads/master Commit: bed51ba772cf0e8c5c648f620b62d2c9ba4ef9e8 Parents: 51b10ba Author: ravipesala Authored: Fri Oct 26 20:50:53 2018 +0530 Committer: manishgupta88 Committed: Wed Nov 21 12:23:57 2018 +0530 -- .../core/constants/CarbonCommonConstants.java | 6 + ...mpressedDimensionChunkFileBasedReaderV3.java | 2 +- ...CompressedMeasureChunkFileBasedReaderV3.java | 2 +- .../safe/AbstractNonDictionaryVectorFiller.java | 47 +++-- ...feVariableLengthDimensionDataChunkStore.java | 2 +- .../compression/AbstractCompressor.java | 123 .../datastore/compression/SnappyCompressor.java | 4 +- .../datastore/compression/ZstdCompressor.java | 95 + .../page/ColumnPageValueConverter.java | 6 +- .../datastore/page/VarLengthColumnPageBase.java | 2 +- .../page/encoding/ColumnPageDecoder.java| 2 +- .../adaptive/AdaptiveDeltaFloatingCodec.java| 74 --- .../adaptive/AdaptiveDeltaIntegralCodec.java| 164 --- .../adaptive/AdaptiveFloatingCodec.java | 73 +++ .../adaptive/AdaptiveIntegralCodec.java | 137 +++-- .../encoding/compress/DirectCompressCodec.java | 146 -- .../datastore/page/encoding/rle/RLECodec.java | 2 +- .../statistics/PrimitivePageStatsCollector.java | 7 + .../page/statistics/StatisticsCollector.java| 66 -- .../datatype/DecimalConverterFactory.java | 53 +++-- .../scan/result/vector/CarbonColumnVector.java | 4 + .../scan/result/vector/CarbonDictionary.java| 2 + .../vector/impl/CarbonColumnVectorImpl.java | 35 +++- .../vector/impl/CarbonDictionaryImpl.java | 37 .../AbstractCarbonColumnarVector.java | 10 + ...umnarVectorWrapperDirectWithDeleteDelta.java | 10 +- ...erDirectWithDeleteDeltaAndInvertedIndex.java | 34 +++- ...narVectorWrapperDirectWithInvertedIndex.java | 9 +- .../apache/carbondata/core/util/ByteUtil.java | 28 ++- .../presto/CarbonColumnVectorWrapper.java | 9 + .../src/test/resources/IUD/negativevalue.csv| 7 + .../iud/UpdateCarbonTableTestCase.scala | 17 +- .../vectorreader/ColumnarVectorWrapper.java | 10 + .../ColumnarVectorWrapperDirect.java| 8 + .../VectorizedCarbonRecordReader.java | 31 ++- .../datasources/SparkCarbonFileFormat.scala | 10 +- .../org/apache/spark/sql/CarbonVectorProxy.java | 156 ++- .../org/apache/spark/sql/CarbonVectorProxy.java | 200 ++- .../stream/CarbonStreamRecordReader.java| 5 +- .../spark/sql/CarbonDictionaryDecoder.scala | 195 -- .../strategy/CarbonLateDecodeStrategy.scala | 26 ++- 41 files changed, 1193 insertions(+), 663 deletions(-) --
http://git-wip-us.apache.org/repos/asf/carbondata/blob/bed51ba7/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java index b75648e..094e552 100644 --- a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java +++ b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java @@ -1949,6 +1949,12 @@ public final class CarbonCommonConstants { */ public static final String CARBON_WRITTEN_BY_APPNAME = "carbon.writtenby.app.name"; + /** + * When more global dictionary columns are there then there is issue in generating codegen to them + * and it slows down the query.So we limit to 100 for now + */ + public static final int CARBON_ALLOW_DIRECT_FILL_DICT_COLS_L
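Item 4 of the change above (primitive conversions in little-endian) can be illustrated with plain JDK types. A minimal sketch, with all class and method names ours rather than CarbonData's:

import java.nio.ByteBuffer;
import java.nio.ByteOrder;

public final class LittleEndianPageDemo {
  // Serialize a long[] page in little-endian byte order, the layout the
  // native snappy path already produces while compressing primitives.
  static byte[] toLittleEndianBytes(long[] page) {
    ByteBuffer buffer = ByteBuffer.allocate(page.length * Long.BYTES)
        .order(ByteOrder.LITTLE_ENDIAN);
    for (long value : page) {
      buffer.putLong(value);
    }
    return buffer.array();
  }

  public static void main(String[] args) {
    byte[] bytes = toLittleEndianBytes(new long[] {1L});
    System.out.println(bytes[0]); // prints 1: least significant byte comes first
  }
}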
carbondata git commit: [CARBONDATA-3088][Compaction] support prefetch for compaction
Repository: carbondata Updated Branches: refs/heads/master c5930527a -> 51b10ba70 [CARBONDATA-3088][Compaction] support prefetch for compaction Current compaction performance is low. By adding logs to observe the compaction procedure, we found that in `CarbonFactDataHandlerColumnar.addDataToStore(CarbonRow)`, it waits about 30ms before submitting a new TablePage producer. Since the method `addDataToStore` is called in a single thread, the wait occurs every 32000 records, because it collects 32000 records to form a TablePage. To reduce the waiting time, we can prepare the 32000 records ahead of time. This can be achieved using prefetch. We prepare two buffers: one provides the records to the downstream (`addDataToStore`) while the other one prepares records asynchronously. The first is called the working buffer and the second is called the backup buffer. Once the working buffer is exhausted, the two buffers exchange their roles: the backup buffer becomes the new working buffer, and the old working buffer becomes the new backup buffer and is filled asynchronously (see the sketch after the diff below). Two parameters are involved in this feature: 1. carbon.detail.batch.size: This is an existing parameter and the default value is 100. This parameter controls the batch size of records that are returned to the client. For a normal query, it is OK to keep it as 100. But for compaction, since all the records are processed, we suggest setting it to a larger value such as 32000. (32000 is the maximum number of rows for a table page, which is what the downstream wants.) 2. carbon.compaction.prefetch.enable: This is a new parameter and the default value is `false` (we may change it to `true` later). This parameter controls whether we prefetch the records for compaction. By using this prefetch feature, we can enhance the performance of compaction. More test results can be found in the PR description.
This closes #2906 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/51b10ba7 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/51b10ba7 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/51b10ba7 Branch: refs/heads/master Commit: 51b10ba70e53c869d00c4552f8c03134a5f8eb4d Parents: c593052 Author: xuchuanyin Authored: Mon Nov 5 15:11:09 2018 +0800 Committer: manishgupta88 Committed: Wed Nov 21 10:17:35 2018 +0530 -- .../scan/result/iterator/RawResultIterator.java | 199 --- .../carbondata/spark/rdd/StreamHandoffRDD.scala | 2 +- .../merger/CarbonCompactionExecutor.java| 2 +- 3 files changed, 125 insertions(+), 78 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/51b10ba7/core/src/main/java/org/apache/carbondata/core/scan/result/iterator/RawResultIterator.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/result/iterator/RawResultIterator.java b/core/src/main/java/org/apache/carbondata/core/scan/result/iterator/RawResultIterator.java index 29d8751..1febb0b 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/result/iterator/RawResultIterator.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/result/iterator/RawResultIterator.java @@ -16,12 +16,21 @@ */ package org.apache.carbondata.core.scan.result.iterator; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; + import org.apache.carbondata.common.CarbonIterator; import org.apache.carbondata.common.logging.LogServiceFactory; +import org.apache.carbondata.core.constants.CarbonCommonConstants; import org.apache.carbondata.core.datastore.block.SegmentProperties; import org.apache.carbondata.core.keygenerator.KeyGenException; import org.apache.carbondata.core.scan.result.RowBatch; import org.apache.carbondata.core.scan.wrappers.ByteArrayWrapper; +import org.apache.carbondata.core.util.CarbonProperties; import org.apache.log4j.Logger; @@ -40,12 +49,14 @@ public class RawResultIterator extends CarbonIterator { */ private CarbonIterator detailRawQueryResultIterator; - /** - * Counter to maintain the row counter. - */ - private int counter = 0; - - private Object[] currentConveretedRawRow = null; + private boolean prefetchEnabled; + private List currentBuffer; + private List backupBuffer; + private int currentIdxInBuffer; + private ExecutorService executorService; + private Future fetchFuture; + private Object[] currentRawRow = null; + private boolean isBackupFilled = false; /** * LOGGER @@ -53,72 +64,124 @@ public class RawResultIterator extends CarbonIterator { private static fi
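A minimal sketch of the working/backup buffer exchange described above, independent of CarbonData (all names and types are illustrative; end of data is signalled with null):

import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

public final class PrefetchingIterator<T> {
  private final ExecutorService executor = Executors.newSingleThreadExecutor();
  private final Iterator<List<T>> source;  // produces one batch per call
  private List<T> working;                 // batch currently served to the caller
  private Future<List<T>> backup;          // batch being fetched asynchronously
  private int index;

  PrefetchingIterator(Iterator<List<T>> source) {
    this.source = source;
    this.working = fetch();      // first batch, fetched synchronously
    this.backup = prefetch();    // second batch, fetched in the background
  }

  private List<T> fetch() {
    return source.hasNext() ? source.next() : Collections.<T>emptyList();
  }

  private Future<List<T>> prefetch() {
    return executor.submit(this::fetch);
  }

  T next() throws Exception {
    if (index == working.size()) {
      working = backup.get();    // swap roles: backup becomes working
      index = 0;
      if (working.isEmpty()) {
        executor.shutdown();
        return null;             // both buffers drained: end of data
      }
      backup = prefetch();       // refill the old working buffer asynchronously
    }
    return working.get(index++);
  }
}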
carbondata git commit: [CARBONDATA-3106] WrittenBy API not serialized in executor with global sort
Repository: carbondata Updated Branches: refs/heads/master d4e8ba441 -> da91d4cc6 [CARBONDATA-3106] WrittenBy API not serialized in executor with global sort Problem: Written_By_APPNAME, when added to carbon properties, is not serialized to the executor with global sort. Solution: Add Written_By_APPNAME to the hadoop conf; on the executor side, get it from the configuration and add it to carbon properties. This closes #2928 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/da91d4cc Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/da91d4cc Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/da91d4cc Branch: refs/heads/master Commit: da91d4cc6805ce63aade48562a4f367442b38d4a Parents: d4e8ba4 Author: Indhumathi27 Authored: Fri Nov 16 21:49:16 2018 +0530 Committer: manishgupta88 Committed: Tue Nov 20 10:34:28 2018 +0530 -- .../spark/load/DataLoadProcessBuilderOnSpark.scala| 5 ++--- .../spark/load/DataLoadProcessorStepOnSpark.scala | 6 +- .../store/writer/v3/CarbonFactDataWriterImplV3.java | 10 +++--- 3 files changed, 14 insertions(+), 7 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/da91d4cc/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/DataLoadProcessBuilderOnSpark.scala -- diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/DataLoadProcessBuilderOnSpark.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/DataLoadProcessBuilderOnSpark.scala index 338180d..8ded6bd 100644 --- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/DataLoadProcessBuilderOnSpark.scala +++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/DataLoadProcessBuilderOnSpark.scala @@ -66,9 +66,8 @@ object DataLoadProcessBuilderOnSpark { val sortStepRowCounter = sc.accumulator(0, "Sort Processor Accumulator") val writeStepRowCounter = sc.accumulator(0, "Write Processor Accumulator") -CarbonProperties.getInstance() - .addProperty(CarbonCommonConstants.CARBON_WRITTEN_BY_APPNAME, -sparkSession.sparkContext.appName) +hadoopConf + .set(CarbonCommonConstants.CARBON_WRITTEN_BY_APPNAME, sparkSession.sparkContext.appName) val conf = SparkSQLUtil.broadCastHadoopConf(sc, hadoopConf) // 1.
Input http://git-wip-us.apache.org/repos/asf/carbondata/blob/da91d4cc/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/DataLoadProcessorStepOnSpark.scala -- diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/DataLoadProcessorStepOnSpark.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/DataLoadProcessorStepOnSpark.scala index 0a68fb0..2ca47b3 100644 --- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/DataLoadProcessorStepOnSpark.scala +++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/DataLoadProcessorStepOnSpark.scala @@ -26,9 +26,10 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.GenericInternalRow import org.apache.carbondata.common.logging.LogServiceFactory +import org.apache.carbondata.core.constants.CarbonCommonConstants import org.apache.carbondata.core.datastore.exception.CarbonDataWriterException import org.apache.carbondata.core.datastore.row.CarbonRow -import org.apache.carbondata.core.util.ThreadLocalSessionInfo +import org.apache.carbondata.core.util.{CarbonProperties, ThreadLocalSessionInfo} import org.apache.carbondata.processing.loading.{BadRecordsLogger, BadRecordsLoggerProvider, CarbonDataLoadConfiguration, DataLoadProcessBuilder, TableProcessingOperations} import org.apache.carbondata.processing.loading.converter.impl.RowConverterImpl import org.apache.carbondata.processing.loading.exception.CarbonDataLoadingException @@ -228,6 +229,9 @@ object DataLoadProcessorStepOnSpark { modelBroadcast: Broadcast[CarbonLoadModel], rowCounter: Accumulator[Int], conf: Configuration) { +CarbonProperties.getInstance() + .addProperty(CarbonCommonConstants.CARBON_WRITTEN_BY_APPNAME, +conf.get(CarbonCommonConstants.CARBON_WRITTEN_BY_APPNAME)) ThreadLocalSessionInfo.setConfigurationToCurrentThread(conf) var model: CarbonLoadModel = null var tableName: String = null http://git-wip-us.apache.org/repos/asf/carbondata/blob/da91d4cc/processing/src/main/java/org/apache/carbondata/processing/store/writer/v3/CarbonFa
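The pattern in the two hunks above, stripped to plain Hadoop Configuration calls: carry the value in the configuration that is already shipped to executors instead of in a driver-local singleton. The property key appears in the diffs; the class and method names below are ours, as an illustrative sketch:

import org.apache.hadoop.conf.Configuration;

public final class AppNamePropagation {
  static final String KEY = "carbon.writtenby.app.name";

  // Driver side: stash the application name in the job configuration,
  // which Spark serializes and broadcasts to every executor.
  static void setOnDriver(Configuration conf, String appName) {
    conf.set(KEY, appName);
  }

  // Executor side: read it back from the deserialized configuration
  // before the writer needs it.
  static String getOnExecutor(Configuration conf) {
    return conf.get(KEY);
  }
}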
carbondata git commit: [CARBONDATA-3098] Fix for negative exponent values giving wrong results in Float datatype
Repository: carbondata Updated Branches: refs/heads/master cd0ce4187 -> ceb135175 [CARBONDATA-3098] Fix for negative exponent values giving wrong results in Float datatype Problem: When the exponent value is negative, the data is incorrect due to loss of precision in floating-point values and a wrong calculation of the decimal-point count. Solution: Handled the floating-point precision by converting the value to double, and counted the decimal places the same way as the double datatype does (using BigDecimal). This closes #2918 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/ceb13517 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/ceb13517 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/ceb13517 Branch: refs/heads/master Commit: ceb13517553e729ccb408d95ae7ae401f7aebcb8 Parents: cd0ce41 Author: Manish Nalla Authored: Wed Nov 14 10:57:49 2018 +0530 Committer: manishgupta88 Committed: Wed Nov 14 14:03:55 2018 +0530 -- .../encoding/adaptive/AdaptiveFloatingCodec.java | 14 +- .../page/statistics/PrimitivePageStatsCollector.java | 14 +- .../datasource/SparkCarbonDataSourceTest.scala | 15 +++ 3 files changed, 17 insertions(+), 26 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/ceb13517/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/adaptive/AdaptiveFloatingCodec.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/adaptive/AdaptiveFloatingCodec.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/adaptive/AdaptiveFloatingCodec.java index 49696eb..b04c9df 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/adaptive/AdaptiveFloatingCodec.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/adaptive/AdaptiveFloatingCodec.java @@ -167,19 +167,7 @@ public class AdaptiveFloatingCodec extends AdaptiveCodec { @Override public void encode(int rowId, float value) { - if (targetDataType == DataTypes.BYTE) { -encodedPage.putByte(rowId, (byte) (value * floatFactor)); - } else if (targetDataType == DataTypes.SHORT) { -encodedPage.putShort(rowId, (short) (value * floatFactor)); - } else if (targetDataType == DataTypes.SHORT_INT) { -encodedPage.putShortInt(rowId, (int) (value * floatFactor)); - } else if (targetDataType == DataTypes.INT) { -encodedPage.putInt(rowId, (int) (value * floatFactor)); - } else if (targetDataType == DataTypes.LONG) { -encodedPage.putLong(rowId, (long) (value * floatFactor)); - } else { -throw new RuntimeException("internal error: " + debugInfo()); - } + encode(rowId, (double) value); } @Override http://git-wip-us.apache.org/repos/asf/carbondata/blob/ceb13517/core/src/main/java/org/apache/carbondata/core/datastore/page/statistics/PrimitivePageStatsCollector.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/statistics/PrimitivePageStatsCollector.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/statistics/PrimitivePageStatsCollector.java index 9be5a58..e604057 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/statistics/PrimitivePageStatsCollector.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/statistics/PrimitivePageStatsCollector.java @@ -253,19 +253,7 @@ public class PrimitivePageStatsCollector implements ColumnPageStatsCollector, Si } private int getDecimalCount(float value) { -int decimalPlaces = 0; -try { - String
strValue = Float.valueOf(Math.abs(value)).toString(); - int integerPlaces = strValue.indexOf('.'); - if (-1 != integerPlaces) { -decimalPlaces = strValue.length() - integerPlaces - 1; - } -} catch (NumberFormatException e) { - if (!Double.isInfinite(value)) { -throw e; - } -} -return decimalPlaces; +return getDecimalCount((double) value); } @Override http://git-wip-us.apache.org/repos/asf/carbondata/blob/ceb13517/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala -- diff --git a/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala b/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala index
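The double-based counting the fix delegates to can be sketched with BigDecimal; a minimal illustration, not the exact CarbonData code:

import java.math.BigDecimal;

public final class DecimalCountDemo {
  // Count the digits after the decimal point via BigDecimal instead of
  // Float.toString(), whose precision loss caused the wrong results above.
  static int getDecimalCount(double value) {
    // stripTrailingZeros() can yield a negative scale (e.g. for 1000.0),
    // so clamp at zero.
    return Math.max(BigDecimal.valueOf(Math.abs(value)).stripTrailingZeros().scale(), 0);
  }

  public static void main(String[] args) {
    System.out.println(getDecimalCount(1.2E-7)); // prints 8
  }
}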
carbondata git commit: [CARBONDATA-3081] Fixed NPE for boolean type column with null value
Repository: carbondata Updated Branches: refs/heads/master d8dfa4f21 -> 07943cec0 [CARBONDATA-3081] Fixed NPE for boolean type column with null value Problem: NPE thrown when boolean type column has null values. Solution: check for null values before converting byte to boolean. This closes #2901 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/07943cec Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/07943cec Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/07943cec Branch: refs/heads/master Commit: 07943cec0eb17d7b0635083aac5c59e10dbcf03c Parents: d8dfa4f Author: kunal642 Authored: Mon Nov 5 18:46:44 2018 +0530 Committer: manishgupta88 Committed: Tue Nov 13 20:40:16 2018 +0530 -- .../core/metadata/datatype/DecimalType.java | 2 +- .../util/CarbonVectorizedRecordReader.java | 19 +--- .../carbondata/sdk/file/CarbonReaderTest.java | 49 3 files changed, 63 insertions(+), 7 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/07943cec/core/src/main/java/org/apache/carbondata/core/metadata/datatype/DecimalType.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/datatype/DecimalType.java b/core/src/main/java/org/apache/carbondata/core/metadata/datatype/DecimalType.java index b4bc20c..a7f7a4e 100644 --- a/core/src/main/java/org/apache/carbondata/core/metadata/datatype/DecimalType.java +++ b/core/src/main/java/org/apache/carbondata/core/metadata/datatype/DecimalType.java @@ -23,7 +23,7 @@ public class DecimalType extends DataType { private int scale; // create a decimal type object with specified precision and scale - public DecimalType(int precision, int scale) { + DecimalType(int precision, int scale) { super(DataTypes.DECIMAL_TYPE_ID, 8, "DECIMAL", -1); this.precision = precision; this.scale = scale; http://git-wip-us.apache.org/repos/asf/carbondata/blob/07943cec/hadoop/src/main/java/org/apache/carbondata/hadoop/util/CarbonVectorizedRecordReader.java -- diff --git a/hadoop/src/main/java/org/apache/carbondata/hadoop/util/CarbonVectorizedRecordReader.java b/hadoop/src/main/java/org/apache/carbondata/hadoop/util/CarbonVectorizedRecordReader.java index 9d3d7d6..7720434 100644 --- a/hadoop/src/main/java/org/apache/carbondata/hadoop/util/CarbonVectorizedRecordReader.java +++ b/hadoop/src/main/java/org/apache/carbondata/hadoop/util/CarbonVectorizedRecordReader.java @@ -29,7 +29,6 @@ import org.apache.carbondata.core.datastore.block.TableBlockInfo; import org.apache.carbondata.core.datastore.impl.FileFactory; import org.apache.carbondata.core.metadata.datatype.DataType; import org.apache.carbondata.core.metadata.datatype.DataTypes; -import org.apache.carbondata.core.metadata.datatype.DecimalType; import org.apache.carbondata.core.metadata.datatype.StructField; import org.apache.carbondata.core.scan.executor.QueryExecutor; import org.apache.carbondata.core.scan.executor.QueryExecutorFactory; @@ -149,7 +148,8 @@ public class CarbonVectorizedRecordReader extends AbstractRecordReader { new StructField(msr.getColumnName(), msr.getMeasure().getDataType()); } else if (DataTypes.isDecimal(dataType)) { fields[msr.getOrdinal()] = new StructField(msr.getColumnName(), - new DecimalType(msr.getMeasure().getPrecision(), msr.getMeasure().getScale())); + DataTypes.createDecimalType(msr.getMeasure().getPrecision(), + msr.getMeasure().getScale())); } else { fields[msr.getOrdinal()] = new StructField(msr.getColumnName(), DataTypes.DOUBLE); } @@ -171,13 +171,20 @@ public class 
CarbonVectorizedRecordReader extends AbstractRecordReader { rowCount += 1; Object[] row = new Object[carbonColumnarBatch.columnVectors.length]; for (int i = 0; i < carbonColumnarBatch.columnVectors.length; i ++) { + Object data = carbonColumnarBatch.columnVectors[i].getData(batchIdx - 1); if (carbonColumnarBatch.columnVectors[i].getType() == DataTypes.STRING || carbonColumnarBatch.columnVectors[i].getType() == DataTypes.VARCHAR) { -byte[] data = (byte[]) carbonColumnarBatch.columnVectors[i].getData(batchIdx - 1); -row[i] = ByteUtil.toString(data, 0, data.length); +if (data == null) { + row[i] = null; +} else { + row[i] = ByteUtil.toString((byte[]) data, 0, (((byte[]) data).length)); +} } else if (carbonColumnarBatch.columnVectors[i].getType() == DataTypes.BOOLEAN) { -byte data = (byte) carbonColumnarBatch.columnVe
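The shape of the fix in the hunk above, reduced to a standalone sketch: check the cell for null once, before any datatype-specific cast (names ours):

public final class NullSafeBooleanRead {
  // Stand-in for the vector cell read in the diff above: the previous
  // code cast the cell to byte unconditionally and threw an NPE on null.
  static Boolean toBoolean(Object data) {
    if (data == null) {
      return null;
    }
    return ((Byte) data) != 0;
  }

  public static void main(String[] args) {
    System.out.println(toBoolean(null));     // null, no exception
    System.out.println(toBoolean((byte) 1)); // true
  }
}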
carbondata git commit: [HOTFIX] Fix NPE in Spark when the same vector reads files with and without local dictionary
Repository: carbondata Updated Branches: refs/heads/master c94c8ce56 -> d8dfa4f21 [HOTFIX] Fix NPE in Spark when the same vector reads files with and without local dictionary Problem: NPE in Spark when the same vector reads files with local dictionary and without local dictionary. Cause: Two carbondata files are present, one with local dictionary and one without. The same vector may be used to read both files [this can happen when a task is launched for a group of files]. If a local dictionary file is found first, the dictionary is set on that vector, but it was never reset before reading the next file. Solution: Reset the dictionary once a batch is processed; set it only while processing a local-dictionary batch. This closes #2895 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/d8dfa4f2 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/d8dfa4f2 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/d8dfa4f2 Branch: refs/heads/master Commit: d8dfa4f21bf3527ed3522799c2d65e143cfd787c Parents: c94c8ce Author: ajantha-bhat Authored: Mon Nov 5 15:30:27 2018 +0530 Committer: manishgupta88 Committed: Tue Nov 13 20:37:15 2018 +0530 -- .../store/impl/LocalDictDimensionDataChunkStore.java | 10 ++ .../core/scan/result/vector/CarbonDictionary.java | 4 .../scan/result/vector/impl/CarbonDictionaryImpl.java | 10 -- .../carbondata/hadoop/api/CarbonFileInputFormat.java | 2 +- .../spark/vectorreader/VectorizedCarbonRecordReader.java | 1 + 5 files changed, 4 insertions(+), 23 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/d8dfa4f2/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/LocalDictDimensionDataChunkStore.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/LocalDictDimensionDataChunkStore.java b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/LocalDictDimensionDataChunkStore.java index a384743..0eb6d65 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/LocalDictDimensionDataChunkStore.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/LocalDictDimensionDataChunkStore.java @@ -61,10 +61,7 @@ public class LocalDictDimensionDataChunkStore implements DimensionDataChunkStore int columnValueSize = dimensionDataChunkStore.getColumnValueSize(); int rowsNum = data.length / columnValueSize; CarbonColumnVector vector = vectorInfo.vector; -if (!dictionary.isDictionaryUsed()) { - vector.setDictionary(dictionary); - dictionary.setDictionaryUsed(); -} +vector.setDictionary(dictionary); BitSet nullBitset = new BitSet(); CarbonColumnVector dictionaryVector = ColumnarVectorWrapperDirectFactory .getDirectVectorWrapperFactory(vector.getDictionaryVector(), invertedIndex, nullBitset, @@ -91,10 +88,7 @@ public class LocalDictDimensionDataChunkStore implements DimensionDataChunkStore } @Override public void fillRow(int rowId, CarbonColumnVector vector, int vectorRow) { -if (!dictionary.isDictionaryUsed()) { - vector.setDictionary(dictionary); - dictionary.setDictionaryUsed(); -} +vector.setDictionary(dictionary); int surrogate = dimensionDataChunkStore.getSurrogate(rowId); if (surrogate == CarbonCommonConstants.MEMBER_DEFAULT_VAL_SURROGATE_KEY) { vector.putNull(vectorRow); http://git-wip-us.apache.org/repos/asf/carbondata/blob/d8dfa4f2/core/src/main/java/org/apache/carbondata/core/scan/result/vector/CarbonDictionary.java -- diff --git
a/core/src/main/java/org/apache/carbondata/core/scan/result/vector/CarbonDictionary.java b/core/src/main/java/org/apache/carbondata/core/scan/result/vector/CarbonDictionary.java index 2147c43..882a365 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/result/vector/CarbonDictionary.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/result/vector/CarbonDictionary.java @@ -22,10 +22,6 @@ public interface CarbonDictionary { int getDictionarySize(); - boolean isDictionaryUsed(); - - void setDictionaryUsed(); - byte[] getDictionaryValue(int index); byte[][] getAllDictionaryValues(); http://git-wip-us.apache.org/repos/asf/carbondata/blob/d8dfa4f2/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/CarbonDictionaryImpl.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/CarbonDictionaryImpl.java b/core/src/main/java/
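The invariant the fix restores, as a generic sketch: per-batch state must be (re)assigned on every batch, never guarded by a set-once flag (all names ours):

public final class VectorBatchState {
  private Object dictionary;  // stand-in for the vector's CarbonDictionary

  // Before the fix: assignment was skipped when a "dictionary used" flag
  // was already set, so a dictionary from an earlier file leaked into
  // batches from files without a local dictionary.
  void beginBatch(Object batchDictionary) {
    this.dictionary = batchDictionary;  // null for non-dictionary files
  }

  Object currentDictionary() {
    return dictionary;
  }
}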
carbondata git commit: [CARBONDATA-3057] Implement VectorizedReader for SDK Reader
Repository: carbondata Updated Branches: refs/heads/master 5a0bc6e71 -> 63a28a951 [CARBONDATA-3057] Implement VectorizedReader for SDK Reader 1. Added carbondata file listing for getting splits, to avoid block/blocklet datamap loading when no filter expression is provided by the user. 2. Implemented a vectorized reader and exposed a property to switch between the record reader and the vector reader. This closes #2869 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/63a28a95 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/63a28a95 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/63a28a95 Branch: refs/heads/master Commit: 63a28a951ed552680da1a5047f5937fb90a8d76d Parents: 5a0bc6e Author: kunal642 Authored: Fri Oct 26 11:43:22 2018 +0530 Committer: manishgupta88 Committed: Mon Nov 5 11:49:24 2018 +0530 -- ...feVariableLengthDimensionDataChunkStore.java | 5 +- .../filesystem/AbstractDFSCarbonFile.java | 26 +++ .../core/datastore/filesystem/CarbonFile.java | 8 + .../datastore/filesystem/LocalCarbonFile.java | 23 ++ .../encoding/compress/DirectCompressCodec.java | 7 +- .../core/metadata/datatype/DecimalType.java | 2 +- .../core/metadata/datatype/StructType.java | 2 +- .../vector/impl/CarbonColumnVectorImpl.java | 18 +- docs/sdk-guide.md | 8 + .../carbondata/hadoop/CarbonRecordReader.java | 15 ++ .../hadoop/api/CarbonFileInputFormat.java | 55 - .../util/CarbonVectorizedRecordReader.java | 211 +++ .../sdk/file/CarbonReaderBuilder.java | 36 +++- .../sdk/file/CSVCarbonWriterTest.java | 4 +- .../carbondata/sdk/file/CarbonReaderTest.java | 140 ++-- 15 files changed, 519 insertions(+), 41 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/63a28a95/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeVariableLengthDimensionDataChunkStore.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeVariableLengthDimensionDataChunkStore.java b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeVariableLengthDimensionDataChunkStore.java index 2873eed..01db383 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeVariableLengthDimensionDataChunkStore.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeVariableLengthDimensionDataChunkStore.java @@ -163,13 +163,14 @@ public abstract class SafeVariableLengthDimensionDataChunkStore } DataType dt = vector.getType(); -if ((!(dt == DataTypes.STRING) && length == 0) || ByteUtil.UnsafeComparer.INSTANCE +if (((!(dt == DataTypes.STRING) && !(dt == DataTypes.VARCHAR)) && length == 0) +|| ByteUtil.UnsafeComparer.INSTANCE .equals(CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY, 0, CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY.length, data, currentDataOffset, length)) { vector.putNull(vectorRow); } else { - if (dt == DataTypes.STRING) { + if (dt == DataTypes.STRING || dt == DataTypes.VARCHAR) { vector.putByteArray(vectorRow, currentDataOffset, length, data); } else if (dt == DataTypes.BOOLEAN) { vector.putBoolean(vectorRow, ByteUtil.toBoolean(data[currentDataOffset])); http://git-wip-us.apache.org/repos/asf/carbondata/blob/63a28a95/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/AbstractDFSCarbonFile.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/AbstractDFSCarbonFile.java
b/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/AbstractDFSCarbonFile.java index 24efb70..d56caac 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/AbstractDFSCarbonFile.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/AbstractDFSCarbonFile.java @@ -524,6 +524,27 @@ public abstract class AbstractDFSCarbonFile implements CarbonFile { return getFiles(listStatus); } + /** + * Method used to list files recursively and apply file filter on the result. + * + */ + @Override + public List listFiles(boolean recursive, CarbonFileFilter fileFilter) + throws IOException { +List carbonFiles = new ArrayList<>(); +if (null != fileStatus && fileStatus.isDirectory()) { + RemoteIterator listStatus = fs.listFiles(fileStatus.getPath(), recursive); + while (listStatu
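Point 1 of the change (listing data files directly for splits when there is no filter) is, in essence, a recursive directory walk with a suffix filter; a plain-JDK sketch under that assumption, not the CarbonFile API itself:

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public final class ListDataFiles {
  // Recursively collect *.carbondata files under the table path; each
  // file can then become an input split without touching the datamap.
  static List<Path> listCarbonDataFiles(Path tablePath) throws IOException {
    try (Stream<Path> walk = Files.walk(tablePath)) {
      return walk.filter(p -> p.toString().endsWith(".carbondata"))
          .collect(Collectors.toList());
    }
  }

  public static void main(String[] args) throws IOException {
    listCarbonDataFiles(Paths.get(".")).forEach(System.out::println);
  }
}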
carbondata git commit: [CARBONDATA-3066] add documentation for writtenBy and getVersionDetails APIs in SDK
Repository: carbondata Updated Branches: refs/heads/master fc2a53991 -> e622fa998 [CARBONDATA-3066] add documentation for writtenBy and getVersionDetails APIs in SDK This PR adds the documentation for the new APIs added in the SDK: builder API - writtenBy(), reader API - getVersionDetails(). This closes #2888 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/e622fa99 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/e622fa99 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/e622fa99 Branch: refs/heads/master Commit: e622fa9982ba72ce7203afbd94d153e74d8e538c Parents: fc2a539 Author: akashrn5 Authored: Wed Oct 31 20:13:02 2018 +0530 Committer: manishgupta88 Committed: Fri Nov 2 16:18:32 2018 +0530 -- docs/sdk-guide.md | 28 +--- 1 file changed, 25 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/e622fa99/docs/sdk-guide.md -- diff --git a/docs/sdk-guide.md b/docs/sdk-guide.md index 3a9be71..0ee1524 100644 --- a/docs/sdk-guide.md +++ b/docs/sdk-guide.md @@ -67,7 +67,7 @@ These SDK writer output contains just carbondata and carbonindex files. No metad CarbonProperties.getInstance().addProperty("enable.offheap.sort", enableOffheap); - CarbonWriterBuilder builder = CarbonWriter.builder().outputPath(path).withCsvInput(schema); + CarbonWriterBuilder builder = CarbonWriter.builder().outputPath(path).withCsvInput(schema).writtenBy("SDK"); CarbonWriter writer = builder.build(); @@ -124,7 +124,7 @@ public class TestSdkAvro { try { CarbonWriter writer = CarbonWriter.builder() .outputPath(path) - .withAvroInput(new org.apache.avro.Schema.Parser().parse(avroSchema)).build(); + .withAvroInput(new org.apache.avro.Schema.Parser().parse(avroSchema)).writtenBy("SDK").build(); for (int i = 0; i < 100; i++) { writer.write(record); @@ -164,7 +164,7 @@ public class TestSdkJson { Schema CarbonSchema = new Schema(fields); -CarbonWriterBuilder builder = CarbonWriter.builder().outputPath(path).withJsonInput(CarbonSchema); +CarbonWriterBuilder builder = CarbonWriter.builder().outputPath(path).withJsonInput(CarbonSchema).writtenBy("SDK"); // initialize json writer with carbon schema CarbonWriter writer = builder.build(); @@ -431,6 +431,16 @@ public CarbonWriterBuilder withJsonInput(Schema carbonSchema); ``` /** +* To support writing the ApplicationName which is writing the carbondata file +* This is a mandatory API to call, else the build() call will fail with error.
+* @param application name which is writing the carbondata files +* @return CarbonWriterBuilder +*/ +public CarbonWriterBuilder writtenBy(String appName) { +``` + +``` +/** * Build a {@link CarbonWriter} * This writer is not thread safe, * use withThreadSafe() configuration in multi thread environment @@ -686,6 +696,18 @@ Find example code at [CarbonReaderExample](https://github.com/apache/carbondata/ public static Schema readSchemaInIndexFile(String indexFilePath); ``` +``` + /** + * This method return the version details in formatted string by reading from carbondata file + * If application name is SDK_1.0.0 and this has written the carbondata file in carbondata 1.6 project version, + * then this API returns the String "SDK_1.0.0 in version: 1.6.0-SNAPSHOT" + * @param dataFilePath complete path including carbondata file name + * @return string with information of who has written this file in which carbondata project version + * @throws IOException + */ + public static String getVersionDetails(String dataFilePath); +``` + ### Class org.apache.carbondata.sdk.file.Schema ``` /**
carbondata git commit: [CARBONDATA-3062] Fix Compatibility issue with cache_level as blocklet
Repository: carbondata Updated Branches: refs/heads/master 269f4c378 -> 6e58418eb [CARBONDATA-3062] Fix Compatibility issue with cache_level as blocklet In the case of a hybrid store we can have both a block and a blocklet schema. Scenario: a hybrid store in which a few loads are from a legacy store that does not contain the blocklet information, and hence default to cache_level BLOCK, while a few loads from the latest store contain the BLOCKLET information and have cache_level BLOCKLET. For this type of scenario we need separate task and footer schemas. For loads with or without blocklet info, there is no additional cost in maintaining the two variables. This closes #2883 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/6e58418e Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/6e58418e Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/6e58418e Branch: refs/heads/master Commit: 6e58418eb15effbf60290d2e1b8ff06f8613d714 Parents: 269f4c3 Author: Indhumathi27 Authored: Tue Oct 30 21:38:56 2018 +0530 Committer: manishgupta88 Committed: Fri Nov 2 10:54:49 2018 +0530 -- .../block/SegmentPropertiesAndSchemaHolder.java | 82 +--- .../indexstore/blockletindex/BlockDataMap.java | 2 +- .../blockletindex/BlockletDataMap.java | 2 +- ...ithColumnMetCacheAndCacheLevelProperty.scala | 2 +- .../merger/RowResultMergerProcessor.java| 6 +- 5 files changed, 57 insertions(+), 37 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/6e58418e/core/src/main/java/org/apache/carbondata/core/datastore/block/SegmentPropertiesAndSchemaHolder.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/block/SegmentPropertiesAndSchemaHolder.java b/core/src/main/java/org/apache/carbondata/core/datastore/block/SegmentPropertiesAndSchemaHolder.java index cc6341b..1b7e1f8 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/block/SegmentPropertiesAndSchemaHolder.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/block/SegmentPropertiesAndSchemaHolder.java @@ -284,11 +284,17 @@ public class SegmentPropertiesAndSchemaHolder { private int[] columnCardinality; private SegmentProperties segmentProperties; private List minMaxCacheColumns; -private CarbonRowSchema[] taskSummarySchema; -// same variable can be used for block and blocklet schema because at any given cache_level -// with either block or blocklet and whenever cache_level is changed the cache and its -// corresponding segmentProperties is flushed +// in case of hybrid store we can have block as well as blocklet schema +// Scenario: When there is a hybrid store in which few loads are from legacy store which do +// not contain the blocklet information and hence they will be, by default have cache_level as +// BLOCK and few loads with latest store which contain the BLOCKLET information and have +// cache_level BLOCKLET. For these type of scenarios we need to have separate task and footer +// schemas.
For all loads with/without blocklet info there will not be any additional cost +// of maintaining 2 variables +private CarbonRowSchema[] taskSummarySchemaForBlock; +private CarbonRowSchema[] taskSummarySchemaForBlocklet; +private CarbonRowSchema[] fileFooterEntrySchemaForBlock; +private CarbonRowSchema[] fileFooterEntrySchemaForBlocklet; public SegmentPropertiesWrapper(CarbonTable carbonTable, List columnsInTable, int[] columnCardinality) { @@ -314,8 +320,10 @@ public class SegmentPropertiesAndSchemaHolder { if (null != minMaxCacheColumns) { minMaxCacheColumns.clear(); } - taskSummarySchema = null; - fileFooterEntrySchema = null; + taskSummarySchemaForBlock = null; + taskSummarySchemaForBlocklet = null; + fileFooterEntrySchemaForBlock = null; + fileFooterEntrySchemaForBlocklet = null; } @Override public boolean equals(Object obj) { @@ -350,48 +358,62 @@ public class SegmentPropertiesAndSchemaHolder { return columnCardinality; } -public CarbonRowSchema[] getTaskSummarySchema(boolean storeBlockletCount, +public CarbonRowSchema[] getTaskSummarySchemaForBlock(boolean storeBlockletCount, boolean filePathToBeStored) throws MemoryException { - if (null == taskSummarySchema) { + if (null == taskSummarySchemaForBlock) { synchronized (taskSchemaLock) { - if (null == taskSummarySchema) { -taskSummarySchema = SchemaGenerator + if (n
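The per-level caching in the hunk above is the standard lazy-initialization idiom, duplicated for the two cache levels. Reduced to its shape (illustrative only, with volatile added for a correct double-checked lock):

public final class TwoLevelSchemaCache {
  private final Object lock = new Object();
  private volatile String[] blockSchema;      // stand-in for CarbonRowSchema[]
  private volatile String[] blockletSchema;

  String[] schemaForBlock() {
    if (blockSchema == null) {
      synchronized (lock) {
        if (blockSchema == null) {
          blockSchema = buildSchema(false);   // legacy loads: no blocklet info
        }
      }
    }
    return blockSchema;
  }

  String[] schemaForBlocklet() {
    if (blockletSchema == null) {
      synchronized (lock) {
        if (blockletSchema == null) {
          blockletSchema = buildSchema(true); // latest loads: blocklet info present
        }
      }
    }
    return blockletSchema;
  }

  private String[] buildSchema(boolean withBlockletInfo) {
    return new String[] {withBlockletInfo ? "blocklet-schema" : "block-schema"};
  }
}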
carbondata git commit: [CARBONDATA-3054] Fix Dictionary file cannot be read in S3a with CarbonDictionaryDecoder.doConsume() codeGen
Repository: carbondata Updated Branches: refs/heads/master 0e39abf81 -> bcf3e0fd5 [CARBONDATA-3054] Fix Dictionary file cannot be read in S3a with CarbonDictionaryDecoder.doConsume() codeGen Problem: In an S3a environment, when querying data that has dictionary files, the dictionary file cannot be read with CarbonDictionaryDecoder.doConsume() codegen, even though the file is present. Cause: CarbonDictionaryDecoder.doConsume() codegen doesn't set the hadoop conf in the thread-local variable; only doExecute() sets it. Hence, when getDictionaryWrapper() is called from doConsume() codegen, AbstractDictionaryCache.getDictionaryMetaCarbonFile() returns false for the fileExists() operation. Solution: In CarbonDictionaryDecoder.doConsume() codegen, set the hadoop conf in the thread-local variable. This closes #2876 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/bcf3e0fd Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/bcf3e0fd Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/bcf3e0fd Branch: refs/heads/master Commit: bcf3e0fd595f612ee33a8ee2d9aa6197998f626e Parents: 0e39abf Author: ajantha-bhat Authored: Mon Oct 29 17:56:29 2018 +0530 Committer: manishgupta88 Committed: Wed Oct 31 15:47:51 2018 +0530 -- .../spark/sql/CarbonDictionaryDecoder.scala | 25 +--- 1 file changed, 17 insertions(+), 8 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/bcf3e0fd/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonDictionaryDecoder.scala -- diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonDictionaryDecoder.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonDictionaryDecoder.scala index f3d5bf0..c9434a1 100644 --- a/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonDictionaryDecoder.scala +++ b/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonDictionaryDecoder.scala @@ -21,6 +21,7 @@ import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer import org.apache.spark.{Partition, TaskContext} +import org.apache.spark.broadcast.Broadcast import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.errors.attachTree @@ -31,6 +32,7 @@ import org.apache.spark.sql.execution.{CodegenSupport, SparkPlan, UnaryExecNode} import org.apache.spark.sql.optimizer.CarbonDecoderRelation import org.apache.spark.sql.types._ import org.apache.spark.sql.util.{SparkSQLUtil, SparkTypeConverter} +import org.apache.spark.util.SerializableConfiguration import org.apache.carbondata.core.cache.{Cache, CacheProvider, CacheType} import org.apache.carbondata.core.cache.dictionary.{Dictionary, DictionaryColumnUniqueIdentifier} @@ -42,7 +44,6 @@ import org.apache.carbondata.core.metadata.schema.table.CarbonTable import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension import org.apache.carbondata.core.scan.executor.util.QueryUtil import org.apache.carbondata.core.util.{DataTypeUtil, ThreadLocalSessionInfo} -import org.apache.carbondata.hadoop.util.CarbonInputFormatUtil import org.apache.carbondata.spark.CarbonAliasDecoderRelation import org.apache.carbondata.spark.rdd.CarbonRDDWithTableInfo @@ -69,6 +70,9 @@ case class CarbonDictionaryDecoder( val getDictionaryColumnIds: Array[(String, ColumnIdentifier, CarbonDimension)] = CarbonDictionaryDecoder.getDictionaryColumnMapping(child.output, relations, profile, aliasMap) + val broadcastConf = SparkSQLUtil.broadCastHadoopConf(
+sparkSession.sparkContext, sparkSession.sessionState.newHadoopConf()) + override def doExecute(): RDD[InternalRow] = { attachTree(this, "execute") { val tableNameToCarbonTableMapping = relations.map { relation => @@ -76,12 +80,10 @@ case class CarbonDictionaryDecoder( (carbonTable.getTableName, carbonTable) }.toMap - val conf = SparkSQLUtil -.broadCastHadoopConf(sparkSession.sparkContext, sparkSession.sessionState.newHadoopConf()) if (CarbonDictionaryDecoder.isRequiredToDecode(getDictionaryColumnIds)) { val dataTypes = child.output.map { attr => attr.dataType } child.execute().mapPartitions { iter => - ThreadLocalSessionInfo.setConfigurationToCurrentThread(conf.value.value) + ThreadLocalSessionInfo.setConfigurationToCurrentThread(broadcastConf.value.value) val cacheProvider: CacheProvider = CacheProvider.getInstance val forwardDictionaryCache: Cache[DictionaryColumnUniqueIdentifier, Dictionary] = cacheProvider.createCache(CacheType.FORWARD_DICTIONARY) @@ -137,7 +139,7 @@ case cl
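Both execution paths now follow the same discipline: broadcast the configuration once, then install it in a thread-local at the start of every task body so file-system helpers can find the S3a settings. Stripped of the Spark specifics (an illustrative sketch, names ours):

import org.apache.hadoop.conf.Configuration;

public final class ThreadLocalConf {
  private static final ThreadLocal<Configuration> CURRENT = new ThreadLocal<>();

  // Call at the start of each task body, in doExecute() and in the
  // generated doConsume() code alike, before any dictionary file access.
  static void setForCurrentThread(Configuration conf) {
    CURRENT.set(conf);
  }

  // Helpers performing fileExists()-style checks read the conf from here;
  // if it was never set on this thread, S3a access fails as described above.
  static Configuration get() {
    return CURRENT.get();
  }
}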
carbondata git commit: [CARBONDATA-3042] Column Schema objects are present in Driver and Executor even after dropping table
Repository: carbondata Updated Branches: refs/heads/master e2c517e3f -> 10b393808 [CARBONDATA-3042] Column Schema objects are present in Driver and Executor even after dropping table Problem: ColumnSchema objects are present in the driver and executor even after dropping the table. Solution: In the driver, after dropping the table, remove the tableInfo entry from the CarbonMetadata instance. In the executor, remove usage of the CarbonMetadata instance and instead pass the CarbonTable object itself. This closes #2852 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/10b39380 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/10b39380 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/10b39380 Branch: refs/heads/master Commit: 10b393808e91344b017ba3e946b28217c2dd9757 Parents: e2c517e Author: Indhumathi27 Authored: Thu Oct 25 13:37:08 2018 +0530 Committer: manishgupta88 Committed: Tue Oct 30 14:19:39 2018 +0530 -- .../core/metadata/CarbonMetadata.java | 5 +- .../statusmanager/SegmentStatusManager.java | 6 +- .../carbondata/core/util/DeleteLoadFolders.java | 31 +-- .../spark/rdd/AlterTableLoadPartitionRDD.scala | 4 +- .../carbondata/spark/rdd/CarbonMergerRDD.scala | 3 +- .../carbondata/spark/rdd/StreamHandoffRDD.scala | 1 - .../spark/sql/CarbonDictionaryDecoder.scala | 5 -- .../spark/sql/hive/CarbonFileMetastore.scala| 3 +- .../spark/sql/hive/CarbonHiveMetaStore.scala| 2 +- .../loading/DataLoadProcessBuilder.java | 2 - .../sort/impl/ParallelReadMergeSorterImpl.java | 9 ++- ...allelReadMergeSorterWithColumnRangeImpl.java | 8 +-- .../UnsafeBatchParallelReadMergeSorterImpl.java | 6 +- ...allelReadMergeSorterWithColumnRangeImpl.java | 11 ++-- .../CarbonRowDataWriterProcessorStepImpl.java | 13 ++--- .../steps/DataConverterProcessorStepImpl.java | 6 +- .../steps/DataWriterBatchProcessorStepImpl.java | 11 ++-- .../steps/DataWriterProcessorStepImpl.java | 18 +++--- .../merger/CompactionResultSortProcessor.java | 4 +- .../merger/RowResultMergerProcessor.java| 5 +- .../partition/spliter/RowResultProcessor.java | 5 +- .../sort/sortdata/SortParameters.java | 44 +-- .../store/CarbonFactDataHandlerModel.java | 4 +- .../util/CarbonDataProcessorUtil.java | 58 +--- 24 files changed, 110 insertions(+), 154 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/10b39380/core/src/main/java/org/apache/carbondata/core/metadata/CarbonMetadata.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/CarbonMetadata.java b/core/src/main/java/org/apache/carbondata/core/metadata/CarbonMetadata.java index 850f477..e44092e 100644 --- a/core/src/main/java/org/apache/carbondata/core/metadata/CarbonMetadata.java +++ b/core/src/main/java/org/apache/carbondata/core/metadata/CarbonMetadata.java @@ -69,9 +69,8 @@ public final class CarbonMetadata { /** * Below method will be used to set the carbon table - * This method will be used in executor side as driver will always have - * updated table so from driver during query execution and data loading - * we just need to add the table + * Note: Use this method only in driver as clean up in Executor is not handled + * if this table is added to executor * * @param carbonTable */ http://git-wip-us.apache.org/repos/asf/carbondata/blob/10b39380/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentStatusManager.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentStatusManager.java
b/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentStatusManager.java index 9196367..fbb765b 100755 --- a/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentStatusManager.java +++ b/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentStatusManager.java @@ -1001,9 +1001,9 @@ public class SegmentStatusManager { CarbonLockUtil.fileUnlock(carbonTableStatusLock, LockUsage.TABLE_STATUS_LOCK); } if (updationCompletionStatus) { -DeleteLoadFolders.physicalFactAndMeasureMetadataDeletion( -identifier, carbonTable.getMetadataPath(), -newAddedLoadHistoryList, isForceDeletion, partitionSpecs); +DeleteLoadFolders +.physicalFactAndMeasureMetadataDeletion(carbonTable, newAddedLoadHistoryList, +isForceDeletion, partitionSpecs); } } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/10b39380/c
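The executor-side half of the fix follows a general rule: pass the object explicitly instead of resolving it from a process-wide registry, so nothing outlives the query. A hedged before/after sketch (TableHandle is a stand-in for CarbonTable):

public final class RegistryVsParameter {
  interface TableHandle {        // stand-in for CarbonTable
    int columnCount();
  }

  // Before (leaks): executor code resolved the table from a static,
  // process-wide map that nobody cleared after a DROP TABLE, e.g.
  //   CarbonTable table = CarbonMetadata.getInstance().getCarbonTable(name);

  // After: the caller hands the table object to each processing step
  // directly, so it becomes unreachable once the step finishes.
  static int countColumns(TableHandle table) {
    return table.columnCount();
  }
}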
carbondata git commit: [CARBONDATA-2977] Write uncompress_size to ChunkCompressMeta in the file
Repository: carbondata Updated Branches: refs/heads/master 33a6dc2ac -> e19c5da6d [CARBONDATA-2977] Write uncompress_size to ChunkCompressMeta in the file Currently total_uncompressed_size and total_compress_size in the ChunkCompressMeta in the carbondata file are always 0. This PR writes the correct values to the file. This closes #2772 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/e19c5da6 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/e19c5da6 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/e19c5da6 Branch: refs/heads/master Commit: e19c5da6dbb07056b1053319d48a64a4b0715129 Parents: 33a6dc2 Author: Jacky Li Authored: Thu Sep 27 00:39:29 2018 +0800 Committer: manishgupta88 Committed: Thu Oct 25 14:38:28 2018 +0530 -- .../core/datastore/page/ColumnPage.java | 39 +++ .../datastore/page/LocalDictColumnPage.java | 9 +++ .../page/UnsafeFixLengthColumnPage.java | 7 ++ .../datastore/page/VarLengthColumnPageBase.java | 5 ++ .../page/encoding/ColumnPageEncoder.java| 7 +- .../core/util/CarbonMetadataUtil.java | 10 +-- .../apache/carbondata/tool/CarbonCliTest.java | 69 7 files changed, 81 insertions(+), 65 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/e19c5da6/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java index 8b9a9a5..e8097da 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java @@ -724,6 +724,45 @@ public abstract class ColumnPage { } /** + * Return total page data length in bytes + */ + public long getPageLengthInBytes() throws IOException { +DataType dataType = columnPageEncoderMeta.getStoreDataType(); +if (dataType == DataTypes.BOOLEAN) { + return getBooleanPage().length; +} else if (dataType == DataTypes.BYTE) { + return getBytePage().length; +} else if (dataType == DataTypes.SHORT) { + return getShortPage().length * SHORT.getSizeInBytes(); +} else if (dataType == DataTypes.SHORT_INT) { + return getShortIntPage().length; +} else if (dataType == DataTypes.INT) { + return getIntPage().length * INT.getSizeInBytes(); +} else if (dataType == DataTypes.LONG) { + return getLongPage().length * LONG.getSizeInBytes(); +} else if (dataType == DataTypes.FLOAT) { + return getFloatPage().length * FLOAT.getSizeInBytes(); +} else if (dataType == DataTypes.DOUBLE) { + return getDoublePage().length * DOUBLE.getSizeInBytes(); +} else if (DataTypes.isDecimal(dataType)) { + return getDecimalPage().length; +} else if (dataType == DataTypes.BYTE_ARRAY +&& columnPageEncoderMeta.getColumnSpec().getColumnType() == ColumnType.COMPLEX_PRIMITIVE) { + return getComplexChildrenLVFlattenedBytePage().length; +} else if (dataType == DataTypes.BYTE_ARRAY +&& (columnPageEncoderMeta.getColumnSpec().getColumnType() == ColumnType.COMPLEX_STRUCT +|| columnPageEncoderMeta.getColumnSpec().getColumnType() == ColumnType.COMPLEX_ARRAY +|| columnPageEncoderMeta.getColumnSpec().getColumnType() == ColumnType.PLAIN_LONG_VALUE +|| columnPageEncoderMeta.getColumnSpec().getColumnType() == ColumnType.PLAIN_VALUE)) { + return getComplexParentFlattenedBytePage().length; +} else if (dataType == DataTypes.BYTE_ARRAY) { + return getLVFlattenedBytePage().length; +} else { + throw new
UnsupportedOperationException("unsupport compress column page: " + dataType); +} + } + + /** * Compress page data using specified compressor */ public byte[] compress(Compressor compressor) throws MemoryException, IOException { http://git-wip-us.apache.org/repos/asf/carbondata/blob/e19c5da6/core/src/main/java/org/apache/carbondata/core/datastore/page/LocalDictColumnPage.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/LocalDictColumnPage.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/LocalDictColumnPage.java index 3da154a..5cf2130 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/LocalDictColumnPage.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/LocalDictColumnPage.java @@ -344,4 +344,13 @@ public class LocalDictColumnPage extends ColumnPa
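With the page length available, populating the compression metadata is one assignment per field. Sketched below with hypothetical accessor names, since the thrift-generated setters may be spelled differently:

public final class FillCompressionMeta {
  interface ChunkCompressionMetaLike {       // stand-in for the thrift struct
    void setTotalUncompressedSize(long size);
    void setTotalCompressedSize(long size);
  }

  // Record both sizes instead of leaving them at their default of 0.
  static void fill(ChunkCompressionMetaLike meta, long uncompressedLength,
      byte[] compressedData) {
    meta.setTotalUncompressedSize(uncompressedLength);
    meta.setTotalCompressedSize(compressedData.length);
  }
}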
carbondata git commit: [CARBONDATA-2998] Refresh column schema for old store(before V3) for SORT_COLUMNS option
Repository: carbondata Updated Branches: refs/heads/master c429cee16 -> 278d17178 [CARBONDATA-2998] Refresh column schema for old store(before V3) for SORT_COLUMNS option Problem: For an old store (before V3), the sort_column flag is not set in ColumnSchema, yet those columns were treated as SORT_COLUMNS. So while refreshing the table it reads from the thrift and marks them as non-sort columns in ColumnSchema, since the flag was never set before. Solution: While refreshing the table, check for the SORT_COLUMNS property in the table properties; if nothing is set, then by default treat all the dimension columns as sort columns. This closes #2806 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/278d1717 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/278d1717 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/278d1717 Branch: refs/heads/master Commit: 278d1717880541a052aa4a5ee96ba73423d650b1 Parents: c429cee Author: dhatchayani Authored: Tue Oct 9 17:34:42 2018 +0530 Committer: manishgupta88 Committed: Wed Oct 24 14:52:25 2018 +0530 -- .../management/RefreshCarbonTableCommand.scala | 31 .../merger/CompactionResultSortProcessor.java | 6 ++-- 2 files changed, 34 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/278d1717/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/RefreshCarbonTableCommand.scala -- diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/RefreshCarbonTableCommand.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/RefreshCarbonTableCommand.scala index 39e85ba..c129194 100644 --- a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/RefreshCarbonTableCommand.scala +++ b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/RefreshCarbonTableCommand.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.execution.command.management import java.util import scala.collection.JavaConverters._ +import scala.collection.mutable import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.TableIdentifier @@ -75,6 +76,9 @@ case class RefreshCarbonTableCommand( if (FileFactory.isFileExist(schemaFilePath, FileFactory.getFileType(schemaFilePath))) { // read TableInfo val tableInfo = SchemaReader.getTableInfo(identifier) +// refresh the column schema in case of store before V3 +refreshColumnSchema(tableInfo) + // 2.2 register the table with the hive check if the table being registered has // aggregate table then do the below steps // 2.2.1 validate that all the aggregate tables are copied at the store location. @@ -119,6 +123,33 @@ case class RefreshCarbonTableCommand( } /** + * Refresh the sort_column flag in column schema in case of old store.
Before V3, sort_column + * option is not set but by default all dimension columns should be treated + * as sort columns if SORT_COLUMNS property is not defined in tblproperties + * + * @param tableInfo + */ + def refreshColumnSchema(tableInfo: TableInfo): Unit = { +val tableProps: mutable.Map[String, String] = tableInfo.getFactTable.getTableProperties.asScala +val sortColumns = tableProps.get(CarbonCommonConstants.SORT_COLUMNS) +sortColumns match { + case Some(sortColumn) => + // don't do anything + case None => +// iterate over all the columns and make all the dimensions as sort columns true +// check for the complex data types parent and child columns to +// avoid adding them in SORT_COLUMNS +tableInfo.getFactTable.getListOfColumns.asScala collect +({ + case columnSchema if columnSchema.isDimensionColumn && + !columnSchema.getDataType.isComplexType && + columnSchema.getSchemaOrdinal != -1 => +columnSchema.setSortColumn(true) +}) +} + } + + /** * the method prepare the data type for raw column * * @param column http://git-wip-us.apache.org/repos/asf/carbondata/blob/278d1717/processing/src/main/java/org/apache/carbondata/processing/merger/CompactionResultSortProcessor.java -- diff --git a/processing/src/main/java/org/apache/carbondata/processing/merger/CompactionResultSortProcessor.java b/processing/src/main/java/org/apache/carbondata/processing/merger/CompactionResultSortProcessor.java index e0a30da..8d28d45 100644 --- a/processing/sr
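The defaulting rule in the refresh path reduces to a single pass over the columns; a Java rendering of the Scala above, simplified and with a stand-in interface:

import java.util.List;

public final class LegacySortColumns {
  interface ColumnSchemaLike {    // stand-in for ColumnSchema
    boolean isDimensionColumn();
    boolean isComplexType();
    int getSchemaOrdinal();
    void setSortColumn(boolean sortColumn);
  }

  // If the legacy table defined no SORT_COLUMNS property, mark every
  // simple dimension (non-complex, with a valid schema ordinal) as a
  // sort column, restoring the pre-V3 default.
  static void applyLegacyDefault(List<ColumnSchemaLike> columns,
      boolean sortColumnsPropertyDefined) {
    if (sortColumnsPropertyDefined) {
      return;                     // an explicit property wins; do nothing
    }
    for (ColumnSchemaLike column : columns) {
      if (column.isDimensionColumn() && !column.isComplexType()
          && column.getSchemaOrdinal() != -1) {
        column.setSortColumn(true);
      }
    }
  }
}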
carbondata git commit: [CARBONDATA-3022] Refactor ColumnPageWrapper
Repository: carbondata Updated Branches: refs/heads/master c7c83684b -> fa9a4eeeb [CARBONDATA-3022] Refactor ColumnPageWrapper Refactor ColumnPageWrapper for better filter query performance. Removed unnecessary checks and loops This closes #2808 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/fa9a4eee Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/fa9a4eee Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/fa9a4eee Branch: refs/heads/master Commit: fa9a4eeeb489b77b3040d54e7878bac93ccb12aa Parents: c7c8368 Author: dhatchayani Authored: Wed Oct 10 13:18:01 2018 +0530 Committer: manishgupta88 Committed: Tue Oct 23 15:12:21 2018 +0530 -- .../chunk/store/ColumnPageWrapper.java | 126 +-- .../core/scan/executor/util/QueryUtil.java | 32 - .../carbondata/core/util/DataTypeUtil.java | 18 --- 3 files changed, 32 insertions(+), 144 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/fa9a4eee/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java index 627c75f..ba853f9 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java @@ -26,7 +26,6 @@ import org.apache.carbondata.core.datastore.chunk.DimensionColumnPage; import org.apache.carbondata.core.datastore.page.ColumnPage; import org.apache.carbondata.core.metadata.datatype.DataType; import org.apache.carbondata.core.metadata.datatype.DataTypes; -import org.apache.carbondata.core.scan.executor.util.QueryUtil; import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector; import org.apache.carbondata.core.scan.result.vector.CarbonDictionary; import org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo; @@ -83,31 +82,6 @@ public class ColumnPageWrapper implements DimensionColumnPage { return chunkIndex + 1; } - /** - * Fill the data to the vector - * - * @param rowId - * @param vector - * @param vectorRow - */ - private void fillRow(int rowId, CarbonColumnVector vector, int vectorRow) { -if (columnPage.getNullBits().get(rowId) -&& columnPage.getColumnSpec().getColumnType() == ColumnType.COMPLEX_PRIMITIVE) { - // if this row is null, return default null represent in byte array - byte[] value = CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY; - QueryUtil.putDataToVector(vector, value, vectorRow, value.length); -} else if (columnPage.getNullBits().get(rowId)) { - // if this row is null, return default null represent in byte array - byte[] value = CarbonCommonConstants.EMPTY_BYTE_ARRAY; - QueryUtil.putDataToVector(vector, value, vectorRow, value.length); -} else { - if (isExplicitSorted) { -rowId = invertedReverseIndex[rowId]; - } - QueryUtil.putDataToVector(vector, getActualData(rowId, true), vectorRow); -} - } - @Override public int fillVector(int[] filteredRowId, ColumnVectorInfo[] vectorInfo, int chunkIndex) { ColumnVectorInfo columnVectorInfo = vectorInfo[chunkIndex]; @@ -214,78 +188,42 @@ public class ColumnPageWrapper implements DimensionColumnPage { return null; } - private Object getActualData(int rowId, boolean isRowIdChanged) { -ColumnType columnType = columnPage.getColumnSpec().getColumnType(); -DataType srcDataType = 
columnPage.getColumnSpec().getSchemaDataType(); -DataType targetDataType = columnPage.getDataType(); -if (null != localDictionary) { - return localDictionary - .getDictionaryValue(CarbonUtil.getSurrogateInternal(columnPage.getBytes(rowId), 0, 3)); -} else if ((columnType == ColumnType.COMPLEX_PRIMITIVE && this.isAdaptiveEncoded()) || ( -columnType == ColumnType.PLAIN_VALUE && DataTypeUtil.isPrimitiveColumn(srcDataType))) { - if (!isRowIdChanged && columnPage.getNullBits().get(rowId) - && columnType == ColumnType.COMPLEX_PRIMITIVE) { -// if this row is null, return default null represent in byte array -return CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY; - } - if (!isRowIdChanged && columnPage.getNullBits().get(rowId)) { -// if this row is null, return default null represent in byte array -return CarbonCommonConstants.EMPTY_BYTE_ARRAY; + /** + * Fill the data to the vector + *
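A self-contained sketch of the optimization pattern this refactor applies. Only the removed per-row helper is visible above, so the shape below is an assumption about the consolidated flow, not quoted CarbonData code: column-level facts (sort order, null bits) are decided once per page, and the per-row loop does only per-row work.

import java.util.BitSet;

public class HoistedFillDemo {
  // Illustrative only: instead of re-checking column-level state inside a
  // per-row helper like the removed fillRow, hoist those checks out of the
  // loop and keep the loop body minimal.
  static void fill(int[] rows, int[] reverseIndex, boolean explicitSorted,
                   BitSet nullBits, long[] page, long nullDefault, long[] out) {
    for (int i = 0; i < rows.length; i++) {
      // translate through the inverted reverse index only when the page is sorted
      int rowId = explicitSorted ? reverseIndex[rows[i]] : rows[i];
      out[i] = nullBits.get(rowId) ? nullDefault : page[rowId];
    }
  }

  public static void main(String[] args) {
    BitSet nulls = new BitSet(4);
    nulls.set(2);  // row 2 is null
    long[] out = new long[4];
    fill(new int[] {0, 1, 2, 3}, null, false, nulls, new long[] {7, 8, 9, 10}, -1, out);
    System.out.println(java.util.Arrays.toString(out));  // [7, 8, -1, 10]
  }
}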
carbondata git commit: [CARBONDATA-2990] Fixed JVM crash when rebuilding bloom datamap
Repository: carbondata Updated Branches: refs/heads/master c3a870449 -> 8fbd4a5f5 [CARBONDATA-2990] Fixed JVM crash when rebuilding bloom datamap Problem: while rebuilding the datamap it access the datamap store so it builds datamap and store in unsafe onheap storage. But while closing the reader it frees all memory acquired during that task. Since acquired memory is onheap but releasing the memory with offheap allocator it crashes the jvm. Solution: Maintain the type of memory acquired in the memory block itself and get the allocator as per the memory type and release it. This closes #2793 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/8fbd4a5f Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/8fbd4a5f Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/8fbd4a5f Branch: refs/heads/master Commit: 8fbd4a5f53070b3755f1f573b09e0066fa93a6ea Parents: c3a8704 Author: ravipesala Authored: Sun Sep 30 11:27:57 2018 +0530 Committer: manishgupta88 Committed: Thu Oct 4 14:39:25 2018 +0530 -- .../core/indexstore/UnsafeMemoryDMStore.java| 14 +++ .../core/memory/HeapMemoryAllocator.java| 5 ++- .../carbondata/core/memory/MemoryBlock.java | 14 ++- .../carbondata/core/memory/MemoryType.java | 23 ++ .../core/memory/UnsafeMemoryAllocator.java | 2 +- .../core/memory/UnsafeMemoryManager.java| 44 +++- 6 files changed, 70 insertions(+), 32 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/8fbd4a5f/core/src/main/java/org/apache/carbondata/core/indexstore/UnsafeMemoryDMStore.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/UnsafeMemoryDMStore.java b/core/src/main/java/org/apache/carbondata/core/indexstore/UnsafeMemoryDMStore.java index 196559a..0db1b0a 100644 --- a/core/src/main/java/org/apache/carbondata/core/indexstore/UnsafeMemoryDMStore.java +++ b/core/src/main/java/org/apache/carbondata/core/indexstore/UnsafeMemoryDMStore.java @@ -19,9 +19,9 @@ package org.apache.carbondata.core.indexstore; import org.apache.carbondata.core.indexstore.row.DataMapRow; import org.apache.carbondata.core.indexstore.row.UnsafeDataMapRow; import org.apache.carbondata.core.indexstore.schema.CarbonRowSchema; -import org.apache.carbondata.core.memory.MemoryAllocator; import org.apache.carbondata.core.memory.MemoryBlock; import org.apache.carbondata.core.memory.MemoryException; +import org.apache.carbondata.core.memory.MemoryType; import org.apache.carbondata.core.memory.UnsafeMemoryManager; import org.apache.carbondata.core.metadata.datatype.DataType; import org.apache.carbondata.core.metadata.datatype.DataTypes; @@ -51,7 +51,7 @@ public class UnsafeMemoryDMStore extends AbstractMemoryDMStore { public UnsafeMemoryDMStore() throws MemoryException { this.allocatedSize = capacity; this.memoryBlock = -UnsafeMemoryManager.allocateMemoryWithRetry(MemoryAllocator.HEAP, taskId, allocatedSize); +UnsafeMemoryManager.allocateMemoryWithRetry(MemoryType.ONHEAP, taskId, allocatedSize); this.pointers = new int[1000]; } @@ -74,10 +74,10 @@ public class UnsafeMemoryDMStore extends AbstractMemoryDMStore { private void increaseMemory(int requiredMemory) throws MemoryException { MemoryBlock newMemoryBlock = UnsafeMemoryManager -.allocateMemoryWithRetry(MemoryAllocator.HEAP, taskId, allocatedSize + requiredMemory); +.allocateMemoryWithRetry(MemoryType.ONHEAP, taskId, allocatedSize + requiredMemory); getUnsafe().copyMemory(this.memoryBlock.getBaseObject(), this.memoryBlock.getBaseOffset(), 
newMemoryBlock.getBaseObject(), newMemoryBlock.getBaseOffset(), runningLength); -UnsafeMemoryManager.INSTANCE.freeMemory(MemoryAllocator.HEAP, taskId, this.memoryBlock); +UnsafeMemoryManager.INSTANCE.freeMemory(taskId, this.memoryBlock); allocatedSize = allocatedSize + requiredMemory; this.memoryBlock = newMemoryBlock; } @@ -190,10 +190,10 @@ public class UnsafeMemoryDMStore extends AbstractMemoryDMStore { public void finishWriting() throws MemoryException { if (runningLength < allocatedSize) { MemoryBlock allocate = - UnsafeMemoryManager.allocateMemoryWithRetry(MemoryAllocator.HEAP, taskId, runningLength); + UnsafeMemoryManager.allocateMemoryWithRetry(MemoryType.ONHEAP, taskId, runningLength); getUnsafe().copyMemory(memoryBlock.getBaseObject(), memoryBlock.getBaseOffset(), allocate.getBaseObject(), allocate.getBaseOffset(), runningLength); - UnsafeMemoryManager.INSTANCE.freeMemory(MemoryAllocator.HEAP, taskId, memor
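A minimal, self-contained sketch of the fix's idea, with illustrative names rather than CarbonData's actual classes: the block records which kind of memory backs it, and free() dispatches to the matching allocator instead of assuming one.

import java.util.HashMap;
import java.util.Map;

public class TypedMemoryDemo {
  enum MemoryType { ONHEAP, OFFHEAP }

  static final class MemoryBlock {
    final long id;
    final MemoryType type;  // captured at allocation time, carried with the block
    MemoryBlock(long id, MemoryType type) { this.id = id; this.type = type; }
  }

  // stand-ins for the heap and unsafe allocators
  static final Map<Long, byte[]> onHeap = new HashMap<>();
  static final Map<Long, byte[]> offHeap = new HashMap<>();
  static long nextId = 0;

  static MemoryBlock allocate(MemoryType type, int size) {
    long id = nextId++;
    (type == MemoryType.ONHEAP ? onHeap : offHeap).put(id, new byte[size]);
    return new MemoryBlock(id, type);
  }

  static void free(MemoryBlock block) {
    // Dispatch on the block's own memory type; freeing an on-heap block
    // through the off-heap allocator is exactly the crash being fixed.
    (block.type == MemoryType.ONHEAP ? onHeap : offHeap).remove(block.id);
  }

  public static void main(String[] args) {
    MemoryBlock b = allocate(MemoryType.ONHEAP, 1024);
    free(b);  // released by the on-heap path, regardless of caller context
    System.out.println("freed on-heap block " + b.id);
  }
}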
carbondata git commit: [CARBONDATA-2982] CarbonSchemaReader support array
Repository: carbondata Updated Branches: refs/heads/master 7d1fcb309 -> d8a51c9bf [CARBONDATA-2982] CarbonSchemaReader support array This PR fix the issue and change : org.apache.carbondata.sdk.file.CarbonSchemaReader#readSchemaInDataFile org.apache.carbondata.sdk.file.CarbonSchemaReader#readSchemaInIndexFile This PR remove child schema This closes #2780 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/d8a51c9b Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/d8a51c9b Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/d8a51c9b Branch: refs/heads/master Commit: d8a51c9bf314fb1cd5f6112e66eb04e776a0553d Parents: 7d1fcb3 Author: xubo245 Authored: Fri Sep 28 11:47:22 2018 +0800 Committer: manishgupta88 Committed: Wed Oct 3 16:24:38 2018 +0530 -- .../examples/sdk/CarbonReaderExample.java | 4 +- .../carbondata/examplesCI/RunExamples.scala | 5 ++ .../carbondata/sdk/file/CarbonSchemaReader.java | 14 +++- .../carbondata/sdk/file/CarbonReaderTest.java | 86 4 files changed, 105 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/d8a51c9b/examples/spark2/src/main/java/org/apache/carbondata/examples/sdk/CarbonReaderExample.java -- diff --git a/examples/spark2/src/main/java/org/apache/carbondata/examples/sdk/CarbonReaderExample.java b/examples/spark2/src/main/java/org/apache/carbondata/examples/sdk/CarbonReaderExample.java index 9e80567..ef4ae7a 100644 --- a/examples/spark2/src/main/java/org/apache/carbondata/examples/sdk/CarbonReaderExample.java +++ b/examples/spark2/src/main/java/org/apache/carbondata/examples/sdk/CarbonReaderExample.java @@ -122,10 +122,11 @@ public class CarbonReaderExample { for (int j = 0; j < arr.length; j++) { System.out.print(arr[j] + " "); } +assert (arr[0].equals("Hello")); +assert (arr[3].equals("Carbon")); System.out.println(); i++; } -System.out.println("\nFinished"); reader.close(); // Read data @@ -148,7 +149,6 @@ public class CarbonReaderExample { row[5], row[6], row[7], row[8], row[9], row[10])); i++; } -System.out.println("\nFinished"); reader2.close(); FileUtils.deleteDirectory(new File(path)); } catch (Throwable e) { http://git-wip-us.apache.org/repos/asf/carbondata/blob/d8a51c9b/examples/spark2/src/test/scala/org/apache/carbondata/examplesCI/RunExamples.scala -- diff --git a/examples/spark2/src/test/scala/org/apache/carbondata/examplesCI/RunExamples.scala b/examples/spark2/src/test/scala/org/apache/carbondata/examplesCI/RunExamples.scala index 2b9b999..6a13dc3 100644 --- a/examples/spark2/src/test/scala/org/apache/carbondata/examplesCI/RunExamples.scala +++ b/examples/spark2/src/test/scala/org/apache/carbondata/examplesCI/RunExamples.scala @@ -23,6 +23,7 @@ import org.scalatest.BeforeAndAfterAll import org.apache.carbondata.examples._ import org.apache.carbondata.core.constants.CarbonCommonConstants import org.apache.carbondata.core.util.CarbonProperties +import org.apache.carbondata.examples.sdk.CarbonReaderExample /** * Test suite for examples @@ -113,4 +114,8 @@ class RunExamples extends QueryTest with BeforeAndAfterAll { test("ExternalTableExample") { ExternalTableExample.exampleBody(spark) } + + test("CarbonReaderExample") { +CarbonReaderExample.main(null) + } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/carbondata/blob/d8a51c9b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonSchemaReader.java -- diff --git a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonSchemaReader.java 
b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonSchemaReader.java index d8882bc..e84a25a 100644 --- a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonSchemaReader.java +++ b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonSchemaReader.java @@ -65,7 +65,15 @@ public class CarbonSchemaReader { */ public static Schema readSchemaInDataFile(String dataFilePath) throws IOException { CarbonHeaderReader reader = new CarbonHeaderReader(dataFilePath); -return new Schema(reader.readSchema()); +List columnSchemaList = new ArrayList(); +List schemaList = reader.readSchema(); +for (int i = 0; i &
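The loop body above is truncated, so the following is a hypothetical sketch of the child-schema filtering this commit describes; the dotted-name condition and accessor names are assumptions for illustration, not quoted code.

// Hypothetical sketch: child columns of a complex field carry their
// parent's name as a dotted prefix (e.g. "arrayField.val"), so keeping
// only undotted names returns just the top-level fields.
public static Schema readTopLevelSchema(CarbonHeaderReader reader) throws IOException {
  List<ColumnSchema> topLevel = new ArrayList<>();
  for (ColumnSchema column : reader.readSchema()) {
    if (!column.getColumn_name().contains(".")) {  // assumed accessor and convention
      topLevel.add(column);
    }
  }
  return new Schema(topLevel);
}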
carbondata git commit: [CARBONDATA-2980][BloomDataMap] Fix bug in clearing bloomindex cache when recreating table and datamap
Repository: carbondata Updated Branches: refs/heads/master 9ae91cc5a -> 1c1ced32d

[CARBONDATA-2980][BloomDataMap] Fix bug in clearing bloomindex cache when recreating table and datamap

We use the shard path as part of the key for the bloomindex cache. However, the path separator on Windows differs from that on Linux, which caused the cache entry not to be cleaned when clearing the cache (while loading the cache the path separator is '/', but while dropping the cache it is '\'). We fixed the bug by unifying the path separator while clearing the cache.

This closes #2778

Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/1c1ced32 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/1c1ced32 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/1c1ced32 Branch: refs/heads/master Commit: 1c1ced32d122ba8ce7cbad4fd29f778f5dbb4871 Parents: 9ae91cc Author: xuchuanyin Authored: Sat Sep 29 14:03:09 2018 +0800 Committer: manishgupta88 Committed: Sun Sep 30 12:19:56 2018 +0530 -- .../datamap/bloom/BloomCoarseGrainDataMapFactory.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/1c1ced32/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFactory.java -- diff --git a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFactory.java b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFactory.java index 8c74c94..8974918 100644 --- a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFactory.java +++ b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFactory.java @@ -235,13 +235,13 @@ public class BloomCoarseGrainDataMapFactory extends DataMapFactory
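A runnable illustration of the separator normalization, assuming cache keys built from shard paths (names are illustrative, not the actual CarbonData methods):

public class ShardPathKeyDemo {
  // Normalize Windows '\' to '/' before using a shard path as (part of)
  // a cache key, so the key built at load time matches the key built at
  // drop time on any platform.
  static String toCacheKeyPath(String shardPath) {
    return shardPath.replace('\\', '/');
  }

  public static void main(String[] args) {
    String loadKey = toCacheKeyPath("store/table/datamap/shard0");     // key as loaded
    String dropKey = toCacheKeyPath("store\\table\\datamap\\shard0");  // key as dropped on Windows
    System.out.println(loadKey.equals(dropKey));  // true: the entry is found and cleared
  }
}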
carbondata git commit: [CARBONDATA-2972] Debug Logs and function added for Adaptive Encoding
Repository: carbondata Updated Branches: refs/heads/master 1b4109d5b -> 54bcf4963 [CARBONDATA-2972] Debug Logs and function added for Adaptive Encoding Added a function to get the type of encoding used. Added the debug log for checking which type of encoding is used This closes #2758 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/54bcf496 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/54bcf496 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/54bcf496 Branch: refs/heads/master Commit: 54bcf49638262af82583d930632018da6c73c8c5 Parents: 1b4109d Author: Manish Nalla Authored: Tue Sep 25 17:44:49 2018 +0530 Committer: manishgupta88 Committed: Thu Sep 27 17:16:37 2018 +0530 -- .../core/datastore/page/encoding/ColumnPageEncoder.java | 8 .../org/apache/carbondata/processing/store/TablePage.java| 3 ++- 2 files changed, 10 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/54bcf496/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoder.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoder.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoder.java index 3067823..44e7192 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoder.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoder.java @@ -78,6 +78,14 @@ public abstract class ColumnPageEncoder { } } + public Encoding getEncodingType() { +List currEncodingList = getEncodingList(); +if (CarbonUtil.isEncodedWithMeta(currEncodingList)) { + return currEncodingList.get(0); +} +return null; + } + /** * Return a encoded column page by encoding the input page * The encoded binary data and metadata are wrapped in encoding column page http://git-wip-us.apache.org/repos/asf/carbondata/blob/54bcf496/processing/src/main/java/org/apache/carbondata/processing/store/TablePage.java -- diff --git a/processing/src/main/java/org/apache/carbondata/processing/store/TablePage.java b/processing/src/main/java/org/apache/carbondata/processing/store/TablePage.java index 791b4c6..82129db 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/store/TablePage.java +++ b/processing/src/main/java/org/apache/carbondata/processing/store/TablePage.java @@ -424,7 +424,8 @@ public class TablePage { "Encoder result ---> Source data type: " + noDictDimensionPages[noDictIndex] .getDataType().getName() + " Destination data type: " + targetDataType .getName() + " for the column: " + noDictDimensionPages[noDictIndex] - .getColumnSpec().getFieldName()); + .getColumnSpec().getFieldName() + " having encoding type: " + + columnPageEncoder.getEncodingType()); } } noDictIndex++;
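A brief usage sketch of the new helper; it assumes the Encoding and LOGGER identifiers visible in the diff above, and the enum value in the comment is illustrative.

// getEncodingType() returns the first encoding only when the page was
// encoded with metadata; otherwise it returns null, so guard before logging.
Encoding encoding = columnPageEncoder.getEncodingType();
if (encoding != null) {
  LOGGER.debug("column encoded with: " + encoding);  // e.g. ADAPTIVE_INTEGRAL
}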
carbondata git commit: [CARBONDATA-2973] Added documentation for fallback condition for complex columns in local Dictionary
Repository: carbondata Updated Branches: refs/heads/master a9ddfbd7b -> 3f99e9b7f [CARBONDATA-2973] Added documentation for fallback condition for complex columns in local Dictionary 1. Added documentation for fallback condition for complex columns in local Dictionary 2. Added documentation for system level property" carbon.local.dictionary.decoder.fallback" This closes #2766 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/3f99e9b7 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/3f99e9b7 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/3f99e9b7 Branch: refs/heads/master Commit: 3f99e9b7f87b387f03cb5bece2b2a8c5a50b Parents: a9ddfbd Author: praveenmeenakshi56 Authored: Wed Sep 26 12:40:37 2018 +0530 Committer: manishgupta88 Committed: Wed Sep 26 18:14:44 2018 +0530 -- docs/configuration-parameters.md | 2 +- docs/ddl-of-carbondata.md| 16 +++- docs/faq.md | 2 +- 3 files changed, 13 insertions(+), 7 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/3f99e9b7/docs/configuration-parameters.md -- diff --git a/docs/configuration-parameters.md b/docs/configuration-parameters.md index 7edae47..662525b 100644 --- a/docs/configuration-parameters.md +++ b/docs/configuration-parameters.md @@ -119,7 +119,7 @@ This section provides the details of all the configurations required for the Car | Parameter | Default Value | Description | |--|---|---| -| carbon.max.driver.lru.cache.size | -1 | Maximum memory **(in MB)** upto which the driver process can cache the data (BTree and dictionary values). Beyond this, least recently used data will be removed from cache before loading new set of values.Default value of -1 means there is no memory limit for caching. Only integer values greater than 0 are accepted.**NOTE:** Minimum number of entries that needs to be removed from cache in order to load the new set of data is determined and unloaded.ie.,for example if 3 cache entries qualify for pre-emption, out of these, those entries that free up more cache memory is removed prior to others. Please refer [FAQs](./faq.md#how-to-check-LRU-cache-memory-footprint) for checking LRU cache memory footprint. | +| carbon.max.driver.lru.cache.size | -1 | Maximum memory **(in MB)** upto which the driver process can cache the data (BTree and dictionary values). Beyond this, least recently used data will be removed from cache before loading new set of values.Default value of -1 means there is no memory limit for caching. Only integer values greater than 0 are accepted.**NOTE:** Minimum number of entries that needs to be removed from cache in order to load the new set of data is determined and unloaded.ie.,for example if 3 cache entries qualify for pre-emption, out of these, those entries that free up more cache memory is removed prior to others. Please refer [FAQs](./faq.md#how-to-check-lru-cache-memory-footprint) for checking LRU cache memory footprint. | | carbon.max.executor.lru.cache.size | -1 | Maximum memory **(in MB)** upto which the executor process can cache the data (BTree and reverse dictionary values).Default value of -1 means there is no memory limit for caching. Only integer values greater than 0 are accepted.**NOTE:** If this parameter is not configured, then the value of ***carbon.max.driver.lru.cache.size*** will be used. | | max.query.execution.time | 60 | Maximum time allowed for one query to be executed. The value is in minutes. 
| | carbon.enableMinMax | true | CarbonData maintains the metadata which enables to prune unnecessary files from being scanned as per the query conditions.To achieve pruning, Min,Max of each column is maintined.Based on the filter condition in the query, certain data can be skipped from scanning by matching the filter value against the min,max values of the column(s) present in that carbondata file.This pruing enhances query performance significantly. | http://git-wip-us.apache.org/repos/asf/carbondata/blob/3f99e9b7/docs/ddl-of-carbondata.md -- diff --git a/docs/ddl-of-carbondata.md b/docs/ddl-of-carbondata.md index 2a467a2..5eeba86 100644 --- a/docs/ddl-of-carbondata.md +++ b/docs/ddl-of-carbondata.md @@ -231,7 +231,13 @@ CarbonData DDL statements are documented here,which includes: * In case of multi-level complex dataType columns, primitive string/varchar/char columns are considered for local dictionary generation. - Local dictionary will have to be enabled explicitly during create
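For reference, the system-level property documented here can also be set programmatically; a hedged sketch using the CarbonProperties API that appears elsewhere in this digest (the key string is taken from the commit message, not verified against the code):

// Sketch only: toggle the local dictionary decoder fallback documented above.
CarbonProperties.getInstance()
    .addProperty("carbon.local.dictionary.decoder.fallback", "true");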
[1/4] carbondata git commit: [CARBONDATA-2896][Refactor] Adaptive Encoding for Primitive data types
Repository: carbondata Updated Branches: refs/heads/master 61fcdf286 -> c8f706304 http://git-wip-us.apache.org/repos/asf/carbondata/blob/c8f70630/processing/src/main/java/org/apache/carbondata/processing/sort/sortdata/SortParameters.java -- diff --git a/processing/src/main/java/org/apache/carbondata/processing/sort/sortdata/SortParameters.java b/processing/src/main/java/org/apache/carbondata/processing/sort/sortdata/SortParameters.java index d3d538a..c4416d5 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/sort/sortdata/SortParameters.java +++ b/processing/src/main/java/org/apache/carbondata/processing/sort/sortdata/SortParameters.java @@ -18,6 +18,7 @@ package org.apache.carbondata.processing.sort.sortdata; import java.io.File; import java.io.Serializable; +import java.util.Map; import org.apache.carbondata.common.logging.LogService; import org.apache.carbondata.common.logging.LogServiceFactory; @@ -88,6 +89,17 @@ public class SortParameters implements Serializable { private DataType[] measureDataType; + // no dictionary data types of the table + private DataType[] noDictDataType; + + // no dictionary columns data types participating in sort + // used while writing the row to sort temp file where sort no dict columns are handled seperately + private DataType[] noDictSortDataType; + + // no dictionary columns data types not participating in sort + // used while writing the row to sort temp file where nosort nodict columns are handled seperately + private DataType[] noDictNoSortDataType; + /** * To know how many columns are of high cardinality. */ @@ -111,6 +123,8 @@ public class SortParameters implements Serializable { private boolean[] noDictionaryDimnesionColumn; private boolean[] noDictionarySortColumn; + + private boolean[] sortColumn; /** * whether dimension is varchar data type. 
* since all dimensions are string, we use an array of boolean instead of datatypes @@ -142,11 +156,15 @@ public class SortParameters implements Serializable { parameters.databaseName = databaseName; parameters.tableName = tableName; parameters.measureDataType = measureDataType; +parameters.noDictDataType = noDictDataType; +parameters.noDictSortDataType = noDictSortDataType; +parameters.noDictNoSortDataType = noDictNoSortDataType; parameters.noDictionaryCount = noDictionaryCount; parameters.partitionID = partitionID; parameters.segmentId = segmentId; parameters.taskNo = taskNo; parameters.noDictionaryDimnesionColumn = noDictionaryDimnesionColumn; +parameters.sortColumn = sortColumn; parameters.isVarcharDimensionColumn = isVarcharDimensionColumn; parameters.noDictionarySortColumn = noDictionarySortColumn; parameters.numberOfSortColumns = numberOfSortColumns; @@ -382,7 +400,10 @@ public class SortParameters implements Serializable { parameters.setNumberOfSortColumns(configuration.getNumberOfSortColumns()); parameters.setNumberOfNoDictSortColumns(configuration.getNumberOfNoDictSortColumns()); -setNoDictionarySortColumnMapping(parameters); +parameters.setNoDictionarySortColumn(CarbonDataProcessorUtil + .getNoDictSortColMapping(configuration.getTableIdentifier().getDatabaseName(), +configuration.getTableIdentifier().getTableName())); +parameters.setSortColumn(configuration.getSortColumnMapping()); parameters.setObserver(new SortObserver()); // get sort buffer size parameters.setSortBufferSize(Integer.parseInt(carbonProperties @@ -431,6 +452,14 @@ public class SortParameters implements Serializable { DataType[] measureDataType = configuration.getMeasureDataType(); parameters.setMeasureDataType(measureDataType); +parameters.setNoDictDataType(CarbonDataProcessorUtil + .getNoDictDataTypes(configuration.getTableIdentifier().getDatabaseName(), +configuration.getTableIdentifier().getTableName())); +Map noDictSortAndNoSortDataTypes = CarbonDataProcessorUtil + .getNoDictSortAndNoSortDataTypes(configuration.getTableIdentifier().getDatabaseName(), +configuration.getTableIdentifier().getTableName()); + parameters.setNoDictSortDataType(noDictSortAndNoSortDataTypes.get("noDictSortDataTypes")); + parameters.setNoDictNoSortDataType(noDictSortAndNoSortDataTypes.get("noDictNoSortDataTypes")); return parameters; } @@ -442,28 +471,10 @@ public class SortParameters implements Serializable { this.rangeId = rangeId; } - /** - * this method will set the boolean mapping for no dictionary sort columns - * - * @param parameters - */ - private static void setNoDictionarySortColumnMapping(SortParameters parameters) { -if (parameters.getNumberOfSortColumns() == parameters.getNoDictionaryDimnesionColumn().length) { - parameters.setNoDictionarySortColumn(parameters.getNoDi
[3/4] carbondata git commit: [CARBONDATA-2896][Refactor] Adaptive Encoding for Primitive data types
http://git-wip-us.apache.org/repos/asf/carbondata/blob/c8f70630/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/dimension/legacy/ComplexDimensionIndexCodec.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/dimension/legacy/ComplexDimensionIndexCodec.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/dimension/legacy/ComplexDimensionIndexCodec.java index cc044cc..f232652 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/dimension/legacy/ComplexDimensionIndexCodec.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/dimension/legacy/ComplexDimensionIndexCodec.java @@ -21,8 +21,8 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; +import org.apache.carbondata.core.datastore.columnar.BlockIndexerStorage; import org.apache.carbondata.core.datastore.columnar.BlockIndexerStorageForShort; -import org.apache.carbondata.core.datastore.columnar.IndexStorage; import org.apache.carbondata.core.datastore.compression.Compressor; import org.apache.carbondata.core.datastore.compression.CompressorFactory; import org.apache.carbondata.core.datastore.page.ColumnPage; @@ -46,7 +46,7 @@ public class ComplexDimensionIndexCodec extends IndexStorageCodec { return new IndexStorageEncoder() { @Override void encodeIndexStorage(ColumnPage inputPage) { -IndexStorage indexStorage = +BlockIndexerStorage indexStorage = new BlockIndexerStorageForShort(inputPage.getByteArrayPage(), false, false, false); byte[] flattened = ByteUtil.flatten(indexStorage.getDataPage()); Compressor compressor = CompressorFactory.getInstance().getCompressor( http://git-wip-us.apache.org/repos/asf/carbondata/blob/c8f70630/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/dimension/legacy/DictDimensionIndexCodec.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/dimension/legacy/DictDimensionIndexCodec.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/dimension/legacy/DictDimensionIndexCodec.java index 66f5f1d..f3475fd 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/dimension/legacy/DictDimensionIndexCodec.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/dimension/legacy/DictDimensionIndexCodec.java @@ -21,9 +21,9 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; +import org.apache.carbondata.core.datastore.columnar.BlockIndexerStorage; import org.apache.carbondata.core.datastore.columnar.BlockIndexerStorageForNoInvertedIndexForShort; import org.apache.carbondata.core.datastore.columnar.BlockIndexerStorageForShort; -import org.apache.carbondata.core.datastore.columnar.IndexStorage; import org.apache.carbondata.core.datastore.compression.Compressor; import org.apache.carbondata.core.datastore.compression.CompressorFactory; import org.apache.carbondata.core.datastore.page.ColumnPage; @@ -47,7 +47,7 @@ public class DictDimensionIndexCodec extends IndexStorageCodec { return new IndexStorageEncoder() { @Override void encodeIndexStorage(ColumnPage inputPage) { -IndexStorage indexStorage; +BlockIndexerStorage indexStorage; byte[][] data = inputPage.getByteArrayPage(); if (isInvertedIndex) { indexStorage = new BlockIndexerStorageForShort(data, true, false, isSort); 
http://git-wip-us.apache.org/repos/asf/carbondata/blob/c8f70630/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/dimension/legacy/DirectDictDimensionIndexCodec.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/dimension/legacy/DirectDictDimensionIndexCodec.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/dimension/legacy/DirectDictDimensionIndexCodec.java index a130cbd..15827f8 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/dimension/legacy/DirectDictDimensionIndexCodec.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/dimension/legacy/DirectDictDimensionIndexCodec.java @@ -21,9 +21,9 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; +import org.apache.carbondata.core.datastore.columnar.BlockIndexerStorage; import org.apache.carbondata.core.datastore.columnar.BlockIndexerStorageForNoInvertedIndexForShort; import org.apache.carbondata.core.datastore.columnar.BlockIndexerStorageForShort; -import org.apache.carbondata.core.datastore.columnar.IndexStorage; import org.apache.carbondata.core.datastore.compression
[4/4] carbondata git commit: [CARBONDATA-2896][Refactor] Adaptive Encoding for Primitive data types
[CARBONDATA-2896][Refactor] Adaptive Encoding for Primitive data types

Loading configurations and settings
(1) Parse the data like that of a measure, so FieldEncoderFactory is changed to take the measure flow
(2) While creating the loading configurations, no-dictionary and sort columns are taken care of in all the needed flows

Sort rows preparation
(1) Prepare the row to be sorted with original data for no-dictionary columns
(2) Use data-type-based comparators for the no-dictionary sort columns in all the flows (intermediate sort, final sort, unsafe sort)
(3) Handle read/write of rows with no-dictionary primitive data types to intermediate files and in the final file merger, as we will be reading and writing the original data
(4) Get the no-dictionary sort data types from the load configurations set in the LOAD step

Adding to column page and applying adaptive encoding
(1) Add the no-dictionary primitive data as original data
(2) Apply adaptive encoding to the page
(3) Reuse the adaptive encoding techniques that already exist for measure columns

Writing the inverted index to an adaptive encoded page
(1) Prepare the inverted index list based on data-type-based comparison
(2) Apply RLE on the inverted index
(3) Write the inverted index to the encoded page

Create decoder while querying
(1) Create the proper decoder for the no-dictionary column pages
(2) Uncompress the column page and also the inverted index

Filter flow changes
(1) FilterValues will be in bytes, so convert the data to bytes for comparison
(2) Change isScanRequired to compare min/max values based on the data type

Fill output row in case of queries
(1) Change noDictionaryKeys to Object; it can now hold data-type-based values for no-dictionary primitive data types

Bloom filter changes
(1) Change the bloom filter load
(2) While rebuilding the datamap, the load expects the data in its original form, therefore a conversion is applied
(3) Fill the no-dictionary primitive data as original data

Compaction changes
Compaction gets its rows from the result collectors, but the result collectors return no-dictionary columns as bytes, so a conversion is needed to turn the bytes back into original data based on the data type.
This closes #2654 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/c8f70630 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/c8f70630 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/c8f70630 Branch: refs/heads/master Commit: c8f7063048115d161de539cf277cc1ccb015159b Parents: 61fcdf2 Author: dhatchayani Authored: Wed Aug 22 12:45:44 2018 +0530 Committer: manishgupta88 Committed: Tue Sep 18 19:12:56 2018 +0530 -- .../carbondata/core/datastore/TableSpec.java| 17 + ...mpressedDimensionChunkFileBasedReaderV3.java | 42 +- .../chunk/store/ColumnPageWrapper.java | 106 - ...feVariableLengthDimensionDataChunkStore.java | 28 +- .../datastore/columnar/BlockIndexerStorage.java | 104 + .../BlockIndexerStorageForNoDictionary.java | 116 ++ ...ndexerStorageForNoInvertedIndexForShort.java | 17 +- .../columnar/BlockIndexerStorageForShort.java | 71 +--- .../ColumnWithRowIdForNoDictionary.java | 72 .../core/datastore/columnar/IndexStorage.java | 35 -- .../page/encoding/ColumnPageEncoder.java| 48 ++- .../page/encoding/DefaultEncodingFactory.java | 70 +++- .../page/encoding/EncodingFactory.java | 18 +- .../page/encoding/adaptive/AdaptiveCodec.java | 195 - .../adaptive/AdaptiveDeltaFloatingCodec.java| 31 +- .../adaptive/AdaptiveDeltaIntegralCodec.java| 30 +- .../adaptive/AdaptiveFloatingCodec.java | 30 +- .../adaptive/AdaptiveIntegralCodec.java | 30 +- .../legacy/ComplexDimensionIndexCodec.java | 4 +- .../legacy/DictDimensionIndexCodec.java | 4 +- .../legacy/DirectDictDimensionIndexCodec.java | 4 +- .../legacy/HighCardDictDimensionIndexCodec.java | 4 +- .../dimension/legacy/IndexStorageEncoder.java | 8 +- .../core/datastore/page/key/TablePageKey.java | 3 +- .../page/statistics/TablePageStatistics.java| 14 +- .../core/datastore/row/WriteStepRowUtil.java| 28 +- .../core/scan/executor/util/QueryUtil.java | 36 ++ .../carbondata/core/scan/filter/FilterUtil.java | 42 +- .../executer/ExcludeFilterExecuterImpl.java | 2 +- .../executer/IncludeFilterExecuterImpl.java | 58 ++- .../executer/RangeValueFilterExecuterImpl.java | 39 +- .../executer/RestructureEvaluatorImpl.java | 4 +- .../executer/RowLevelFilterExecuterImpl.java| 10 +- .../RowLevelRangeGrtThanFiterExecuterImpl.java | 55 ++- ...elRangeGrtrThanEquaToFilterExecuterImpl.java | 55 ++- ...velRangeLessThanEqualFilterExecuterImpl.java | 53 ++- ...RowLevelRangeLessThanFilterExecuterImpl.java | 53
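Point (2) of the sort-rows preparation above depends on comparing no-dictionary sort columns as typed values instead of serialized bytes. A self-contained sketch of that idea follows; CarbonData has its own comparator factory, so treat these names as illustrative.

import java.util.Comparator;

public class TypedNoDictComparatorDemo {
  enum DataType { INT, LONG, DOUBLE, STRING }

  // Pick a comparator that honors the column's data type instead of
  // comparing serialized bytes lexicographically.
  static Comparator<Object> comparatorFor(DataType type) {
    switch (type) {
      case INT:    return (a, b) -> Integer.compare((Integer) a, (Integer) b);
      case LONG:   return (a, b) -> Long.compare((Long) a, (Long) b);
      case DOUBLE: return (a, b) -> Double.compare((Double) a, (Double) b);
      default:     return (a, b) -> ((String) a).compareTo((String) b);
    }
  }

  public static void main(String[] args) {
    // lexicographic byte order would sort "9" after "10"; typed order does not
    System.out.println(comparatorFor(DataType.INT).compare(9, 10) < 0);  // true
  }
}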
[2/4] carbondata git commit: [CARBONDATA-2896][Refactor] Adaptive Encoding for Primitive data types
http://git-wip-us.apache.org/repos/asf/carbondata/blob/c8f70630/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomDataMapBuilder.java -- diff --git a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomDataMapBuilder.java b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomDataMapBuilder.java index 29e3060..29a4098 100644 --- a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomDataMapBuilder.java +++ b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomDataMapBuilder.java @@ -27,6 +27,7 @@ import org.apache.carbondata.core.datastore.block.SegmentProperties; import org.apache.carbondata.core.metadata.datatype.DataTypes; import org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn; import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.core.util.DataTypeUtil; /** * Implementation for BloomFilter DataMap to rebuild the datamap for main table with existing data @@ -61,8 +62,12 @@ public class BloomDataMapBuilder extends AbstractBloomDataMapWriter implements D } @Override - protected byte[] convertNonDictionaryValue(int indexColIdx, byte[] value) { -return value; + protected byte[] convertNonDictionaryValue(int indexColIdx, Object value) { +// no dictionary measure columns will be of original data, so convert it to bytes +if (DataTypeUtil.isPrimitiveColumn(indexColumns.get(indexColIdx).getDataType())) { + return CarbonUtil.getValueAsBytes(indexColumns.get(indexColIdx).getDataType(), value); +} +return (byte[]) value; } @Override http://git-wip-us.apache.org/repos/asf/carbondata/blob/c8f70630/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomDataMapWriter.java -- diff --git a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomDataMapWriter.java b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomDataMapWriter.java index cad9787..61bd036 100644 --- a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomDataMapWriter.java +++ b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomDataMapWriter.java @@ -29,6 +29,7 @@ import org.apache.carbondata.core.metadata.datatype.DataTypes; import org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn; import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension; import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.core.util.DataTypeUtil; import org.apache.commons.collections.CollectionUtils; import org.apache.commons.collections.Predicate; @@ -73,11 +74,14 @@ public class BloomDataMapWriter extends AbstractBloomDataMapWriter { } } - protected byte[] convertNonDictionaryValue(int indexColIdx, byte[] value) { + protected byte[] convertNonDictionaryValue(int indexColIdx, Object value) { if (DataTypes.VARCHAR == indexColumns.get(indexColIdx).getDataType()) { - return DataConvertUtil.getRawBytesForVarchar(value); + return DataConvertUtil.getRawBytesForVarchar((byte[]) value); +} else if (DataTypeUtil.isPrimitiveColumn(indexColumns.get(indexColIdx).getDataType())) { + // get bytes for the original value of the no dictionary column + return CarbonUtil.getValueAsBytes(indexColumns.get(indexColIdx).getDataType(), value); } else { - return DataConvertUtil.getRawBytes(value); + return DataConvertUtil.getRawBytes((byte[]) value); } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/c8f70630/hadoop/src/main/java/org/apache/carbondata/hadoop/testutil/StoreCreator.java -- diff --git 
a/hadoop/src/main/java/org/apache/carbondata/hadoop/testutil/StoreCreator.java b/hadoop/src/main/java/org/apache/carbondata/hadoop/testutil/StoreCreator.java index 7cd241a..5525941 100644 --- a/hadoop/src/main/java/org/apache/carbondata/hadoop/testutil/StoreCreator.java +++ b/hadoop/src/main/java/org/apache/carbondata/hadoop/testutil/StoreCreator.java @@ -245,7 +245,7 @@ public class StoreCreator { date.setEncodingList(encodings); date.setColumnUniqueId(UUID.randomUUID().toString()); date.setDimensionColumn(true); -date.setColumnReferenceId(id.getColumnUniqueId()); +date.setColumnReferenceId(date.getColumnUniqueId()); date.setSchemaOrdinal(schemaOrdinal++); if (sortColumns.contains(date.getColumnName())) { date.setSortColumn(true); @@ -263,7 +263,7 @@ public class StoreCreator { if (sortColumns.contains(country.getColumnName())) { country.setSortColumn(true); } -country.setColumnReferenceId(id.getColumnUniqueId()); +country.setColumnReferenceId(country.getColumnUniqueId()); columnSchemas.add(country); ColumnS
carbondata git commit: [HOTFIX] Fixed 2.3 CI
Repository: carbondata Updated Branches: refs/heads/master ac79a343f -> 2fb7dc9a7 [HOTFIX] Fixed 2.3 CI Fixed following issues: 1. FIxed lz4 jar issue by excluding from kafka dependency 2. Fixed constructor not found for reset command. 3. Removed warn logger for sparkcontext to reduce logs in CI. This closes #2716 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/2fb7dc9a Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/2fb7dc9a Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/2fb7dc9a Branch: refs/heads/master Commit: 2fb7dc9a7831b0d092b98d6716b9e065bd859fe1 Parents: ac79a34 Author: ravipesala Authored: Fri Sep 14 17:38:30 2018 +0530 Committer: manishgupta88 Committed: Fri Sep 14 20:10:58 2018 +0530 -- core/pom.xml| 6 ++--- datamap/mv/plan/pom.xml | 6 - examples/spark2/pom.xml | 15 +-- .../sdv/generated/SetParameterTestCase.scala| 2 ++ integration/spark-common-test/pom.xml | 26 ...eneFineGrainDataMapWithSearchModeSuite.scala | 1 - .../preaggregate/TestPreAggregateLoad.scala | 17 + .../preaggregate/TestPreAggregateMisc.scala | 1 + ...tSparkCarbonFileFormatWithSparkSession.scala | 2 -- .../detailquery/SearchModeTestCase.scala| 1 - integration/spark-datasource/pom.xml| 9 --- integration/spark2/pom.xml | 21 +--- .../commands/SetCommandTestCase.scala | 1 + pom.xml | 6 + 14 files changed, 37 insertions(+), 77 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/2fb7dc9a/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index 51c603e..a7d6f4d 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -114,9 +114,9 @@ 4.0.42.Final - net.jpountz.lz4 - lz4 - 1.3.0 + org.lz4 + lz4-java + 1.4.0 http://git-wip-us.apache.org/repos/asf/carbondata/blob/2fb7dc9a/datamap/mv/plan/pom.xml -- diff --git a/datamap/mv/plan/pom.xml b/datamap/mv/plan/pom.xml index 982724d..ff6976d 100644 --- a/datamap/mv/plan/pom.xml +++ b/datamap/mv/plan/pom.xml @@ -48,12 +48,6 @@ org.apache.spark spark-core_${scala.binary.version} ${spark.version} - - - net.jpountz.lz4 - lz4 - - http://git-wip-us.apache.org/repos/asf/carbondata/blob/2fb7dc9a/examples/spark2/pom.xml -- diff --git a/examples/spark2/pom.xml b/examples/spark2/pom.xml index bd497c5..aa7b7c5 100644 --- a/examples/spark2/pom.xml +++ b/examples/spark2/pom.xml @@ -56,6 +56,12 @@ org.apache.spark spark-sql-kafka-0-10_${scala.binary.version} + + + net.jpountz.lz4 + lz4 + + org.apache.spark @@ -91,15 +97,6 @@ org.apache.carbondata carbondata-core ${project.version} - - - - net.jpountz.lz4 - lz4 - - http://git-wip-us.apache.org/repos/asf/carbondata/blob/2fb7dc9a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SetParameterTestCase.scala -- diff --git a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SetParameterTestCase.scala b/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SetParameterTestCase.scala index f3622dc..8c336d8 100644 --- a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SetParameterTestCase.scala +++ b/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/SetParameterTestCase.scala @@ -40,6 +40,7 @@ class SetParameterTestCase extends QueryTest with BeforeAndAfterAll { sql("drop table if exists carbon_table_single_pass") sql("drop table if exists carbon_table_disable_bad_record_logger") sql("drop table if exists 
carbon_table_load") +sqlContext.sparkSession.catalog.clearCache() sql("RESET") } @@ -155,6 +156,7 @@ class SetParameterTestCase extends QueryTest with BeforeAndAfterAll { test("TC_007-test SET property IS__EMPTY_DATA_BAD_RECORD=FALSE") { sql("drop table if exists emptyColumnValues") +sqlContext.sp
carbondata git commit: [CARBONDATA-2876] Fix Avro decimal datatype with precision and scale
Repository: carbondata Updated Branches: refs/heads/master 0483b46e9 -> 9ebab5748 [CARBONDATA-2876]Fix Avro decimal datatype with precision and scale 1.Add precision and scale for fieldvalue for Avro Decimal logical type. 2.If Avro schema is of union type with multiple record or multiple enum, then add check for schema. This closes #2687 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/9ebab574 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/9ebab574 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/9ebab574 Branch: refs/heads/master Commit: 9ebab5748969398cf12969eedd4701c30bc028cd Parents: 0483b46 Author: Indhumathi27 Authored: Mon Sep 3 17:35:01 2018 +0530 Committer: manishgupta88 Committed: Mon Sep 10 12:14:24 2018 +0530 -- ...ansactionalCarbonTableWithAvroDataType.scala | 470 ++- .../carbondata/sdk/file/AvroCarbonWriter.java | 77 ++- 2 files changed, 505 insertions(+), 42 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/9ebab574/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTableWithAvroDataType.scala -- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTableWithAvroDataType.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTableWithAvroDataType.scala index 29aa2de..dc13b16 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTableWithAvroDataType.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTableWithAvroDataType.scala @@ -18,8 +18,14 @@ package org.apache.carbondata.spark.testsuite.createTable import java.io.File +import java.nio.ByteBuffer +import javax.xml.bind.DatatypeConverter + import scala.collection.mutable +import org.apache.avro.Conversions.DecimalConversion +import org.apache.avro.{LogicalTypes, Schema} +import org.apache.avro.generic.GenericData import org.apache.spark.sql.Row import org.apache.spark.sql.test.util.QueryTest import org.scalatest.BeforeAndAfterAll @@ -46,6 +52,8 @@ class TestNonTransactionalCarbonTableWithAvroDataType extends QueryTest with Bef writerPath = writerPath.replace("\\", "/") + val decimalConversion = new DecimalConversion + override def beforeAll(): Unit = { sql("DROP TABLE IF EXISTS sdkOutputTable") CarbonProperties.getInstance() @@ -678,7 +686,7 @@ class TestNonTransactionalCarbonTableWithAvroDataType extends QueryTest with Bef | "name": "StudentActivity", | "fields": [ | { -|"name": "enum_field", "type": [{ +|"name": "union_field", "type": [{ | "namespace": "org.example.avro", | "name": "dec", | "type": "bytes", @@ -689,15 +697,27 @@ class TestNonTransactionalCarbonTableWithAvroDataType extends QueryTest with Bef | }] |}""".stripMargin -val json1 = - """{"enum_field":{"bytes":"1010"}}""".stripMargin - val nn = new org.apache.avro.Schema.Parser().parse(schema1) +val decimalConversion = new DecimalConversion +val logicalType = LogicalTypes.decimal(10, 2) +val decimal = new java.math.BigDecimal("1010").setScale(2) +//get unscaled 2's complement bytearray +val bytes = + decimalConversion.toBytes(decimal, nn.getField("union_field").schema, logicalType) +val data = DatatypeConverter.printBase64Binary(bytes.array()) +val json1 = 
+ s"""{"union_field":{"bytes":"$data"}}""".stripMargin val record = testUtil.jsonToAvro(json1, schema1) +val data1 = new String(record.get(0).asInstanceOf[ByteBuffer].array(), + CarbonCommonConstants.DEFAULT_CHARSET_CLASS) +val bytes1 = ByteBuffer.wrap(DatatypeConverter.parseBase64Binary(data1)) +val avroRec = new GenericData. Record(nn) +avroRec.put("union_field", bytes1) + val writer = CarbonWriter.builder .outputPath(writerPath).isTransactionalTable(false).buildWriterForAvroInput(nn) -writer.write(record) +writer.write(avroRec
[2/2] carbondata git commit: [CARBONDATA-2910] Support backward compatibility in fileformat and added tests for load with different sort orders
[CARBONDATA-2910] Support backward compatability in fileformat and added tests for load with different sort orders 1. The data loaded by old version with all dictionary exclude can now work with fileformat if the segment folder is given for reading. 2. Now user can specify different sort options per load while loading data through sdk, fileformat can read now. This closes #2685 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/3894e1d0 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/3894e1d0 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/3894e1d0 Branch: refs/heads/master Commit: 3894e1d050cc39959a6445f97a7850ac922b7bd8 Parents: b6bd90d Author: ravipesala Authored: Thu Aug 30 20:41:06 2018 +0530 Committer: manishgupta88 Committed: Fri Sep 7 20:17:44 2018 +0530 -- .../carbondata/core/datamap/TableDataMap.java | 34 .../carbondata/core/datamap/dev/DataMap.java| 13 +- .../dev/cgdatamap/CoarseGrainDataMap.java | 12 ++ .../datamap/dev/fgdatamap/FineGrainDataMap.java | 12 ++ .../indexstore/blockletindex/BlockDataMap.java | 27 +++- .../blockletindex/BlockletDataMapFactory.java | 3 +- .../core/metadata/schema/table/CarbonTable.java | 11 +- .../executor/impl/AbstractQueryExecutor.java| 97 .../core/scan/executor/util/QueryUtil.java | 19 +++ .../core/scan/expression/ColumnExpression.java | 7 + .../carbondata/core/scan/model/QueryModel.java | 64 ++-- .../core/scan/model/QueryModelBuilder.java | 21 ++- .../util/AbstractDataFileFooterConverter.java | 12 ++ .../core/util/BlockletDataMapUtil.java | 13 +- .../hadoop/api/CarbonFileInputFormat.java | 11 +- .../hadoop/api/CarbonInputFormat.java | 26 ++-- .../hadoop/api/CarbonTableInputFormat.java | 23 ++- .../hadoop/testutil/StoreCreator.java | 101 +--- .../hadoop/ft/CarbonTableInputFormatTest.java | 27 ++-- .../hadoop/ft/CarbonTableOutputFormatTest.java | 3 +- ...ithColumnMetCacheAndCacheLevelProperty.scala | 4 +- .../TestNonTransactionalCarbonTable.scala | 19 +-- .../execution/datasources/CarbonFileIndex.scala | 10 +- .../datasources/SparkCarbonFileFormat.scala | 14 +- .../datasource/SparkCarbonDataSourceTest.scala | 156 ++- .../streaming/CarbonStreamOutputFormatTest.java | 3 +- 26 files changed, 617 insertions(+), 125 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/3894e1d0/core/src/main/java/org/apache/carbondata/core/datamap/TableDataMap.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datamap/TableDataMap.java b/core/src/main/java/org/apache/carbondata/core/datamap/TableDataMap.java index aed8c60..a272777 100644 --- a/core/src/main/java/org/apache/carbondata/core/datamap/TableDataMap.java +++ b/core/src/main/java/org/apache/carbondata/core/datamap/TableDataMap.java @@ -36,6 +36,7 @@ import org.apache.carbondata.core.indexstore.PartitionSpec; import org.apache.carbondata.core.indexstore.SegmentPropertiesFetcher; import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier; import org.apache.carbondata.core.metadata.schema.table.DataMapSchema; +import org.apache.carbondata.core.scan.expression.Expression; import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf; import org.apache.carbondata.events.Event; import org.apache.carbondata.events.OperationContext; @@ -79,6 +80,39 @@ public final class TableDataMap extends OperationEventListener { return blockletDetailsFetcher; } + + /** + * Pass the valid segments and prune the datamap using filter expression + * + * @param segments + * @param 
filterExp + * @return + */ + public List prune(List segments, Expression filterExp, + List partitions) throws IOException { +List blocklets = new ArrayList<>(); +SegmentProperties segmentProperties; +Map> dataMaps = dataMapFactory.getDataMaps(segments); +for (Segment segment : segments) { + List pruneBlocklets = new ArrayList<>(); + // if filter is not passed then return all the blocklets + if (filterExp == null) { +pruneBlocklets = blockletDetailsFetcher.getAllBlocklets(segment, partitions); + } else { +segmentProperties = segmentPropertiesFetcher.getSegmentProperties(segment); +for (DataMap dataMap : dataMaps.get(segment)) { + + pruneBlocklets + .addAll(dataMap.prune(filterExp, segmentProperties, partitions, identifier)); +} + } + blocklets.addAll(addSegmentId( + blockletDetailsFetcher.getExtendedBlocklets(pruneBlocklets, segment), + s
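Point 1 of the commit message means a bare segment directory can be handed straight to the Spark datasource. A hedged Java sketch of that usage (assumes the carbondata datasource jar is on the classpath; the path layout mirrors the tests in this commit):

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class ReadSegmentDemo {
  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder()
        .appName("read-segment").master("local").getOrCreate();
    // Point the carbon file format at a segment folder written by an
    // older version; the store path below is illustrative.
    Dataset<Row> df = spark.read().format("carbon")
        .load("/store/testdb/testtable/Fact/Part0/Segment_0");
    df.show(false);
    spark.stop();
  }
}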
[1/2] carbondata git commit: [CARBONDATA-2910] Support backward compatibility in fileformat and added tests for load with different sort orders
Repository: carbondata Updated Branches: refs/heads/master b6bd90d80 -> 3894e1d05 http://git-wip-us.apache.org/repos/asf/carbondata/blob/3894e1d0/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala -- diff --git a/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala b/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala index 837bc4f..dcc76d8 100644 --- a/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala +++ b/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala @@ -17,6 +17,11 @@ package org.apache.spark.sql.carbondata.datasource +import java.io.File +import java.util + +import scala.collection.JavaConverters._ + import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.carbondata.datasource.TestUtil._ import org.scalatest.{BeforeAndAfterAll, FunSuite} @@ -24,6 +29,9 @@ import org.scalatest.{BeforeAndAfterAll, FunSuite} import org.apache.carbondata.core.datamap.DataMapStoreManager import org.apache.carbondata.core.datastore.impl.FileFactory import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier +import org.apache.carbondata.core.metadata.datatype.DataTypes +import org.apache.carbondata.hadoop.testutil.StoreCreator +import org.apache.carbondata.sdk.file.{CarbonWriter, Field, Schema} class SparkCarbonDataSourceTest extends FunSuite with BeforeAndAfterAll { @@ -346,7 +354,7 @@ class SparkCarbonDataSourceTest extends FunSuite with BeforeAndAfterAll { df.write.format("carbon").save(warehouse1 + "/test_folder/") if (!spark.sparkContext.version.startsWith("2.1")) { spark -.sql(s"create table test123 (c1 string, c2 string, arrayc array, structc struct<_1:string, _2:decimal(38,18)>, shortc smallint,intc int, longc bigint, doublec double, bigdecimalc decimal(38,18)) using carbon location '$warehouse1/test_folder/'") +.sql(s"create table test123 (c1 string, c2 string, shortc smallint,intc int, longc bigint, doublec double, bigdecimalc decimal(38,18), arrayc array, structc struct<_1:string, _2:decimal(38,18)>) using carbon location '$warehouse1/test_folder/'") checkAnswer(spark.sql("select * from test123"), spark.read.format("carbon").load(warehouse1 + "/test_folder/")) @@ -613,6 +621,152 @@ class SparkCarbonDataSourceTest extends FunSuite with BeforeAndAfterAll { FileFactory.deleteAllCarbonFilesOfDir(FileFactory.getCarbonFile(warehouse1 + "/test_folder")) } } + + test("test read using old data") { +val store = new StoreCreator(new File(warehouse1).getAbsolutePath, + new File(warehouse1 + "../../../../../hadoop/src/test/resources/data.csv").getCanonicalPath, + false) +store.createCarbonStore() +FileFactory.deleteAllFilesOfDir(new File(warehouse1+"/testdb/testtable/Fact/Part0/Segment_0/0")) +val dfread = spark.read.format("carbon").load(warehouse1+"/testdb/testtable/Fact/Part0/Segment_0") +dfread.show(false) +spark.sql("drop table if exists parquet_table") + } + + test("test read using different sort order data") { +if (!spark.sparkContext.version.startsWith("2.1")) { + spark.sql("drop table if exists old_comp") + FileFactory.deleteAllFilesOfDir(new File(warehouse1 + "/testdb")) + val store = new StoreCreator(new File(warehouse1).getAbsolutePath, +new File(warehouse1 + "../../../../../hadoop/src/test/resources/data.csv").getCanonicalPath, +false) 
+ store.setSortColumns(new util.ArrayList[String](Seq("name").asJava)) + var model = store.createTableAndLoadModel(false) + model.setSegmentId("0") + store.createCarbonStore(model) + FileFactory.deleteAllFilesOfDir(new File(warehouse1 + "/testdb/testtable/Fact/Part0/Segment_0/0")) + store.setSortColumns(new util.ArrayList[String](Seq("country,phonetype").asJava)) + model = store.createTableAndLoadModel(false) + model.setSegmentId("1") + store.createCarbonStore(model) + FileFactory.deleteAllFilesOfDir(new File(warehouse1 + "/testdb/testtable/Fact/Part0/Segment_1/0")) + store.setSortColumns(new util.ArrayList[String](Seq("date").asJava)) + model = store.createTableAndLoadModel(false) + model.setSegmentId("2") + store.createCarbonStore(model) + FileFactory.deleteAllFilesOfDir(new File(warehouse1 + "/testdb/testtable/Fact/Part0/Segment_2/0")) + store.setSortColumns(new util.ArrayList[String](Seq("serialname").asJava)) + model = store.createTableAndLoadModel(false) + model.setSegmentId("3") + store.createCarbonStore(model) + FileFactory.deleteAllFilesOfDir(new File(warehouse1 + "
carbondata git commit: [CARBONDATA-2876] AVRO datatype support through SDK
Repository: carbondata Updated Branches: refs/heads/master 67a8a37bf -> b6bd90d80 [CARBONDATA-2876]AVRO datatype support through SDK This PR supports following Avro DataTypes to carbon format through SDK. Avro datatypes include, 1. Avro Logical type TimeMillis 2. Avro Logical type TimeMicros This closes #2694 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/b6bd90d8 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/b6bd90d8 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/b6bd90d8 Branch: refs/heads/master Commit: b6bd90d80106a3a4f7e24fc6bf63aa2ac135f2fc Parents: 67a8a37 Author: Indhumathi27 Authored: Mon Sep 3 10:17:20 2018 +0530 Committer: manishgupta88 Committed: Fri Sep 7 10:44:16 2018 +0530 -- ...ansactionalCarbonTableWithAvroDataType.scala | 94 .../carbondata/sdk/file/AvroCarbonWriter.java | 24 ++--- 2 files changed, 106 insertions(+), 12 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/b6bd90d8/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTableWithAvroDataType.scala -- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTableWithAvroDataType.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTableWithAvroDataType.scala index 7616ea3..29aa2de 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTableWithAvroDataType.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTableWithAvroDataType.scala @@ -790,4 +790,98 @@ class TestNonTransactionalCarbonTableWithAvroDataType extends QueryTest with Bef checkExistence(sql("select * from sdkOutputTable"), true, "32.0") } + test("test logical type time-millis") { +sql("drop table if exists sdkOutputTable") + FileFactory.deleteAllCarbonFilesOfDir(FileFactory.getCarbonFile(writerPath)) +val schema1 = + """{ +| "namespace": "com.apache.schema", +| "type": "record", +| "name": "StudentActivity", +| "fields": [ +| { +| "name": "id", +| "type": {"type" : "int", "logicalType": "time-millis"} +| }, +| { +| "name": "course_details", +| "type": { +| "name": "course_details", +| "type": "record", +| "fields": [ +| { +| "name": "course_struct_course_time", +| "type": {"type" : "int", "logicalType": "time-millis"} +| } +| ] +| } +| } +| ] +|}""".stripMargin + +val json1 = + """{"id": 172800,"course_details": { "course_struct_course_time":172800}}""".stripMargin + +val nn = new org.apache.avro.Schema.Parser().parse(schema1) +val record = testUtil.jsonToAvro(json1, schema1) + + +val writer = CarbonWriter.builder + .outputPath(writerPath).isTransactionalTable(false).buildWriterForAvroInput(nn) +writer.write(record) +writer.close() +sql( + s"""CREATE EXTERNAL TABLE sdkOutputTable STORED BY + |'carbondata' LOCATION + |'$writerPath' """.stripMargin) +checkAnswer(sql("select * from sdkOutputTable"), Seq(Row(172800, Row(172800 + } + + test("test logical type time-micros") { +sql("drop table if exists sdkOutputTable") + FileFactory.deleteAllCarbonFilesOfDir(FileFactory.getCarbonFile(writerPath)) +val schema1 = + """{ +| &quo
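The logical-typed schemas used in these tests can also be built programmatically with the standard Avro API; a self-contained sketch (field names are illustrative):

import org.apache.avro.LogicalTypes;
import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;

public class TimeLogicalTypeDemo {
  public static void main(String[] args) {
    // int + time-millis and long + time-micros, the two logical types added above
    Schema timeMillis = LogicalTypes.timeMillis().addToSchema(Schema.create(Schema.Type.INT));
    Schema timeMicros = LogicalTypes.timeMicros().addToSchema(Schema.create(Schema.Type.LONG));
    Schema record = SchemaBuilder.record("StudentActivity").fields()
        .name("course_time").type(timeMillis).noDefault()
        .name("exam_time").type(timeMicros).noDefault()
        .endRecord();
    System.out.println(record.toString(true));  // pretty-printed Avro schema JSON
  }
}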
carbondata git commit: [CARBONDATA-2876]Support Avro datatype conversion through SDK
Repository: carbondata Updated Branches: refs/heads/master f012f5b13 -> b588cb655 [CARBONDATA-2876]Support Avro datatype conversion through SDK This PR supports following Avro DataTypes to carbon format through SDK. Avro datatypes include, 1. Avro Union 2. Avro Enum 3. Avro Logical type Decimal This closes #2671 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/b588cb65 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/b588cb65 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/b588cb65 Branch: refs/heads/master Commit: b588cb65564d26cdf55da7482ae7b1ee79173067 Parents: f012f5b Author: Indhumathi27 Authored: Thu Aug 30 14:50:06 2018 +0530 Committer: manishgupta88 Committed: Fri Aug 31 14:41:56 2018 +0530 -- ...ansactionalCarbonTableWithAvroDataType.scala | 793 +++ .../carbondata/sdk/file/AvroCarbonWriter.java | 331 +++- 2 files changed, 1088 insertions(+), 36 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/b588cb65/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTableWithAvroDataType.scala -- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTableWithAvroDataType.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTableWithAvroDataType.scala new file mode 100644 index 000..b50407c --- /dev/null +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTableWithAvroDataType.scala @@ -0,0 +1,793 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.spark.testsuite.createTable + +import java.io.File +import scala.collection.mutable + +import org.apache.spark.sql.Row +import org.apache.spark.sql.test.util.QueryTest +import org.scalatest.BeforeAndAfterAll + +import org.apache.carbondata.core.constants.CarbonCommonConstants +import org.apache.carbondata.core.datastore.impl.FileFactory +import org.apache.carbondata.core.util.CarbonProperties +import org.apache.carbondata.sdk.file.CarbonWriter + +/** + * Test class for Avro supported data types through SDK + */ +class TestNonTransactionalCarbonTableWithAvroDataType extends QueryTest with BeforeAndAfterAll { + + + val badRecordAction = CarbonProperties.getInstance() +.getProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION) + + var writerPath = new File(this.getClass.getResource("/").getPath ++ +"../." 
+ +"./target/SparkCarbonFileFormat/WriterOutput/") +.getCanonicalPath + + writerPath = writerPath.replace("\\", "/") + + override def beforeAll(): Unit = { +sql("DROP TABLE IF EXISTS sdkOutputTable") +CarbonProperties.getInstance() + .addProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION, "force") + } + + override def afterAll(): Unit = { +sql("DROP TABLE IF EXISTS sdkOutputTable") +CarbonProperties.getInstance() + .addProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION, badRecordAction) + } + + test("test enum") { +sql("drop table if exists sdkOutputTable") + FileFactory.deleteAllCarbonFilesOfDir(FileFactory.getCarbonFile(writerPath)) +val schema1 = + """{ +| "namespace": "com.apache.schema", +| "type": "record", +| "name": "StudentActivity", +| "fields": +| [{ +| "name": "id", +|
carbondata git commit: [HOTFIX] Support TableProperties Map API for SDK
Repository: carbondata Updated Branches: refs/heads/master 6029b2800 -> 137245057 [HOTFIX] Support TableProperties Map API for SDK Currently the SDK supports load options as map input, but table properties could not be passed as a map. So this PR adds an API that can take the already supported table properties as a map. This will help end users of the SDK configure tables easily. Also, if new table properties are added later, there is no need to create a separate API to support them. This closes #2651 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/13724505 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/13724505 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/13724505 Branch: refs/heads/master Commit: 13724505759a9b471ce78c1196bfa5337516e6e1 Parents: 6029b28 Author: ajantha-bhat Authored: Thu Aug 23 15:39:29 2018 +0530 Committer: manishgupta88 Committed: Fri Aug 24 18:29:55 2018 +0530 -- docs/sdk-guide.md | 18 +++ .../TestNonTransactionalCarbonTable.scala | 10 ++-- .../sdk/file/CarbonWriterBuilder.java | 52 +++- 3 files changed, 76 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/13724505/docs/sdk-guide.md -- diff --git a/docs/sdk-guide.md b/docs/sdk-guide.md index e592aa5..8120efa 100644 --- a/docs/sdk-guide.md +++ b/docs/sdk-guide.md @@ -351,6 +351,24 @@ public CarbonWriterBuilder withLoadOptions(Map options); ``` /** + * To support the table properties for sdk writer + * + * @param options key,value pair of create table properties. + * supported keys values are + * a. blocksize -- [1-2048] values in MB. Default value is 1024 + * b. blockletsize -- values in MB. Default value is 64 MB + * c. localDictionaryThreshold -- positive value, default is 1 + * d. enableLocalDictionary -- true / false. Default is false + * e. sortcolumns -- comma separated column. "c1,c2". Default all dimensions are sorted.
+ * + * @return updated CarbonWriterBuilder + */ +public CarbonWriterBuilder withTableProperties(Map options); +``` + + +``` +/** * Build a {@link CarbonWriter}, which accepts row in CSV format object * @param schema carbon Schema object {org.apache.carbondata.sdk.file.Schema} * @return CSVCarbonWriter http://git-wip-us.apache.org/repos/asf/carbondata/blob/13724505/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala -- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala index 39f6ddc..b08a8dd 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala @@ -33,8 +33,8 @@ import org.apache.avro.file.DataFileWriter import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, GenericRecord} import org.apache.avro.io.{DecoderFactory, Encoder} import org.apache.commons.io.FileUtils -import org.apache.spark.sql.{CarbonEnv, Row} import org.apache.spark.sql.test.util.QueryTest +import org.apache.spark.sql.{CarbonEnv, Row} import org.junit.Assert import org.scalatest.BeforeAndAfterAll @@ -2386,9 +2386,13 @@ class TestNonTransactionalCarbonTable extends QueryTest with BeforeAndAfterAll { test("test LocalDictionary with custom Threshold") { FileUtils.deleteDirectory(new File(writerPath)) +val tablePropertiesMap: util.Map[String, String] = + Map("blocksize" -> "12", +"sortcolumns" -> "name", +"localDictionaryThreshold" -> "200", +"enableLocalDictionary" -> "true").asJava val builder = CarbonWriter.builder.isTransactionalTable(false) - .sortBy(Array[String]("name")).withBlockSize(12).enableLocalDictionary(true) - .localDictionaryThreshold(200) + .withTableProperties(tablePropertiesMap) .uniqueIdentifier(System.currentTimeMillis).taskNo(System.nanoTime).outputPath(writerPath) generateCarbonData(builder) assert(FileFactory.getCarbonFile(writerPath).exists()) http://git-wip-us.apache.org/repos/asf/carbondata/blob/13724505/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder
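[Editorial illustration, not code from this commit: a hedged Java sketch of configuring a writer entirely through the table-properties map, using the keys documented above; the output path and schema are illustrative, and buildWriterForCSVInput follows the signature quoted from the sdk-guide.]

```java
import java.util.HashMap;
import java.util.Map;

import org.apache.carbondata.core.metadata.datatype.DataTypes;
import org.apache.carbondata.sdk.file.CarbonWriter;
import org.apache.carbondata.sdk.file.Field;
import org.apache.carbondata.sdk.file.Schema;

public class TablePropertiesSketch {
  public static void main(String[] args) throws Exception {
    Map<String, String> tableProperties = new HashMap<>();
    tableProperties.put("blocksize", "12");               // MB, valid range [1-2048]
    tableProperties.put("sortcolumns", "name");           // replaces a sortBy(...) call
    tableProperties.put("enableLocalDictionary", "true"); // replaces enableLocalDictionary(true)
    tableProperties.put("localDictionaryThreshold", "200");

    Schema schema = new Schema(new Field[]{
        new Field("name", DataTypes.STRING),
        new Field("age", DataTypes.INT)});

    CarbonWriter writer = CarbonWriter.builder()
        .isTransactionalTable(false)
        .withTableProperties(tableProperties)
        .outputPath("/tmp/carbon/table-properties")       // hypothetical output directory
        .buildWriterForCSVInput(schema);
    writer.write(new String[]{"robot", "1"});
    writer.close();
  }
}
```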
carbondata git commit: [CARBONDATA-2829][CARBONDATA-2832] Fix creating merge index on older V1 V2 store
Repository: carbondata Updated Branches: refs/heads/master 40571b846 -> b702a1b01 [CARBONDATA-2829][CARBONDATA-2832] Fix creating merge index on older V1 V2 store Block merge index creation for the old store V1 V2 versions This closes #2608 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/b702a1b0 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/b702a1b0 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/b702a1b0 Branch: refs/heads/master Commit: b702a1b01414308de710c1d1471a064184843c37 Parents: 40571b8 Author: dhatchayani Authored: Mon Aug 6 12:15:26 2018 +0530 Committer: manishgupta88 Committed: Tue Aug 7 14:10:44 2018 +0530 -- .../management/CarbonAlterTableCompactionCommand.scala| 10 ++ 1 file changed, 10 insertions(+) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/b702a1b0/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonAlterTableCompactionCommand.scala -- diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonAlterTableCompactionCommand.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonAlterTableCompactionCommand.scala index a4adbbb..e0b0547 100644 --- a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonAlterTableCompactionCommand.scala +++ b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonAlterTableCompactionCommand.scala @@ -37,6 +37,7 @@ import org.apache.carbondata.core.constants.CarbonCommonConstants import org.apache.carbondata.core.datastore.impl.FileFactory import org.apache.carbondata.core.exception.ConcurrentOperationException import org.apache.carbondata.core.locks.{CarbonLockFactory, LockUsage} +import org.apache.carbondata.core.metadata.ColumnarFormatVersion import org.apache.carbondata.core.metadata.schema.table.{CarbonTable, TableInfo} import org.apache.carbondata.core.mutate.CarbonUpdateUtil import org.apache.carbondata.core.statusmanager.SegmentStatusManager @@ -122,6 +123,15 @@ case class CarbonAlterTableCompactionCommand( "Unsupported alter operation on carbon table: Merge index is not supported on streaming" + " table") } + val version = CarbonUtil.getFormatVersion(table) + val isOlderVersion = version == ColumnarFormatVersion.V1 || + version == ColumnarFormatVersion.V2 + if (isOlderVersion) { +throw new MalformedCarbonCommandException( + "Unsupported alter operation on carbon table: Merge index is not supported on V1 V2 " + + "store segments") + } + val alterTableMergeIndexEvent: AlterTableMergeIndexEvent = AlterTableMergeIndexEvent(sparkSession, table, alterTableModel) OperationListenerBus.getInstance
carbondata git commit: [CARBONDATA-2813] Fixed code to get data size from LoadDetails if size is written there
Repository: carbondata Updated Branches: refs/heads/master f2e898ac5 -> 38384cb9f [CARBONDATA-2813] Fixed code to get data size from LoadDetails if size is written there Problem: In 1.3.x, when index files are merged to form a mergeindex file, a mapping of which index files are merged into which mergeindex is kept in the segments file. In 1.4.x, both the index and merge index files are scanned to calculate the size of segments for major compaction. As the index file was deleted in the 1.3.x store, in 1.4.x it was throwing "Unable to get File status exception". Solution: Try to get the size of the segments from LoadMetadataDetails. If not present, then read the size from the index files. This closes #2600 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/38384cb9 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/38384cb9 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/38384cb9 Branch: refs/heads/master Commit: 38384cb9f309cc7eb83e61e85c48dd8583921004 Parents: f2e898a Author: kunal642 Authored: Thu Aug 2 11:44:20 2018 +0530 Committer: manishgupta88 Committed: Thu Aug 2 18:14:56 2018 +0530 -- .../processing/merger/CarbonDataMergerUtil.java | 12 ++-- 1 file changed, 10 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/38384cb9/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonDataMergerUtil.java -- diff --git a/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonDataMergerUtil.java b/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonDataMergerUtil.java index 1162fc2..e3da86d 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonDataMergerUtil.java +++ b/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonDataMergerUtil.java @@ -49,6 +49,8 @@ import org.apache.carbondata.core.writer.CarbonDeleteDeltaWriterImpl; import org.apache.carbondata.processing.loading.model.CarbonLoadModel; import org.apache.carbondata.processing.util.CarbonLoaderUtil; +import org.apache.commons.lang.StringUtils; + /** * utility class for load merging. */ @@ -649,8 +651,14 @@ public final class CarbonDataMergerUtil { // variable to store one segment size across partition. long sizeOfOneSegmentAcrossPartition; if (segment.getSegmentFile() != null) { -sizeOfOneSegmentAcrossPartition = CarbonUtil.getSizeOfSegment( -carbonTable.getTablePath(), new Segment(segId, segment.getSegmentFile())); +// If LoadMetaDataDetail already has data size no need to calculate the data size from +// index files. If not there then read the index file and calculate size. +if (!StringUtils.isEmpty(segment.getDataSize())) { + sizeOfOneSegmentAcrossPartition = Long.parseLong(segment.getDataSize()); +} else { + sizeOfOneSegmentAcrossPartition = CarbonUtil.getSizeOfSegment(carbonTable.getTablePath(), + new Segment(segId, segment.getSegmentFile())); +} } else { sizeOfOneSegmentAcrossPartition = getSizeOfSegment(carbonTable.getTablePath(), segId); }
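[Editorial illustration: a self-contained restatement of the fallback in plain Java, with hypothetical stand-ins for LoadMetadataDetails and the index-file scan.]

```java
import org.apache.commons.lang.StringUtils;

public class SegmentSizeFallback {
  // stand-in for LoadMetadataDetails.getDataSize(): empty for segments written by older stores
  static String recordedDataSize = "";

  // hypothetical stand-in for reading the size out of the segment's index files
  static long readSizeFromIndexFiles() {
    return 1024L;
  }

  public static void main(String[] args) {
    long sizeOfSegment = !StringUtils.isEmpty(recordedDataSize)
        ? Long.parseLong(recordedDataSize)  // fast path: size already recorded in table status
        : readSizeFromIndexFiles();         // fallback: scan the index files, as before
    System.out.println(sizeOfSegment);      // prints 1024, since no size was recorded
  }
}
```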
carbondata git commit: [CARBONDATA-2805] Fix the ordering mismatch of segment numbers during custom compaction
Repository: carbondata Updated Branches: refs/heads/master cfbf7b6ec -> c29aef880 [CARBONDATA-2805] Fix the ordering mismatch of segment numbers during custom compaction Problem: when we have segments from 0 to 6 and I give 1, 2, 3 for custom compaction, then it should create 1.1 as the compacted segment, but sometimes it will create 3.1 as the compacted segment, which is wrong. This is because the custom Segment IDs were passed in a HashSet and finally inserted into a HashMap while identifying the segments to be merged. HashMap and HashSet do not guarantee insertion order, which may lead to a mismatch of segment numbers. Solution: Use LinkedHashSet and LinkedHashMap, which always preserve insertion order. This closes #2585 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/c29aef88 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/c29aef88 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/c29aef88 Branch: refs/heads/master Commit: c29aef880a57d1f1297361a5296e77af3904d661 Parents: cfbf7b6 Author: akashrn5 Authored: Mon Jul 30 19:22:29 2018 +0530 Committer: manishgupta88 Committed: Wed Aug 1 13:38:08 2018 +0530 -- .../processing/merger/CarbonDataMergerUtil.java | 10 -- 1 file changed, 4 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/c29aef88/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonDataMergerUtil.java -- diff --git a/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonDataMergerUtil.java b/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonDataMergerUtil.java index 78af751..1162fc2 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonDataMergerUtil.java +++ b/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonDataMergerUtil.java @@ -391,7 +391,6 @@ public final class CarbonDataMergerUtil { CarbonLoadModel carbonLoadModel, long compactionSize, List segments, CompactionType compactionType, List customSegmentIds) throws IOException, MalformedCarbonCommandException { -String tablePath = carbonLoadModel.getTablePath(); Map tableLevelProperties = carbonLoadModel.getCarbonDataLoadSchema() .getCarbonTable().getTableInfo().getFactTable().getTableProperties(); List sortedSegments = new ArrayList(segments); @@ -400,7 +399,7 @@ public final class CarbonDataMergerUtil { if (CompactionType.CUSTOM == compactionType) { return identitySegmentsToBeMergedBasedOnSpecifiedSegments(sortedSegments, - new HashSet<>(customSegmentIds)); + new LinkedHashSet<>(customSegmentIds)); } // Check for segments which are qualified for IUD compaction.
@@ -424,7 +423,7 @@ public final class CarbonDataMergerUtil { if (CompactionType.MAJOR == compactionType) { listOfSegmentsToBeMerged = identifySegmentsToBeMergedBasedOnSize(compactionSize, - listOfSegmentsLoadedInSameDateInterval, carbonLoadModel, tablePath); + listOfSegmentsLoadedInSameDateInterval, carbonLoadModel); } else { listOfSegmentsToBeMerged = @@ -462,7 +461,7 @@ public final class CarbonDataMergerUtil { List listOfSegments, Set segmentIds) throws MalformedCarbonCommandException { Map specifiedSegments = -new HashMap<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE); +new LinkedHashMap<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE); for (LoadMetadataDetails detail : listOfSegments) { if (segmentIds.contains(detail.getLoadName())) { specifiedSegments.put(detail.getLoadName(), detail); @@ -623,13 +622,12 @@ public final class CarbonDataMergerUtil { * @param listOfSegmentsAfterPreserve the segments list after *preserving the configured number of latest loads * @param carbonLoadModel carbon load model - * @param tablePath the store location of the segment * @return the list of segments that need to be merged * based on the Size in case of Major compaction */ private static List identifySegmentsToBeMergedBasedOnSize( long compactionSize, List listOfSegmentsAfterPreserve, - CarbonLoadModel carbonLoadModel, String tablePath) throws IOException { + CarbonLoadModel carbonLoadModel) throws IOException { List segmentsToBeMerged = new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
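[Editorial illustration: a standalone demonstration in plain Java, not CarbonData code, of why the swap fixes the ordering — HashSet iteration order is unspecified, while LinkedHashSet preserves insertion order.]

```java
import java.util.Arrays;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.Set;

public class InsertionOrderDemo {
  public static void main(String[] args) {
    // segment IDs in the order the user specified them for custom compaction
    Set<String> hashed = new HashSet<>(Arrays.asList("10", "11", "12"));
    Set<String> linked = new LinkedHashSet<>(Arrays.asList("10", "11", "12"));
    // HashSet iteration order is unspecified, e.g. [11, 12, 10] on a typical JDK,
    // so the compacted segment could end up named 11.1 instead of 10.1
    System.out.println(hashed);
    // LinkedHashSet always iterates in insertion order: [10, 11, 12]
    System.out.println(linked);
  }
}
```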
carbondata git commit: [HotFix][CARBONDATA-2788][BloomDataMap] Fix bugs in incorrect query result with bloom datamap
Repository: carbondata Updated Branches: refs/heads/master 34ca02142 -> 1cea4d33f [HotFix][CARBONDATA-2788][BloomDataMap] Fix bugs in incorrect query result with bloom datamap This PR solves two problems that affect the correctness of queries on bloom. Revert PR2539: after reviewing the code, we found that the modification in PR2539 is not needed, so we revert that PR. Bugs in overflow for blocklet count: Carbondata stores the blocklet count for each block in a byte, so when a block contains more than 127 blocklets the count overflows the byte limit. Here we change the data type to short. For cache_level=block, after pruning by the main BlockDataMap, the blockletNo in Blocklet is -1, which indicates that the following procedure will scan the whole block -- all the blocklets in the block. So, when doing intersection with the pruned result from BloomDataMap, we need to take care of these blocklets. In this implementation, we added the result from BloomDataMap based on the blocklet's existence in BlockDataMap. This closes #2565 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/1cea4d33 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/1cea4d33 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/1cea4d33 Branch: refs/heads/master Commit: 1cea4d33ff9096fab5d38a1403e1e78c2fa2d6dc Parents: 34ca021 Author: xuchuanyin Authored: Thu Jul 26 23:22:58 2018 +0800 Committer: manishgupta88 Committed: Wed Aug 1 10:40:07 2018 +0530 -- .../indexstore/blockletindex/BlockDataMap.java | 24 +++--- .../blockletindex/BlockletDataMapFactory.java | 2 +- .../hadoop/api/CarbonInputFormat.java | 28 ++-- .../lucene/LuceneFineGrainDataMapSuite.scala| 14 +++--- .../datamap/IndexDataMapRebuildRDD.scala| 10 - .../BloomCoarseGrainDataMapFunctionSuite.scala | 46 +++- .../bloom/BloomCoarseGrainDataMapSuite.scala| 2 +- 7 files changed, 104 insertions(+), 22 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/1cea4d33/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockDataMap.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockDataMap.java b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockDataMap.java index 82006c3..f4bb58e 100644 --- a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockDataMap.java +++ b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockDataMap.java @@ -17,6 +17,7 @@ package org.apache.carbondata.core.indexstore.blockletindex; import java.io.*; +import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.BitSet; import java.util.List; @@ -58,7 +59,6 @@ import org.apache.carbondata.core.util.CarbonUtil; import org.apache.carbondata.core.util.DataFileFooterConverter; import org.apache.carbondata.core.util.path.CarbonTablePath; -import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.fs.Path; @@ -248,8 +248,8 @@ public class BlockDataMap extends CoarseGrainDataMap byte[][] blockMinValues = null; byte[][] blockMaxValues = null; DataMapRowImpl summaryRow = null; -List blockletCountInEachBlock = new ArrayList<>(indexInfo.size()); -byte totalBlockletsInOneBlock = 0; +List blockletCountInEachBlock = new ArrayList<>(indexInfo.size()); +short totalBlockletsInOneBlock = 0; boolean isLastFileFooterEntryNeedToBeAdded = false; CarbonRowSchema[] schema = getFileFooterEntrySchema(); for (DataFileFooter 
fileFooter : indexInfo) { @@ -318,13 +318,22 @@ public class BlockDataMap extends CoarseGrainDataMap blockMinValues, blockMaxValues); blockletCountInEachBlock.add(totalBlockletsInOneBlock); } -byte[] blockletCount = ArrayUtils -.toPrimitive(blockletCountInEachBlock.toArray(new Byte[blockletCountInEachBlock.size()])); +byte[] blockletCount = convertRowCountFromShortToByteArray(blockletCountInEachBlock); // blocklet count index is the last index summaryRow.setByteArray(blockletCount, taskSummarySchema.length - 1); return summaryRow; } + private byte[] convertRowCountFromShortToByteArray(List blockletCountInEachBlock) { +int bufferSize = blockletCountInEachBlock.size() * 2; +ByteBuffer byteBuffer = ByteBuffer.allocate(bufferSize); +for (Short blockletCount : blockletCountInEachBlock) { + byteBuffer.putShort(blockletCount); +} +byteBuffer.rewind(); +return byteBuffer.array(); + } + protected void setLocations(String[] locations, DataMap
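[Editorial illustration: the overflow is easy to reproduce in isolation; a minimal sketch in plain Java, not CarbonData code, of both the bug and the two-bytes-per-count serialization the fix introduces.]

```java
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.List;

public class BlockletCountOverflow {
  public static void main(String[] args) {
    // a byte only reaches 127, so a block with 130 blocklets wraps to a negative count
    byte asByte = (byte) 130;
    short asShort = (short) 130;
    System.out.println(asByte + " vs " + asShort);  // prints: -126 vs 130

    // serializing the per-block counts as shorts, two bytes each, as in the fix
    List<Short> countsPerBlock = Arrays.asList((short) 130, (short) 64);
    ByteBuffer buffer = ByteBuffer.allocate(countsPerBlock.size() * 2);
    for (short count : countsPerBlock) {
      buffer.putShort(count);
    }
    buffer.rewind();
    System.out.println(buffer.getShort());          // prints: 130, recovered intact
  }
}
```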
carbondata git commit: [CARBONDATA-2778]Fixed bug when select after delete and cleanup is showing empty records
Repository: carbondata Updated Branches: refs/heads/master 005db3fa3 -> d62fe9e65 [CARBONDATA-2778]Fixed bug when select after delete and cleanup is showing empty records Problem: In a delete operation, when it is found that the data being deleted leads to a state where one complete block's data gets deleted, the status of that block is marked for delete, and during the next delete operation run the block is deleted along with its carbonIndex file. The problem arises due to the deletion of the carbonIndex file, because one carbonIndex file can cover multiple blocks, as one carbonIndex file represents one task. Solution: Do not delete the carbondata and carbonIndex files. After compaction, the stale data and stale segments are automatically cleaned up. This closes #2548 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/d62fe9e6 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/d62fe9e6 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/d62fe9e6 Branch: refs/heads/master Commit: d62fe9e65a0fd61832f7b4080672c1503a7a0ae3 Parents: 005db3f Author: kunal642 Authored: Tue Jul 24 16:12:54 2018 +0530 Committer: manishgupta88 Committed: Thu Jul 26 15:08:15 2018 +0530 -- .../core/mutate/CarbonUpdateUtil.java | 16 .../SegmentUpdateStatusManager.java | 27 2 files changed, 43 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/d62fe9e6/core/src/main/java/org/apache/carbondata/core/mutate/CarbonUpdateUtil.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/mutate/CarbonUpdateUtil.java b/core/src/main/java/org/apache/carbondata/core/mutate/CarbonUpdateUtil.java index 4a8d2e8..7df3937 100644 --- a/core/src/main/java/org/apache/carbondata/core/mutate/CarbonUpdateUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/mutate/CarbonUpdateUtil.java @@ -541,22 +541,6 @@ public class CarbonUpdateUtil { compareTimestampsAndDelete(invalidFile, forceDelete, false); } -CarbonFile[] blockRelatedFiles = updateStatusManager -.getAllBlockRelatedFiles(allSegmentFiles, -block.getActualBlockName()); -// now for each invalid index file need to check the query execution time out -// and then delete.
- -for (CarbonFile invalidFile : blockRelatedFiles) { - - if (compareTimestampsAndDelete(invalidFile, forceDelete, false)) { -if (invalidFile.getName().endsWith(CarbonCommonConstants.UPDATE_INDEX_FILE_EXT)) { - updateSegmentFile = true; -} - } -} - } else { invalidDeleteDeltaFiles = updateStatusManager .getDeleteDeltaInvalidFilesList(block, false, http://git-wip-us.apache.org/repos/asf/carbondata/blob/d62fe9e6/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentUpdateStatusManager.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentUpdateStatusManager.java b/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentUpdateStatusManager.java index 083325d..5d5e8b0 100644 --- a/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentUpdateStatusManager.java +++ b/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentUpdateStatusManager.java @@ -826,31 +826,4 @@ public class SegmentUpdateStatusManager { return files.toArray(new CarbonFile[files.size()]); } - - /** - * - * @param allSegmentFiles - * @return - */ - public CarbonFile[] getAllBlockRelatedFiles(CarbonFile[] allSegmentFiles, - String actualBlockName) { -List files = new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE); - -for (CarbonFile eachFile : allSegmentFiles) { - - // for carbon data. - if (eachFile.getName().equalsIgnoreCase(actualBlockName)) { -files.add(eachFile); - } - - // get carbon index files of the block. - String indexFileName = CarbonTablePath.getCarbonIndexFileName(actualBlockName); - if (eachFile.getName().equalsIgnoreCase(indexFileName)) { -files.add(eachFile); - } - -} - -return files.toArray(new CarbonFile[files.size()]); - } }
carbondata git commit: [CARBONDATA-2779]Fixed filter query issue in case of V1/v2 format store
Repository: carbondata Updated Branches: refs/heads/master 34e74174e -> 316e9de65 [CARBONDATA-2779]Fixed filter query issue in case of V1/v2 format store Problem: Filter queries are failing for the V1/V2 carbondata store. Root Cause: in the V1 store, measure min/max was not added to the block min/max index; in the executor, when a filter is applied, min/max pruning fails with an array index out of bounds exception. Solution: Add min/max for measure columns, the same as is already handled in driver block pruning. This closes #2550 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/316e9de6 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/316e9de6 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/316e9de6 Branch: refs/heads/master Commit: 316e9de658735fe177ca737f35ad23762aa18ad2 Parents: 34e7417 Author: kumarvishal09 Authored: Tue Jul 24 20:10:54 2018 +0530 Committer: manishgupta88 Committed: Wed Jul 25 20:13:02 2018 +0530 -- .../indexstore/blockletindex/IndexWrapper.java | 8 +- .../executor/impl/AbstractQueryExecutor.java| 95 ++-- 2 files changed, 50 insertions(+), 53 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/316e9de6/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/IndexWrapper.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/IndexWrapper.java b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/IndexWrapper.java index 1de3122..9588f57 100644 --- a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/IndexWrapper.java +++ b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/IndexWrapper.java @@ -16,7 +16,6 @@ */ package org.apache.carbondata.core.indexstore.blockletindex; -import java.io.IOException; import java.util.List; import org.apache.carbondata.core.datastore.block.AbstractIndex; @@ -34,12 +33,11 @@ public class IndexWrapper extends AbstractIndex { private List blockInfos; - public IndexWrapper(List blockInfos) throws IOException { + public IndexWrapper(List blockInfos, SegmentProperties segmentProperties) { this.blockInfos = blockInfos; -segmentProperties = new SegmentProperties(blockInfos.get(0).getDetailInfo().getColumnSchemas(), -blockInfos.get(0).getDetailInfo().getDimLens()); +this.segmentProperties = segmentProperties; dataRefNode = new BlockletDataRefNode(blockInfos, 0, -segmentProperties.getDimensionColumnsValueSize()); +this.segmentProperties.getDimensionColumnsValueSize()); } @Override public void buildIndex(List footerList) { http://git-wip-us.apache.org/repos/asf/carbondata/blob/316e9de6/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java b/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java index c8c8a0f..5b67921 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java @@ -40,13 +40,11 @@ import org.apache.carbondata.core.datastore.IndexKey; import org.apache.carbondata.core.datastore.block.AbstractIndex; import org.apache.carbondata.core.datastore.block.SegmentProperties; import org.apache.carbondata.core.datastore.block.TableBlockInfo; -import org.apache.carbondata.core.datastore.block.TableBlockUniqueIdentifier; import 
org.apache.carbondata.core.indexstore.BlockletDetailInfo; import org.apache.carbondata.core.indexstore.blockletindex.BlockletDataRefNode; import org.apache.carbondata.core.indexstore.blockletindex.IndexWrapper; import org.apache.carbondata.core.keygenerator.KeyGenException; import org.apache.carbondata.core.memory.UnsafeMemoryManager; -import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier; import org.apache.carbondata.core.metadata.blocklet.BlockletInfo; import org.apache.carbondata.core.metadata.blocklet.DataFileFooter; import org.apache.carbondata.core.metadata.datatype.DataType; @@ -65,6 +63,7 @@ import org.apache.carbondata.core.scan.model.ProjectionMeasure; import org.apache.carbondata.core.scan.model.QueryModel; import org.apache.carbondata.core.stats.QueryStatistic; import org.apache.carbondata.core.stats.QueryStatisticsConstants; +import org.apache.carbondata.core.util.BlockletDataMapUtil; import org.apache.carbondata.core.util.CarbonProperties; imp
carbondata git commit: [CARBONDATA-2648] Documentation for support for COLUMN_META_CACHE and CACHE_LEVEL in create table and alter table properties
Repository: carbondata Updated Branches: refs/heads/master 06d38ff4b -> 34e74174e [CARBONDATA-2648] Documentation for support for COLUMN_META_CACHE and CACHE_LEVEL in create table and alter table properties Documentation for support for COLUMN_META_CACHE and CACHE_LEVEL in create table and alter table properties This closes #2558 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/34e74174 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/34e74174 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/34e74174 Branch: refs/heads/master Commit: 34e74174e0e83b00a6dc603eb86bbcc64533d1ac Parents: 06d38ff Author: sgururajshetty Authored: Wed Jul 25 18:14:07 2018 +0530 Committer: manishgupta88 Committed: Wed Jul 25 19:00:15 2018 +0530 -- docs/data-management-on-carbondata.md | 98 +- 1 file changed, 97 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/34e74174/docs/data-management-on-carbondata.md -- diff --git a/docs/data-management-on-carbondata.md b/docs/data-management-on-carbondata.md index 4532b41..da259a6 100644 --- a/docs/data-management-on-carbondata.md +++ b/docs/data-management-on-carbondata.md @@ -141,7 +141,103 @@ This tutorial is going to introduce all commands and data operations on CarbonDa 'SORT_SCOPE'='NO_SORT') ``` **NOTE:** CarbonData also supports "using carbondata". Find example code at [SparkSessionExample](https://github.com/apache/carbondata/blob/master/examples/spark2/src/main/scala/org/apache/carbondata/examples/SparkSessionExample.scala) in the CarbonData repo. - + + - **Caching Min/Max Value for Required Columns** + By default, CarbonData caches min and max values of all the columns in schema. As the load increases, the memory required to hold the min and max values increases considerably. This feature enables you to configure min and max values only for the required columns, resulting in optimized memory usage. + +Following are the valid values for COLUMN_META_CACHE: +* If you want no column min/max values to be cached in the driver. + +``` +COLUMN_META_CACHE='' +``` + +* If you want only col1 min/max values to be cached in the driver. + +``` +COLUMN_META_CACHE='col1' +``` + +* If you want min/max values to be cached in driver for all the specified columns. + +``` +COLUMN_META_CACHE='col1,col2,col3,…' +``` + +Columns to be cached can be specified either while creating the table or after creation of the table. +During the create table operation, specify the columns to be cached in table properties. + +Syntax: + +``` +CREATE TABLE [dbName].tableName (col1 String, col2 String, col3 int,…) STORED BY 'carbondata' TBLPROPERTIES ('COLUMN_META_CACHE'='col1,col2,…') +``` + +Example: + +``` +CREATE TABLE employee (name String, city String, id int) STORED BY 'carbondata' TBLPROPERTIES ('COLUMN_META_CACHE'='name') +``` + +After creation of the table, or on already created tables, use the alter table command to configure the columns to be cached. + +Syntax: + +``` +ALTER TABLE [dbName].tableName SET TBLPROPERTIES ('COLUMN_META_CACHE'='col1,col2,…') +``` + +Example: + +``` +ALTER TABLE employee SET TBLPROPERTIES ('COLUMN_META_CACHE'='city') +``` + + - **Caching at Block or Blocklet Level** + This feature allows you to maintain the cache at Block level, resulting in optimized usage of the memory. The memory consumption is high if the Blocklet level caching is maintained as a Block can have multiple Blocklets. 
+ +Following are the valid values for CACHE_LEVEL: +* Configuration for caching in driver at Block level (default value). + +``` +CACHE_LEVEL= 'BLOCK' +``` + +* Configuration for caching in driver at Blocklet level. + +``` +CACHE_LEVEL= 'BLOCKLET' +``` + +Cache level can be specified either while creating the table or after creation of the table. +During the create table operation, specify the cache level in table properties. + +Syntax: + +``` +CREATE TABLE [dbName].tableName (col1 String, col2 String, col3 int,…) STORED BY '
carbondata git commit: [CARBONDATA-2753] Fix Compatibility issues
Repository: carbondata Updated Branches: refs/heads/master 6d40d3a98 -> a37a2ff7f [CARBONDATA-2753] Fix Compatibility issues Dictionary path is set on the AbsoluteTableIdentifier, so that child/dependent tables can use the dictionary path of the parent or their own This closes #2530 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/a37a2ff7 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/a37a2ff7 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/a37a2ff7 Branch: refs/heads/master Commit: a37a2ff7f450a297590d4e97ec81ec5c56a9cc4a Parents: 6d40d3a Author: dhatchayani Authored: Thu Jul 19 19:13:24 2018 +0530 Committer: manishgupta88 Committed: Wed Jul 25 13:52:33 2018 +0530 -- .../core/metadata/AbsoluteTableIdentifier.java | 12 .../core/metadata/schema/table/CarbonTable.java | 6 +++- .../carbondata/core/scan/filter/FilterUtil.java | 29 .../TestBlockletDataMapFactory.java | 6 4 files changed, 34 insertions(+), 19 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/a37a2ff7/core/src/main/java/org/apache/carbondata/core/metadata/AbsoluteTableIdentifier.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/AbsoluteTableIdentifier.java b/core/src/main/java/org/apache/carbondata/core/metadata/AbsoluteTableIdentifier.java index 3ea1f60..4cd33f6 100644 --- a/core/src/main/java/org/apache/carbondata/core/metadata/AbsoluteTableIdentifier.java +++ b/core/src/main/java/org/apache/carbondata/core/metadata/AbsoluteTableIdentifier.java @@ -36,6 +36,11 @@ public class AbsoluteTableIdentifier implements Serializable { */ private String tablePath; + /** + * dictionary path of the table + */ + private String dictionaryPath; + /** * carbon table identifier which will have table name and table database @@ -146,4 +151,11 @@ public class AbsoluteTableIdentifier implements Serializable { return carbonTableIdentifier.toString(); } + public String getDictionaryPath() { +return dictionaryPath; + } + + public void setDictionaryPath(String dictionaryPath) { +this.dictionaryPath = dictionaryPath; + } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/a37a2ff7/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java index 995f943..850a791 100644 --- a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java +++ b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java @@ -805,7 +805,11 @@ public class CarbonTable implements Serializable { * @return absolute table identifier */ public AbsoluteTableIdentifier getAbsoluteTableIdentifier() { -return tableInfo.getOrCreateAbsoluteTableIdentifier(); +AbsoluteTableIdentifier absoluteTableIdentifier = +tableInfo.getOrCreateAbsoluteTableIdentifier(); +absoluteTableIdentifier.setDictionaryPath( + tableInfo.getFactTable().getTableProperties().get(CarbonCommonConstants.DICTIONARY_PATH)); +return absoluteTableIdentifier; } /** http://git-wip-us.apache.org/repos/asf/carbondata/blob/a37a2ff7/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java index 0587b33..bae608f 100644 --- 
a/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java @@ -56,12 +56,10 @@ import org.apache.carbondata.core.keygenerator.KeyGenerator; import org.apache.carbondata.core.keygenerator.factory.KeyGeneratorFactory; import org.apache.carbondata.core.keygenerator.mdkey.MultiDimKeyVarLengthGenerator; import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier; -import org.apache.carbondata.core.metadata.CarbonMetadata; import org.apache.carbondata.core.metadata.ColumnIdentifier; import org.apache.carbondata.core.metadata.datatype.DataType; import org.apache.carbondata.core.metadata.datatype.DataTypes; import org.apache.carbondata.core.metadata.encoder.Encoding; -import org.apache.carbondata.core.metadata.schema.table.CarbonTable; import org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn; imp
carbondata git commit: [CARBONDATA-2621][BloomDataMap] Lock problem in index datamap
Repository: carbondata Updated Branches: refs/heads/master b853963b2 -> bea277f83 [CARBONDATA-2621][BloomDataMap] Lock problem in index datamap Problem The locking for the index Datamap is not correct. The HDFS lock is not working properly, because the lock is getting created in the local filesystem instead of HDFS. Solution Corrected the system folder path. This closes #2387 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/bea277f8 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/bea277f8 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/bea277f8 Branch: refs/heads/master Commit: bea277f83001c6d3f0cc3702bd35fa90435dd400 Parents: b853963 Author: mohammadshahidkhan Authored: Wed Jun 20 15:48:17 2018 +0530 Committer: manishgupta88 Committed: Tue Jul 24 15:34:56 2018 +0530 -- .../apache/carbondata/core/util/CarbonProperties.java| 3 +++ .../table/DiskBasedDMSchemaStoraheProviderSuite.java | 2 +- .../carbondata/hadoop/ft/CarbonTableInputFormatTest.java | 3 +++ .../hadoop/ft/CarbonTableOutputFormatTest.java | 2 ++ .../presto/integrationtest/PrestoAllDataTypeTest.scala | 5 + .../datamap/lucene/LuceneFineGrainDataMapSuite.scala | 2 -- .../LuceneFineGrainDataMapWithSearchModeSuite.scala | 6 -- .../org/apache/spark/sql/test/TestQueryExecutor.scala| 6 ++ .../apache/carbondata/sdk/file/AvroCarbonWriterTest.java | 11 +++ .../apache/carbondata/sdk/file/CSVCarbonWriterTest.java | 11 +++ .../sdk/file/CSVNonTransactionalCarbonWriterTest.java| 11 +++ .../org/apache/carbondata/sdk/file/CarbonReaderTest.java | 10 ++ .../apache/carbondata/store/LocalCarbonStoreTest.java| 11 +++ 13 files changed, 74 insertions(+), 9 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/bea277f8/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java b/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java index f7ace5e..004eb74 100644 --- a/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java +++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java @@ -34,6 +34,7 @@ import org.apache.carbondata.common.logging.LogServiceFactory; import org.apache.carbondata.core.constants.CarbonCommonConstants; import org.apache.carbondata.core.constants.CarbonLoadOptionConstants; import org.apache.carbondata.core.constants.CarbonV3DataFormatConstants; +import org.apache.carbondata.core.datastore.impl.FileFactory; import org.apache.carbondata.core.metadata.ColumnarFormatVersion; import static org.apache.carbondata.core.constants.CarbonCommonConstants.BLOCKLET_SIZE; import static org.apache.carbondata.core.constants.CarbonCommonConstants.CARBON_CUSTOM_BLOCK_DISTRIBUTION; @@ -1531,6 +1532,8 @@ public final class CarbonProperties { if (systemLocation == null) { systemLocation = getStorePath(); } +systemLocation = CarbonUtil.checkAndAppendFileSystemURIScheme(systemLocation); +systemLocation = FileFactory.getUpdatedFilePath(systemLocation); return systemLocation + CarbonCommonConstants.FILE_SEPARATOR + "_system"; } http://git-wip-us.apache.org/repos/asf/carbondata/blob/bea277f8/core/src/test/java/org/apache/carbondata/core/metadata/schema/table/DiskBasedDMSchemaStoraheProviderSuite.java -- diff --git a/core/src/test/java/org/apache/carbondata/core/metadata/schema/table/DiskBasedDMSchemaStoraheProviderSuite.java 
b/core/src/test/java/org/apache/carbondata/core/metadata/schema/table/DiskBasedDMSchemaStoraheProviderSuite.java index 709215a..9af7f8f 100644 --- a/core/src/test/java/org/apache/carbondata/core/metadata/schema/table/DiskBasedDMSchemaStoraheProviderSuite.java +++ b/core/src/test/java/org/apache/carbondata/core/metadata/schema/table/DiskBasedDMSchemaStoraheProviderSuite.java @@ -37,7 +37,7 @@ public class DiskBasedDMSchemaStoraheProviderSuite { @BeforeClass public static void setUp() throws IOException { String path = -new File(DiskBasedDMSchemaStorageProvider.class.getResource("/").getPath() + "../") +new File(DiskBasedDMSchemaStoraheProviderSuite.class.getResource("/").getPath() + "../") .getCanonicalPath().replaceAll("", "/"); CarbonProperties.getInstance() http://git-wip-us.apache.org/repos/asf/carbondata/blob/bea277f8/hadoop/src/
carbondata git commit: [CARBONDATA-2753] Fix Compatibility issues on index Files with 1.3 store
Repository: carbondata Updated Branches: refs/heads/master 45960f4a8 -> 7ab670652 [CARBONDATA-2753] Fix Compatibility issues on index Files with 1.3 store Problem: Currently, in the segmentFile we write the index files list in the files field only if the files exist; otherwise it is empty (in case they are merged into a merge index file). But in the old store, we were writing both the files and mergeFileName fields even if the index files are merged. Solution: While querying, we have to check the physical existence of the index files listed in the files field. If a file physically exists, then we have to consider it. This closes #2534 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/7ab67065 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/7ab67065 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/7ab67065 Branch: refs/heads/master Commit: 7ab6706523d2da008585b41f8587762f94c1bdd4 Parents: 45960f4 Author: dhatchayani Authored: Fri Jul 20 20:06:00 2018 +0530 Committer: manishgupta88 Committed: Mon Jul 23 18:27:51 2018 +0530 -- .../carbondata/core/metadata/SegmentFileStore.java | 11 +-- 1 file changed, 9 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/7ab67065/core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java b/core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java index 9681e37..28ac47e 100644 --- a/core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java +++ b/core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java @@ -580,7 +580,7 @@ public class SegmentFileStore { * Gets all index files from this segment * @return */ - public Map getIndexOrMergeFiles() { + public Map getIndexOrMergeFiles() throws IOException { Map indexFiles = new HashMap<>(); if (segmentFile != null) { for (Map.Entry entry : getLocationMap().entrySet()) { @@ -597,7 +597,14 @@ public class SegmentFileStore { Set files = entry.getValue().getFiles(); if (null != files && !files.isEmpty()) { for (String indexFile : files) { - indexFiles.put(location + CarbonCommonConstants.FILE_SEPARATOR + indexFile, null); + String indexFilePath = location + CarbonCommonConstants.FILE_SEPARATOR + indexFile; + // In the 1.3 store, files field contain the carbonindex files names + // even if they are merged to a carbonindexmerge file. In that case we have to check + // for the physical existence of the file to decide + // on whether it is already merged or not. + if (FileFactory.isFileExist(indexFilePath)) { +indexFiles.put(indexFilePath, null); + } } } }
carbondata git commit: [CARBONDATA-2734] Update is not working on the table which has segmentfile present
Repository: carbondata Updated Branches: refs/heads/master a20f22eda -> 0c363bd18 [CARBONDATA-2734] Update is not working on the table which has segmentfile present It fixes the IUD on the flat folder This closes #2503 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/0c363bd1 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/0c363bd1 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/0c363bd1 Branch: refs/heads/master Commit: 0c363bd18ae29f13dffb04acb3c2193d9befd1c2 Parents: a20f22e Author: ravipesala Authored: Fri Jul 13 13:15:15 2018 +0530 Committer: manishgupta88 Committed: Mon Jul 23 08:39:58 2018 +0530 -- .../core/mutate/CarbonUpdateUtil.java | 4 ++-- .../executor/impl/AbstractQueryExecutor.java| 5 +++-- .../SegmentUpdateStatusManager.java | 12 +-- .../apache/carbondata/core/util/CarbonUtil.java | 19 ++--- .../FlatFolderTableLoadingTestCase.scala| 21 +++ .../iud/DeleteCarbonTableTestCase.scala | 22 +++- .../iud/UpdateCarbonTableTestCase.scala | 13 .../command/mutation/DeleteExecution.scala | 14 +++-- 8 files changed, 81 insertions(+), 29 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/0c363bd1/core/src/main/java/org/apache/carbondata/core/mutate/CarbonUpdateUtil.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/mutate/CarbonUpdateUtil.java b/core/src/main/java/org/apache/carbondata/core/mutate/CarbonUpdateUtil.java index d0a204c..4a8d2e8 100644 --- a/core/src/main/java/org/apache/carbondata/core/mutate/CarbonUpdateUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/mutate/CarbonUpdateUtil.java @@ -81,10 +81,10 @@ public class CarbonUpdateUtil { /** * Returns block path from tuple id */ - public static String getTableBlockPath(String tid, String tablePath, boolean isPartitionTable) { + public static String getTableBlockPath(String tid, String tablePath, boolean isStandardTable) { String partField = getRequiredFieldFromTID(tid, TupleIdEnum.PART_ID); // If it has segment file then partfield can be appended directly to table path -if (isPartitionTable) { +if (!isStandardTable) { return tablePath + CarbonCommonConstants.FILE_SEPARATOR + partField.replace("#", "/"); } String part = CarbonTablePath.addPartPrefix(partField); http://git-wip-us.apache.org/repos/asf/carbondata/blob/0c363bd1/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java b/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java index 180ca4d..910ae3e 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java @@ -344,11 +344,12 @@ public abstract class AbstractQueryExecutor implements QueryExecutor { queryModel.getProjectionDimensions(), tableBlockDimensions, segmentProperties.getComplexDimensions(), queryModel.getProjectionMeasures().size(), queryModel.getTable().getTableInfo().isTransactionalTable()); +boolean isStandardTable = CarbonUtil.isStandardCarbonTable(queryModel.getTable()); String blockId = CarbonUtil .getBlockId(queryModel.getAbsoluteTableIdentifier(), filePath, segment.getSegmentNo(), queryModel.getTable().getTableInfo().isTransactionalTable(), -queryModel.getTable().isHivePartitionTable()); -if (queryModel.getTable().isHivePartitionTable()) 
{ +isStandardTable); +if (!isStandardTable) { blockExecutionInfo.setBlockId(CarbonTablePath.getShortBlockIdForPartitionTable(blockId)); } else { blockExecutionInfo.setBlockId(CarbonTablePath.getShortBlockId(blockId)); http://git-wip-us.apache.org/repos/asf/carbondata/blob/0c363bd1/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentUpdateStatusManager.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentUpdateStatusManager.java b/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentUpdateStatusManager.java index 55381fb..0c2098a 100644 --- a/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentUpdateStatusManager.java +++ b/core/src/main/java/org
carbondata git commit: [CARBONDATA-2754] Fixed testcases if HiveMetastore is enabled
Repository: carbondata Updated Branches: refs/heads/master ce2d1a3da -> 9f42fbf33 [CARBONDATA-2754] Fixed testcases if HiveMetastore is enabled Fixed testcases for when HiveMetastore is enabled This closes #2518 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/9f42fbf3 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/9f42fbf3 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/9f42fbf3 Branch: refs/heads/master Commit: 9f42fbf333295cad1b4b052bf0d1cabb8ee21bc3 Parents: ce2d1a3 Author: rahul Authored: Tue Jul 17 19:19:27 2018 +0530 Committer: manishgupta88 Committed: Fri Jul 20 16:13:44 2018 +0530 -- .../createTable/TestCreateExternalTable.scala | 46 ++-- .../iud/DeleteCarbonTableTestCase.scala | 2 +- .../carbondata/store/SparkCarbonStore.scala | 16 --- .../carbondata/store/SparkCarbonStoreTest.scala | 8 ++-- .../apache/spark/util/CarbonCommandSuite.scala | 3 +- .../apache/carbondata/store/CarbonStore.java| 5 ++- .../carbondata/store/LocalCarbonStore.java | 15 --- .../carbondata/store/LocalCarbonStoreTest.java | 4 +- 8 files changed, 63 insertions(+), 36 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/9f42fbf3/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestCreateExternalTable.scala -- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestCreateExternalTable.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestCreateExternalTable.scala index 3b21d0a..519089b 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestCreateExternalTable.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestCreateExternalTable.scala @@ -23,6 +23,9 @@ import org.apache.spark.sql.{AnalysisException, CarbonEnv} import org.apache.spark.sql.test.util.QueryTest import org.scalatest.BeforeAndAfterAll +import org.apache.carbondata.core.constants.CarbonCommonConstants +import org.apache.carbondata.core.util.CarbonProperties + class TestCreateExternalTable extends QueryTest with BeforeAndAfterAll { var originDataPath: String = _ @@ -43,25 +46,40 @@ class TestCreateExternalTable extends QueryTest with BeforeAndAfterAll { test("create external table with existing files") { assert(new File(originDataPath).exists()) sql("DROP TABLE IF EXISTS source") +if (CarbonProperties.getInstance() + .getProperty(CarbonCommonConstants.ENABLE_HIVE_SCHEMA_META_STORE, + CarbonCommonConstants.ENABLE_HIVE_SCHEMA_META_STORE_DEFAULT).equalsIgnoreCase("false")) { -// create external table with existing files -sql( - s""" - |CREATE EXTERNAL TABLE source - |STORED BY 'carbondata' - |LOCATION '$storeLocation/origin' + // create external table with existing files + sql( +s""" + |CREATE EXTERNAL TABLE source + |STORED BY 'carbondata' + |LOCATION '$storeLocation/origin' """.stripMargin) -checkAnswer(sql("SELECT count(*) from source"), sql("SELECT count(*) from origin")) + checkAnswer(sql("SELECT count(*) from source"), sql("SELECT count(*) from origin")) -checkExistence(sql("describe formatted source"), true, storeLocation+"/origin") + checkExistence(sql("describe formatted source"), true, storeLocation + "/origin") -val carbonTable = CarbonEnv.getCarbonTable(None, "source")(sqlContext.sparkSession) -assert(carbonTable.isExternalTable) - -sql("DROP TABLE IF EXISTS 
source") + val carbonTable = CarbonEnv.getCarbonTable(None, "source")(sqlContext.sparkSession) + assert(carbonTable.isExternalTable) -// DROP TABLE should not delete data -assert(new File(originDataPath).exists()) + sql("DROP TABLE IF EXISTS source") + + // DROP TABLE should not delete data + assert(new File(originDataPath).exists()) +} +else { + intercept[Exception] { +// create external table with existing files +sql( + s""" + |CREATE EXTERNAL TABLE source + |STORED BY 'carbondata' + |LOCATION '$storeLocation/origin' + &qu
carbondata git commit: [CARBONDATA-2710][Spark Integration] Refactor CarbonSparkSqlParser for better code reuse.
Repository: carbondata Updated Branches: refs/heads/master a4c2ef5f8 -> 5aada46e7 [CARBONDATA-2710][Spark Integration] Refactor CarbonSparkSqlParser for better code reuse. Refactor CarbonSparkSqlParser for better code reuse This closes #2466 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/5aada46e Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/5aada46e Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/5aada46e Branch: refs/heads/master Commit: 5aada46e7bb6bcbb11652979862e3ccebaa6e3e8 Parents: a4c2ef5 Author: mohammadshahidkhan Authored: Mon Jul 9 16:08:47 2018 +0530 Committer: manishgupta88 Committed: Wed Jul 18 16:20:30 2018 +0530 -- .../spark/sql/parser/CarbonSparkSqlParser.scala | 293 ++- .../sql/parser/CarbonSparkSqlParserUtil.scala | 367 +++ .../spark/sql/hive/CarbonSessionState.scala | 4 +- .../spark/sql/hive/CarbonSqlAstBuilder.scala| 4 +- 4 files changed, 397 insertions(+), 271 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/5aada46e/integration/spark2/src/main/scala/org/apache/spark/sql/parser/CarbonSparkSqlParser.scala -- diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/parser/CarbonSparkSqlParser.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/parser/CarbonSparkSqlParser.scala index 4cc0e1b..39dce3a 100644 --- a/integration/spark2/src/main/scala/org/apache/spark/sql/parser/CarbonSparkSqlParser.scala +++ b/integration/spark2/src/main/scala/org/apache/spark/sql/parser/CarbonSparkSqlParser.scala @@ -16,35 +16,25 @@ */ package org.apache.spark.sql.parser -import scala.collection.JavaConverters._ import scala.collection.mutable import org.antlr.v4.runtime.tree.TerminalNode -import org.apache.spark.sql.{CarbonEnv, CarbonSession, SparkSession} -import org.apache.spark.sql.catalyst.parser.{AbstractSqlParser, ParseException, SqlBaseParser} -import org.apache.spark.sql.catalyst.parser.ParserUtils._ +import org.apache.spark.sql.{CarbonSession, SparkSession} +import org.apache.spark.sql.catalyst.parser.{AbstractSqlParser, SqlBaseParser} import org.apache.spark.sql.catalyst.parser.SqlBaseParser._ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.SparkSqlAstBuilder -import org.apache.spark.sql.execution.command.{PartitionerField, TableModel, TableNewProcessor} -import org.apache.spark.sql.execution.command.table.{CarbonCreateTableAsSelectCommand, CarbonCreateTableCommand} +import org.apache.spark.sql.execution.command.PartitionerField import org.apache.spark.sql.internal.{SQLConf, VariableSubstitution} import org.apache.spark.sql.types.StructField import org.apache.spark.sql.util.CarbonException import org.apache.spark.util.CarbonReflectionUtils import org.apache.carbondata.common.exceptions.sql.MalformedCarbonCommandException -import org.apache.carbondata.core.constants.CarbonCommonConstants -import org.apache.carbondata.core.datastore.impl.FileFactory -import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier -import org.apache.carbondata.core.metadata.datatype.DataTypes -import org.apache.carbondata.core.metadata.schema.SchemaReader -import org.apache.carbondata.core.util.path.CarbonTablePath -import org.apache.carbondata.spark.CarbonOption -import org.apache.carbondata.spark.util.{CarbonScalaUtil, CommonUtil} +import org.apache.carbondata.spark.util.CarbonScalaUtil /** - * Concrete parser for Spark SQL stateENABLE_INMEMORY_MERGE_SORT_DEFAULTments and carbon specific + * 
Concrete parser for Spark SQL statements and carbon specific * statements */ class CarbonSparkSqlParser(conf: SQLConf, sparkSession: SparkSession) extends AbstractSqlParser { @@ -90,60 +80,12 @@ class CarbonHelperSqlAstBuilder(conf: SQLConf, parser: CarbonSpark2SqlParser, sparkSession: SparkSession) extends SparkSqlAstBuilder(conf) { - - def getFileStorage(createFileFormat: CreateFileFormatContext): String = { -Option(createFileFormat) match { - case Some(value) => -val result = value.children.get(1).getText -if (result.equalsIgnoreCase("by")) { - value.storageHandler().STRING().getSymbol.getText -} else if (result.equalsIgnoreCase("as") && value.children.size() > 1) { - value.children.get(2).getText -} else { - // The case of "STORED AS PARQUET/ORC" - "" -} - case _ => "" -} - } - - /** - * This method will convert the database name to lower case - * - * @param dbName - * @return Option of String - */ - def convertDbNameToL
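The getFileStorage helper moved into CarbonSparkSqlParserUtil decides whether a CREATE TABLE statement used STORED BY '<handler>', STORED AS <format>, or neither. A rough Java rendering of the same logic, for illustration only (the Scala original is in the diff; the child indices follow it, but treat this class as a sketch, not the project's code):

import org.apache.spark.sql.catalyst.parser.SqlBaseParser.CreateFileFormatContext;

final class FileStorageSketch {
  // "STORED BY 'handler'" yields the handler string, "STORED AS fmt" yields
  // the format token, and plain "STORED AS PARQUET/ORC" falls through to "".
  static String getFileStorage(CreateFileFormatContext ctx) {
    if (ctx == null) {
      return "";
    }
    String keyword = ctx.children.get(1).getText();
    if (keyword.equalsIgnoreCase("by")) {
      return ctx.storageHandler().STRING().getSymbol().getText();
    } else if (keyword.equalsIgnoreCase("as") && ctx.children.size() > 1) {
      return ctx.children.get(2).getText();
    }
    return "";
  }
}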
carbondata git commit: [CARBONDATA-2704] Index file size in describe formatted command is not updated correctly with the segment file
Repository: carbondata Updated Branches: refs/heads/master cdee81d4d -> eb604fdb7 [CARBONDATA-2704] Index file size in describe formatted command is not updated correctly with the segment file Problem: Describe formatted command is not showing correct index files size after index files merge. Solution: Segment file should be updated with the actual index files size of that segment after index files merge. This closes #2462 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/eb604fdb Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/eb604fdb Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/eb604fdb Branch: refs/heads/master Commit: eb604fdb73983dfe9396d488a51907d90ed51d3e Parents: cdee81d Author: dhatchayani Authored: Mon Jul 9 11:19:51 2018 +0530 Committer: manishgupta88 Committed: Sun Jul 15 20:34:32 2018 +0530 -- .../core/metadata/SegmentFileStore.java | 4 +- .../apache/carbondata/core/util/CarbonUtil.java | 48 --- .../core/writer/CarbonIndexFileMergeWriter.java | 17 +++ .../CarbonIndexFileMergeTestCase.scala | 50 .../spark/rdd/CarbonDataRDDFactory.scala| 4 +- 5 files changed, 96 insertions(+), 27 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/eb604fdb/core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java b/core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java index 3d3b245..ce79e65 100644 --- a/core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java +++ b/core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java @@ -281,7 +281,7 @@ public class SegmentFileStore { * @throws IOException */ public static boolean updateSegmentFile(String tablePath, String segmentId, String segmentFile, - String tableId) throws IOException { + String tableId, SegmentFileStore segmentFileStore) throws IOException { boolean status = false; String tableStatusPath = CarbonTablePath.getTableStatusFilePath(tablePath); if (!FileFactory.isFileExist(tableStatusPath)) { @@ -308,6 +308,8 @@ public class SegmentFileStore { // if the segments is in the list of marked for delete then update the status. 
if (segmentId.equals(detail.getLoadName())) { detail.setSegmentFile(segmentFile); +detail.setIndexSize(String.valueOf(CarbonUtil +.getCarbonIndexSize(segmentFileStore, segmentFileStore.getLocationMap(; break; } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/eb604fdb/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java index e87e52c..9796696 100644 --- a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java @@ -2647,23 +2647,7 @@ public final class CarbonUtil { fileStore.readIndexFiles(); Map> indexFilesMap = fileStore.getIndexFilesMap(); // get the size of carbonindex file - for (Map.Entry entry : locationMap.entrySet()) { -SegmentFileStore.FolderDetails folderDetails = entry.getValue(); -Set carbonindexFiles = folderDetails.getFiles(); -String mergeFileName = folderDetails.getMergeFileName(); -if (null != mergeFileName) { - String mergeIndexPath = - fileStore.getTablePath() + entry.getKey() + CarbonCommonConstants.FILE_SEPARATOR - + mergeFileName; - carbonIndexSize += FileFactory.getCarbonFile(mergeIndexPath).getSize(); -} -for (String indexFile : carbonindexFiles) { - String indexPath = - fileStore.getTablePath() + entry.getKey() + CarbonCommonConstants.FILE_SEPARATOR - + indexFile; - carbonIndexSize += FileFactory.getCarbonFile(indexPath).getSize(); -} - } + carbonIndexSize = getCarbonIndexSize(fileStore, locationMap); for (Map.Entry> entry : indexFilesMap.entrySet()) { // get the size of carbondata files for (String blockFile : entry.getValue()) { @@ -2676,6 +2660,36 @@ public final class CarbonUtil { return dataAndIndexSize; } + /** + * Calcuate the index files size of the segment + * + * @param fileStore + * @param locationMap +
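The core of the fix is visible in the hunk above: when a segment's index files are merged, the matching load metadata entry must have its index size recomputed, not just its segment file name replaced. Reassembled as a small helper (the calls are taken from the diff; the LoadMetadataDetails type and the wrapper class are inferred for illustration):

import org.apache.carbondata.core.metadata.SegmentFileStore;
import org.apache.carbondata.core.statusmanager.LoadMetadataDetails;
import org.apache.carbondata.core.util.CarbonUtil;

final class SegmentIndexSizeRefresher {
  // Patch the load metadata entry for segmentId with the new segment file and
  // the recomputed index size of that segment.
  static void refresh(LoadMetadataDetails detail, String segmentId,
      String segmentFile, SegmentFileStore segmentFileStore) {
    if (segmentId.equals(detail.getLoadName())) {
      detail.setSegmentFile(segmentFile);
      detail.setIndexSize(String.valueOf(CarbonUtil.getCarbonIndexSize(
          segmentFileStore, segmentFileStore.getLocationMap())));
    }
  }
}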
[2/2] carbondata git commit: [CARBONDATA-2684] [PR-2442] Distinct count fails on complex columns
[CARBONDATA-2684] [PR-2442] Distinct count fails on complex columns This PR fixes Code Generator Error thrown when Select filter contains more than one count of distinct of ComplexColumn with group by Clause This closes #2449 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/19a99e15 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/19a99e15 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/19a99e15 Branch: refs/heads/master Commit: 19a99e15e5c9489ded3013cbc6bb02cab38d0720 Parents: cbecadf Author: ravipesala Authored: Wed Jul 4 19:57:54 2018 +0530 Committer: manishgupta88 Committed: Fri Jul 6 10:35:06 2018 +0530 -- .../complexType/TestComplexDataType.scala | 2 +- .../sql/optimizer/CarbonLateDecodeRule.scala| 31 +++- 2 files changed, 24 insertions(+), 9 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/19a99e15/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestComplexDataType.scala -- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestComplexDataType.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestComplexDataType.scala index ba0dc66..ab574c9 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestComplexDataType.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestComplexDataType.scala @@ -645,7 +645,7 @@ class TestComplexDataType extends QueryTest with BeforeAndAfterAll { "sno:array,sal:array,state:array,date1:array>) stored by " + "'carbondata'") sql("insert into test values('cus_01','1$2017/01/01$1:2$2.0:3.0$ab:ac$2018/01/01')") -sql("select *from test").show(false) +//sql("select *from test").show(false) sql( "select struct_of_array.state[0],count(distinct struct_of_array.id) as count_int,count" + "(distinct struct_of_array.state[0]) as count_string from test group by struct_of_array" + http://git-wip-us.apache.org/repos/asf/carbondata/blob/19a99e15/integration/spark2/src/main/scala/org/apache/spark/sql/optimizer/CarbonLateDecodeRule.scala -- diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/optimizer/CarbonLateDecodeRule.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/optimizer/CarbonLateDecodeRule.scala index 68e7f07..9738ab0 100644 --- a/integration/spark2/src/main/scala/org/apache/spark/sql/optimizer/CarbonLateDecodeRule.scala +++ b/integration/spark2/src/main/scala/org/apache/spark/sql/optimizer/CarbonLateDecodeRule.scala @@ -30,7 +30,7 @@ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.command.RunnableCommand import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.profiler.{Optimizer, Profiler} -import org.apache.spark.sql.types.{IntegerType, StringType} +import org.apache.spark.sql.types._ import org.apache.carbondata.common.logging.LogServiceFactory import org.apache.carbondata.core.constants.CarbonCommonConstants @@ -90,12 +90,7 @@ class CarbonLateDecodeRule extends Rule[LogicalPlan] with PredicateHelper { LOGGER.info("skip CarbonOptimizer for scalar/predicate sub query") return false } - if(relations.exists(_.dictionaryMap.dictionaryMap.exists(_._2))) { -true - } else { -false - } - + true } else { 
LOGGER.info("skip CarbonOptimizer") false @@ -678,7 +673,9 @@ class CarbonLateDecodeRule extends Rule[LogicalPlan] with PredicateHelper { val updatedProj = ex.projections.map { projs => projs.zipWithIndex.map { case(p, index) => p.transform { - case l: Literal if l.dataType != ex.output(index).dataType => + case l: Literal +if l.dataType != ex.output(index).dataType && + !isComplexColumn(ex.output(index), ex.child.output) => Literal(l.value, ex.output(index).dataType) } } @@ -734,6 +731,24 @@ class CarbonLateDecodeRule extends Rule[LogicalPlan] with PredicateHelper { } } + /** + * Check whether given column is derived from complex column. + */ + def isComplexColumn(attribute: Attribu
[1/2] carbondata git commit: Fix code generator error thrown when a SELECT filter contains more than one count(distinct) of a complex column with a GROUP BY clause
Repository: carbondata Updated Branches: refs/heads/master 020335a8c -> 19a99e15e Code Generator Error is thrown when Select filter contains more than one count of distinct of ComplexColumn with group by Clause Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/cbecadfa Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/cbecadfa Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/cbecadfa Branch: refs/heads/master Commit: cbecadfad2408c18322e87aa5ac054d6d190d34c Parents: 020335a Author: Indhumathi27 Authored: Tue Jul 3 22:31:40 2018 +0530 Committer: manishgupta88 Committed: Fri Jul 6 10:34:47 2018 +0530 -- .../complexType/TestComplexDataType.scala | 20 .../sql/optimizer/CarbonLateDecodeRule.scala| 7 ++- 2 files changed, 26 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/cbecadfa/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestComplexDataType.scala -- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestComplexDataType.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestComplexDataType.scala index 524289c..ba0dc66 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestComplexDataType.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestComplexDataType.scala @@ -636,4 +636,24 @@ class TestComplexDataType extends QueryTest with BeforeAndAfterAll { sql("select b.c[0],a[0][0] from test").show(false) } + test("test structofarray with count(distinct)") { +sql("DROP TABLE IF EXISTS test") +CarbonProperties.getInstance() + .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "/MM/dd") +sql( + "create table test(cus_id string, struct_of_array struct,sal:array,state:array,date1:array>) stored by " + + "'carbondata'") +sql("insert into test values('cus_01','1$2017/01/01$1:2$2.0:3.0$ab:ac$2018/01/01')") +sql("select *from test").show(false) +sql( + "select struct_of_array.state[0],count(distinct struct_of_array.id) as count_int,count" + + "(distinct struct_of_array.state[0]) as count_string from test group by struct_of_array" + + ".state[0]") + .show(false) +CarbonProperties.getInstance() + .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, +CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT) + } + } http://git-wip-us.apache.org/repos/asf/carbondata/blob/cbecadfa/integration/spark2/src/main/scala/org/apache/spark/sql/optimizer/CarbonLateDecodeRule.scala -- diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/optimizer/CarbonLateDecodeRule.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/optimizer/CarbonLateDecodeRule.scala index 7ed1705..68e7f07 100644 --- a/integration/spark2/src/main/scala/org/apache/spark/sql/optimizer/CarbonLateDecodeRule.scala +++ b/integration/spark2/src/main/scala/org/apache/spark/sql/optimizer/CarbonLateDecodeRule.scala @@ -90,7 +90,12 @@ class CarbonLateDecodeRule extends Rule[LogicalPlan] with PredicateHelper { LOGGER.info("skip CarbonOptimizer for scalar/predicate sub query") return false } - true + if(relations.exists(_.dictionaryMap.dictionaryMap.exists(_._2))) { +true + } else { +false + } + } else { LOGGER.info("skip CarbonOptimizer") false
carbondata git commit: [CARBONDATA-2623][DataMap] Add DataMap Pre and Post Event listener
Repository: carbondata Updated Branches: refs/heads/master 55f4bc6c8 -> b3f782062 [CARBONDATA-2623][DataMap] Add DataMap Pre and Post Event listener Added Pre and Post Execution Events for index datamap This closes #2389 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/b3f78206 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/b3f78206 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/b3f78206 Branch: refs/heads/master Commit: b3f7820623d4bc9ab4408beb8ad708ba9b19b899 Parents: 55f4bc6 Author: mohammadshahidkhan Authored: Wed Jun 20 19:52:51 2018 +0530 Committer: manishgupta88 Committed: Thu Jun 21 17:37:48 2018 +0530 -- .../carbondata/events/DataMapEvents.scala | 68 .../org/apache/carbondata/events/Events.scala | 18 +- .../datamap/IndexDataMapRebuildRDD.scala| 11 +++- .../spark/rdd/CarbonTableCompactor.scala| 23 ++- .../datamap/CarbonCreateDataMapCommand.scala| 22 +++ .../datamap/CarbonDataMapRebuildCommand.scala | 12 .../datamap/CarbonDropDataMapCommand.scala | 11 .../management/CarbonLoadDataCommand.scala | 21 +- 8 files changed, 181 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/b3f78206/integration/spark-common/src/main/scala/org/apache/carbondata/events/DataMapEvents.scala -- diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/events/DataMapEvents.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/events/DataMapEvents.scala new file mode 100644 index 000..8fb374f --- /dev/null +++ b/integration/spark-common/src/main/scala/org/apache/carbondata/events/DataMapEvents.scala @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.carbondata.events + +import org.apache.spark.sql.SparkSession + +import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier + +/** + * For handling operation's after finish of index creation over table with index datamap + * example: bloom datamap, Lucene datamap + */ +case class CreateDataMapPostExecutionEvent(sparkSession: SparkSession, +storePath: String) extends Event with CreateDataMapEventsInfo + +/** + * For handling operation's before start of update index datmap status over table with index datamap + * example: bloom datamap, Lucene datamap + */ +case class UpdateDataMapPreExecutionEvent(sparkSession: SparkSession, +storePath: String) extends Event with CreateDataMapEventsInfo + +/** + * For handling operation's after finish of update index datmap status over table with index + * datamap + * example: bloom datamap, Lucene datamap + */ +case class UpdateDataMapPostExecutionEvent(sparkSession: SparkSession, +storePath: String) extends Event with CreateDataMapEventsInfo + +/** + * For handling operation's before start of index build over table with index datamap + * example: bloom datamap, Lucene datamap + */ +case class BuildDataMapPreExecutionEvent(sparkSession: SparkSession, +identifier: AbsoluteTableIdentifier, dataMapNames: scala.collection.mutable.Seq[String]) + extends Event with BuildDataMapEventsInfo + +/** + * For handling operation's after finish of index build over table with index datamap + * example: bloom datamap, Lucene datamap + */ +case class BuildDataMapPostExecutionEvent(sparkSession: SparkSession, +identifier: AbsoluteTableIdentifier) + extends Event with TableEventInfo + +/** + * For handling operation's before start of index creation over table with index datamap + * example: bloom datamap, Lucene datamap + */ +case class CreateDataMapPreExecutionEvent(sparkSession: SparkSession, +storePath: String) extends Event with CreateDataMapEventsInfo + http://git-wip-us.apache.org/repos/asf/carbondata/blob/b3f78206/integration/spark-common/src
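The new case classes are plain event payloads; they only become useful when a bus dispatches them to registered listeners before and after schema and build operations. A schematic Java event bus showing that pre/post pattern (CarbonData's actual dispatcher differs in detail, so every name below is a stand-in):

import java.util.ArrayList;
import java.util.List;

final class EventBusSketch {
  interface Event { }
  interface Listener { void onEvent(Event event); }

  private final List<Listener> listeners = new ArrayList<>();

  void register(Listener listener) {
    listeners.add(listener);
  }

  // A command would fire e.g. CreateDataMapPreExecutionEvent before writing
  // the datamap schema and CreateDataMapPostExecutionEvent after it succeeds.
  void fire(Event event) {
    for (Listener listener : listeners) {
      listener.onEvent(event);
    }
  }
}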
carbondata git commit: [CARBONDATA-2617] Invalid tuple id and block id formed for non-partition table
Repository: carbondata Updated Branches: refs/heads/master dc53dee24 -> 0e1d550e8 [CARBONDATA-2617] Invalid tuple id and block id formed for non-partition table Problem: Invalid tuple id and block id are formed for non-partition tables. Analysis: While creating a partition table, a segment file was written in the Metadata folder under the table structure. This was introduced during development of the partition table feature. At that time the segment file was written only for partition tables, and it was used in the code to distinguish partition from non-partition tables. Later the code was modified to write the segment file for both partition and non-partition tables, but the code distinguishing the two was not updated, which causes this incorrect formation of block and tuple ids. Fix: Modify the logic to distinguish partitioned and non-partitioned tables; this is handled in this PR. This closes #2385 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/0e1d550e Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/0e1d550e Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/0e1d550e Branch: refs/heads/master Commit: 0e1d550e8dacba798e9ffbdda25c4388e8933632 Parents: dc53dee Author: rahul Authored: Tue Jun 19 19:23:26 2018 +0530 Committer: manishgupta88 Committed: Wed Jun 20 16:37:23 2018 +0530 -- .../core/mutate/CarbonUpdateUtil.java | 4 +- .../executor/impl/AbstractQueryExecutor.java| 4 +- .../SegmentUpdateStatusManager.java | 20 ++--- .../apache/carbondata/core/util/CarbonUtil.java | 4 +- .../iud/DeleteCarbonTableTestCase.scala | 83 .../command/mutation/DeleteExecution.scala | 6 +- 6 files changed, 100 insertions(+), 21 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/0e1d550e/core/src/main/java/org/apache/carbondata/core/mutate/CarbonUpdateUtil.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/mutate/CarbonUpdateUtil.java b/core/src/main/java/org/apache/carbondata/core/mutate/CarbonUpdateUtil.java index 40d498c..8627bdb 100644 --- a/core/src/main/java/org/apache/carbondata/core/mutate/CarbonUpdateUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/mutate/CarbonUpdateUtil.java @@ -81,10 +81,10 @@ public class CarbonUpdateUtil { /** * Returns block path from tuple id */ - public static String getTableBlockPath(String tid, String tablePath, boolean isSegmentFile) { + public static String getTableBlockPath(String tid, String tablePath, boolean isPartitionTable) { String partField = getRequiredFieldFromTID(tid, TupleIdEnum.PART_ID); // If it has segment file then partfield can be appended directly to table path -if (isSegmentFile) { +if (isPartitionTable) { return tablePath + CarbonCommonConstants.FILE_SEPARATOR + partField.replace("#", "/"); } String part = CarbonTablePath.addPartPrefix(partField); http://git-wip-us.apache.org/repos/asf/carbondata/blob/0e1d550e/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java b/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java index 2bbe75c..f365045 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java @@ -296,8 +296,8 @@ public abstract class
AbstractQueryExecutor implements QueryExecutor { String blockId = CarbonUtil .getBlockId(queryModel.getAbsoluteTableIdentifier(), filePath, segment.getSegmentNo(), queryModel.getTable().getTableInfo().isTransactionalTable(), -segment.getSegmentFileName() != null); -if (segment.getSegmentFileName() != null) { +queryModel.getTable().isHivePartitionTable()); +if (queryModel.getTable().isHivePartitionTable()) { blockExecutionInfo.setBlockId(CarbonTablePath.getShortBlockIdForPartitionTable(blockId)); } else { blockExecutionInfo.setBlockId(CarbonTablePath.getShortBlockId(blockId)); http://git-wip-us.apache.org/repos/asf/carbondata/blob/0e1d550e/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentUpdateStatusManager.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentUpdateStatusManager.java
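The renamed flag matters because the two table layouts store blocks differently. A sketch of the fixed branch in CarbonUpdateUtil.getTableBlockPath, reassembled from the hunk above (the method's tail is truncated in this archive, so the non-partition return is a simplification):

import org.apache.carbondata.core.constants.CarbonCommonConstants;
import org.apache.carbondata.core.util.path.CarbonTablePath;

final class BlockPathSketch {
  // Partition tables map the part field ("a#b") directly onto a directory
  // ("a/b"); non-partition tables keep the legacy Part-prefixed layout.
  static String tableBlockPath(String partField, String tablePath,
      boolean isPartitionTable) {
    if (isPartitionTable) {
      return tablePath + CarbonCommonConstants.FILE_SEPARATOR
          + partField.replace("#", "/");
    }
    return tablePath + CarbonCommonConstants.FILE_SEPARATOR
        + CarbonTablePath.addPartPrefix(partField);
  }
}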
carbondata git commit: [CARBONDATA-2604] Fix ArrayIndexOutOfBoundException during compaction after IUD in cluster
Repository: carbondata Updated Branches: refs/heads/master ff0364599 -> efad40d57 [CARBONDATA-2604] Fix ArrayIndexOutOfBoundException during compaction after IUD in cluster Issue: If some records are deleted, then while filling the measure and dimension data the number of valid rows and the number of actual rows may differ, and the measure fill iterates over the scanned result, which causes an ArrayIndexOutOfBoundException. Solution: Make a new temporary list inside RawBasedResultCollector to collect the measure and dimension data during the scan, and add it to the final list. This closes #2369 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/efad40d5 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/efad40d5 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/efad40d5 Branch: refs/heads/master Commit: efad40d5723849a351ec700e8e4e346cac8c3454 Parents: ff03645 Author: rahul Authored: Tue Jun 12 19:26:40 2018 +0530 Committer: manishgupta88 Committed: Wed Jun 13 20:38:24 2018 +0530 -- .../collector/impl/RawBasedResultCollector.java | 12 +--- .../sdv/generated/DataLoadingIUDTestCase.scala | 19 +++ 2 files changed, 28 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/efad40d5/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/RawBasedResultCollector.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/RawBasedResultCollector.java b/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/RawBasedResultCollector.java index d28df0a..7302b2c 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/RawBasedResultCollector.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/RawBasedResultCollector.java @@ -92,16 +92,22 @@ public class RawBasedResultCollector extends AbstractScannedResultCollector { // re initialized with left over value batchSize = 0; } + // for every iteration of available rows filling newly created list of Object[] and add it to + // the final list so there is no mismatch in the counter while filling dimension and + // measure data + List collectedData = new ArrayList<>(availableBatchRowCount); // fill dimension data - fillDimensionData(scannedResult, listBasedResult, queryMeasures, availableBatchRowCount); - fillMeasureData(scannedResult, listBasedResult); + fillDimensionData(scannedResult, collectedData, queryMeasures, availableBatchRowCount); + fillMeasureData(scannedResult, collectedData); // increment the number of rows scanned in scanned result statistics incrementScannedResultRowCounter(scannedResult, availableBatchRowCount); // assign the left over rows to batch size if the number of rows fetched are lesser // than batchSize - if (listBasedResult.size() < availableBatchRowCount) { + if (collectedData.size() < availableBatchRowCount) { batchSize += availableBatchRowCount - listBasedResult.size(); } + // add the collected data to the final list + listBasedResult.addAll(collectedData); } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/efad40d5/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/DataLoadingIUDTestCase.scala --
b/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/DataLoadingIUDTestCase.scala index 4c232be..79458f5 100644 --- a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/DataLoadingIUDTestCase.scala +++ b/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/DataLoadingIUDTestCase.scala @@ -3671,6 +3671,23 @@ test("HQ_Defect_TC_2016110901163", Include) { sql(s"""drop table default.t_carbn01 """).collect } + test("[CARBONDATA-2604] ", Include){ +sql("drop table if exists brinjal").collect +sql("create table brinjal (imei string,AMSize string,channelsId string,ActiveCountry string, Activecity string,gamePointId double,deviceInformationId double,productionDate Timestamp,deliveryDate timestamp,deliverycharge double) STORED BY 'org.apache.carbondata.format' TBLPROPERTI
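The pattern behind the fix is worth isolating: fill each batch into a private list sized to the batch, then append it to the caller's list in one step, so deleted rows can never desynchronize the dimension and measure fill counters. A stripped-down sketch (fillDimensionData/fillMeasureData stand for the collector's internal methods shown in the diff):

import java.util.ArrayList;
import java.util.List;

final class BatchCollectSketch {
  static void collectBatch(List<Object[]> listBasedResult, int availableBatchRowCount) {
    // fresh list per batch: dimension and measure fills share one counter base
    List<Object[]> collectedData = new ArrayList<>(availableBatchRowCount);
    // fillDimensionData(scannedResult, collectedData, ...);
    // fillMeasureData(scannedResult, collectedData);
    listBasedResult.addAll(collectedData);
  }
}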
carbondata git commit: [CARBONDATA-2571] Calculating the carbonindex and carbondata file size of a table is wrong
Repository: carbondata Updated Branches: refs/heads/master 92d9b9256 -> 27d705998 [CARBONDATA-2571] Calculating the carbonindex and carbondata file size of a table is wrong Problem: While calculating the carbonindex files size, we are checking either index file or merge file. But in PR#2333, implementation is changed to fill both the file name and the merge file name. So, we have to consider both fields. Solution: While calculating the carbonindex files size, we have to consider both the files and mergeFileName fields. We should get the list of index files from these 2 fields and then calculate the size of the files. This closes #2358 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/27d70599 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/27d70599 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/27d70599 Branch: refs/heads/master Commit: 27d7059984962b97bcaf576fed496653932ea743 Parents: 92d9b92 Author: dhatchayani Authored: Fri Jun 1 15:13:38 2018 +0530 Committer: manishgupta88 Committed: Tue Jun 5 11:51:52 2018 +0530 -- .../apache/carbondata/core/util/CarbonUtil.java | 37 +++- 1 file changed, 20 insertions(+), 17 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/27d70599/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java index 1526047..5a7bce3 100644 --- a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java @@ -2688,27 +2688,30 @@ public final class CarbonUtil { throws IOException { long carbonDataSize = 0L; long carbonIndexSize = 0L; -List listOfFilesRead = new ArrayList<>(); HashMap dataAndIndexSize = new HashMap(); -if (fileStore.getLocationMap() != null) { +Map locationMap = fileStore.getLocationMap(); +if (locationMap != null) { fileStore.readIndexFiles(); - Map indexFiles = fileStore.getIndexFiles(); Map> indexFilesMap = fileStore.getIndexFilesMap(); - for (Map.Entry> entry : indexFilesMap.entrySet()) { -// get the size of carbonindex file -String indexFile = entry.getKey(); -String mergeIndexFile = indexFiles.get(indexFile); -if (null != mergeIndexFile) { - String mergeIndexPath = indexFile - .substring(0, indexFile.lastIndexOf(CarbonCommonConstants.FILE_SEPARATOR) + 1) - + mergeIndexFile; - if (!listOfFilesRead.contains(mergeIndexPath)) { -carbonIndexSize += FileFactory.getCarbonFile(mergeIndexPath).getSize(); -listOfFilesRead.add(mergeIndexPath); - } -} else { - carbonIndexSize += FileFactory.getCarbonFile(indexFile).getSize(); + // get the size of carbonindex file + for (Map.Entry entry : locationMap.entrySet()) { +SegmentFileStore.FolderDetails folderDetails = entry.getValue(); +Set carbonindexFiles = folderDetails.getFiles(); +String mergeFileName = folderDetails.getMergeFileName(); +if (null != mergeFileName) { + String mergeIndexPath = + fileStore.getTablePath() + entry.getKey() + CarbonCommonConstants.FILE_SEPARATOR + + mergeFileName; + carbonIndexSize += FileFactory.getCarbonFile(mergeIndexPath).getSize(); } +for (String indexFile : carbonindexFiles) { + String indexPath = + fileStore.getTablePath() + entry.getKey() + CarbonCommonConstants.FILE_SEPARATOR + + indexFile; + carbonIndexSize += FileFactory.getCarbonFile(indexPath).getSize(); +} + } + for (Map.Entry> entry : indexFilesMap.entrySet()) { 
// get the size of carbondata files for (String blockFile : entry.getValue()) { carbonDataSize += FileFactory.getCarbonFile(blockFile).getSize();
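Because the hunk above is hard to read in this flattened archive, here is the resulting size loop reassembled: both the merged index file and any remaining un-merged index files of each folder are summed (the logic follows the diff; the generics were stripped by the mail archiver and are restored here on a best-effort basis):

import java.util.Map;
import org.apache.carbondata.core.constants.CarbonCommonConstants;
import org.apache.carbondata.core.datastore.impl.FileFactory;
import org.apache.carbondata.core.metadata.SegmentFileStore;

final class IndexSizeSketch {
  static long carbonIndexSize(SegmentFileStore fileStore,
      Map<String, SegmentFileStore.FolderDetails> locationMap) {
    long size = 0L;
    for (Map.Entry<String, SegmentFileStore.FolderDetails> entry : locationMap.entrySet()) {
      SegmentFileStore.FolderDetails folderDetails = entry.getValue();
      String mergeFileName = folderDetails.getMergeFileName();
      if (null != mergeFileName) {
        // size of the merged index file of this folder
        size += FileFactory.getCarbonFile(fileStore.getTablePath() + entry.getKey()
            + CarbonCommonConstants.FILE_SEPARATOR + mergeFileName).getSize();
      }
      for (String indexFile : folderDetails.getFiles()) {
        // plus every index file still listed individually
        size += FileFactory.getCarbonFile(fileStore.getTablePath() + entry.getKey()
            + CarbonCommonConstants.FILE_SEPARATOR + indexFile).getSize();
      }
    }
    return size;
  }
}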
carbondata git commit: [HOTFIX] Changes in selecting the carbonindex files
Repository: carbondata Updated Branches: refs/heads/master d7773187f -> 22d5035c8 [HOTFIX] Changes in selecting the carbonindex files Currently, in the query flow while getting the index files we are checking for either mergeFileName or the list of files. After this change, we will be checking for both files and mergeFileName This closes #2333 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/22d5035c Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/22d5035c Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/22d5035c Branch: refs/heads/master Commit: 22d5035c84342e0c0b15a87abbdd4dca5e6d4976 Parents: d777318 Author: dhatchayani Authored: Tue May 22 17:26:37 2018 +0530 Committer: manishgupta88 Committed: Tue May 29 11:01:08 2018 +0530 -- .../core/metadata/SegmentFileStore.java | 18 -- .../core/writer/CarbonIndexFileMergeWriter.java | 2 ++ 2 files changed, 14 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/22d5035c/core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java b/core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java index d72ded3..acfc145 100644 --- a/core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java +++ b/core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java @@ -151,7 +151,8 @@ public class SegmentFileStore { CarbonFile segmentFolder = FileFactory.getCarbonFile(segmentPath); CarbonFile[] indexFiles = segmentFolder.listFiles(new CarbonFileFilter() { @Override public boolean accept(CarbonFile file) { -return file.getName().endsWith(CarbonTablePath.INDEX_FILE_EXT); +return (file.getName().endsWith(CarbonTablePath.INDEX_FILE_EXT) || file.getName() +.endsWith(CarbonTablePath.MERGE_INDEX_FILE_EXT)); } }); if (indexFiles != null && indexFiles.length > 0) { @@ -160,7 +161,11 @@ public class SegmentFileStore { folderDetails.setRelative(true); folderDetails.setStatus(SegmentStatus.SUCCESS.getMessage()); for (CarbonFile file : indexFiles) { -folderDetails.getFiles().add(file.getName()); +if (file.getName().endsWith(CarbonTablePath.MERGE_INDEX_FILE_EXT)) { + folderDetails.setMergeFileName(file.getName()); +} else { + folderDetails.getFiles().add(file.getName()); +} } String segmentRelativePath = segmentPath.substring(tablePath.length(), segmentPath.length()); segmentFile.addPath(segmentRelativePath, folderDetails); @@ -508,10 +513,11 @@ public class SegmentFileStore { if (null != mergeFileName) { indexFiles.put(location + CarbonCommonConstants.FILE_SEPARATOR + mergeFileName, entry.getValue().mergeFileName); - } else { -for (String indexFile : entry.getValue().getFiles()) { - indexFiles.put(location + CarbonCommonConstants.FILE_SEPARATOR + indexFile, - entry.getValue().mergeFileName); + } + Set files = entry.getValue().getFiles(); + if (null != files && !files.isEmpty()) { +for (String indexFile : files) { + indexFiles.put(location + CarbonCommonConstants.FILE_SEPARATOR + indexFile, null); } } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/22d5035c/core/src/main/java/org/apache/carbondata/core/writer/CarbonIndexFileMergeWriter.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/writer/CarbonIndexFileMergeWriter.java b/core/src/main/java/org/apache/carbondata/core/writer/CarbonIndexFileMergeWriter.java index ceeb431..cb53c0b 100644 --- 
a/core/src/main/java/org/apache/carbondata/core/writer/CarbonIndexFileMergeWriter.java +++ b/core/src/main/java/org/apache/carbondata/core/writer/CarbonIndexFileMergeWriter.java @@ -21,6 +21,7 @@ import java.io.Serializable; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; @@ -140,6 +141,7 @@ public class CarbonIndexFileMergeWriter { } if (new Path(entry.getKey()).equals(new Path(location))) { segentry.getValue().setMergeFileName(mergeIndexFile); + segentry.getValue().setFiles(new HashSet()); break; } }
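The listing change is the heart of this hotfix: a segment folder scan must now accept merged index files as first-class entries. The filter, reassembled from the hunk above (constants as in the diff; the holder class is illustrative):

import org.apache.carbondata.core.datastore.filesystem.CarbonFile;
import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter;
import org.apache.carbondata.core.util.path.CarbonTablePath;

final class IndexFileFilterSketch {
  // Accept both plain .carbonindex files and merged index files when scanning
  // a segment folder, as the fixed SegmentFileStore listing does.
  static final CarbonFileFilter INDEX_OR_MERGE = new CarbonFileFilter() {
    @Override public boolean accept(CarbonFile file) {
      return file.getName().endsWith(CarbonTablePath.INDEX_FILE_EXT)
          || file.getName().endsWith(CarbonTablePath.MERGE_INDEX_FILE_EXT);
    }
  };
}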
carbondata git commit: [CARBONDATA-2538] added filter while listing files from writer path
Repository: carbondata Updated Branches: refs/heads/master 8b80b12ec -> d7773187f [CARBONDATA-2538] added filter while listing files from writer path 1. Added filter to list only index and carbondata files. So even if the lock files are present proper exception can be thrown 2. Updated complex type docs This closes #2344 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/d7773187 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/d7773187 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/d7773187 Branch: refs/heads/master Commit: d7773187f72c73b7f9514f13bce17de3c552247c Parents: 8b80b12 Author: kunal642 Authored: Fri May 25 16:21:45 2018 +0530 Committer: manishgupta88 Committed: Tue May 29 10:49:59 2018 +0530 -- .../core/metadata/schema/table/CarbonTable.java | 2 +- .../readcommitter/LatestFilesReadCommittedScope.java | 9 - .../java/org/apache/carbondata/core/util/CarbonUtil.java | 11 --- docs/supported-data-types-in-carbondata.md | 2 ++ .../command/table/CarbonDescribeFormattedCommand.scala | 2 +- 5 files changed, 20 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/d7773187/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java index 8528d6f..b1ed981 100644 --- a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java +++ b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java @@ -892,7 +892,7 @@ public class CarbonTable implements Serializable { public long size() throws IOException { -Map dataIndexSize = CarbonUtil.calculateDataIndexSize(this); +Map dataIndexSize = CarbonUtil.calculateDataIndexSize(this, true); Long dataSize = dataIndexSize.get(CarbonCommonConstants.CARBON_TOTAL_DATA_SIZE); if (dataSize == null) { dataSize = 0L; http://git-wip-us.apache.org/repos/asf/carbondata/blob/d7773187/core/src/main/java/org/apache/carbondata/core/readcommitter/LatestFilesReadCommittedScope.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/readcommitter/LatestFilesReadCommittedScope.java b/core/src/main/java/org/apache/carbondata/core/readcommitter/LatestFilesReadCommittedScope.java index 6106174..14bba65 100644 --- a/core/src/main/java/org/apache/carbondata/core/readcommitter/LatestFilesReadCommittedScope.java +++ b/core/src/main/java/org/apache/carbondata/core/readcommitter/LatestFilesReadCommittedScope.java @@ -26,6 +26,7 @@ import org.apache.carbondata.common.annotations.InterfaceAudience; import org.apache.carbondata.common.annotations.InterfaceStability; import org.apache.carbondata.core.datamap.Segment; import org.apache.carbondata.core.datastore.filesystem.CarbonFile; +import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter; import org.apache.carbondata.core.datastore.impl.FileFactory; import org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore; import org.apache.carbondata.core.mutate.UpdateVO; @@ -138,7 +139,13 @@ public class LatestFilesReadCommittedScope implements ReadCommittedScope { @Override public void takeCarbonIndexFileSnapShot() throws IOException { // Read the current file Path get the list of indexes from the path. 
CarbonFile file = FileFactory.getCarbonFile(carbonFilePath); -if (file.listFiles().length == 0) { +CarbonFile[] files = file.listFiles(new CarbonFileFilter() { + @Override public boolean accept(CarbonFile file) { +return file.getName().endsWith(CarbonTablePath.INDEX_FILE_EXT) || file.getName() +.endsWith(CarbonTablePath.CARBON_DATA_EXT); + } +}); +if (files.length == 0) { // For nonTransactional table, files can be removed at any point of time. // So cannot assume files will be present throw new IOException("No files are present in the table location :" + carbonFilePath); http://git-wip-us.apache.org/repos/asf/carbondata/blob/d7773187/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java index 23d02ef..9ccd772 100644 --- a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java +++ b/core/src/
carbondata git commit: [CARBONDATA-2514] Added condition to check for duplicate column names
Repository: carbondata Updated Branches: refs/heads/master 16ed99a11 -> cf666c17b [CARBONDATA-2514] Added condition to check for duplicate column names 1. Duplicate columns check was not present. 2. IndexFileReader was not being closed due to which index file could not be deleted. This closes #2332 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/cf666c17 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/cf666c17 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/cf666c17 Branch: refs/heads/master Commit: cf666c17b8be9f11dd9c0b51503ca194162ee782 Parents: 16ed99a Author: kunal642 Authored: Tue May 22 15:16:32 2018 +0530 Committer: manishgupta88 Committed: Thu May 24 10:19:47 2018 +0530 -- .../apache/carbondata/core/util/CarbonUtil.java | 44 +++- .../carbondata/core/util/DataTypeUtil.java | 2 + .../sdk/file/CarbonWriterBuilder.java | 7 .../sdk/file/AvroCarbonWriterTest.java | 40 ++ 4 files changed, 73 insertions(+), 20 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/cf666c17/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java index 9dc4aa2..23d02ef 100644 --- a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java @@ -2380,27 +2380,31 @@ public final class CarbonUtil { public static org.apache.carbondata.format.TableInfo inferSchemaFromIndexFile( String indexFilePath, String tableName) throws IOException { CarbonIndexFileReader indexFileReader = new CarbonIndexFileReader(); -indexFileReader.openThriftReader(indexFilePath); -org.apache.carbondata.format.IndexHeader readIndexHeader = indexFileReader.readIndexHeader(); -List columnSchemaList = new ArrayList(); -List table_columns = -readIndexHeader.getTable_columns(); -for (int i = 0; i < table_columns.size(); i++) { - columnSchemaList.add(thriftColumnSchmeaToWrapperColumnSchema(table_columns.get(i))); +try { + indexFileReader.openThriftReader(indexFilePath); + org.apache.carbondata.format.IndexHeader readIndexHeader = indexFileReader.readIndexHeader(); + List columnSchemaList = new ArrayList(); + List table_columns = + readIndexHeader.getTable_columns(); + for (int i = 0; i < table_columns.size(); i++) { + columnSchemaList.add(thriftColumnSchmeaToWrapperColumnSchema(table_columns.get(i))); + } + // only columnSchema is the valid entry, reset all dummy entries. + TableSchema tableSchema = getDummyTableSchema(tableName, columnSchemaList); + + ThriftWrapperSchemaConverterImpl thriftWrapperSchemaConverter = + new ThriftWrapperSchemaConverterImpl(); + org.apache.carbondata.format.TableSchema thriftFactTable = + thriftWrapperSchemaConverter.fromWrapperToExternalTableSchema(tableSchema); + org.apache.carbondata.format.TableInfo tableInfo = + new org.apache.carbondata.format.TableInfo(thriftFactTable, + new ArrayList()); + + tableInfo.setDataMapSchemas(null); + return tableInfo; +} finally { + indexFileReader.closeThriftReader(); } -// only columnSchema is the valid entry, reset all dummy entries. 
-TableSchema tableSchema = getDummyTableSchema(tableName, columnSchemaList); - -ThriftWrapperSchemaConverterImpl thriftWrapperSchemaConverter = -new ThriftWrapperSchemaConverterImpl(); -org.apache.carbondata.format.TableSchema thriftFactTable = - thriftWrapperSchemaConverter.fromWrapperToExternalTableSchema(tableSchema); -org.apache.carbondata.format.TableInfo tableInfo = -new org.apache.carbondata.format.TableInfo(thriftFactTable, -new ArrayList()); - -tableInfo.setDataMapSchemas(null); -return tableInfo; } private static TableSchema getDummyTableSchema(String tableName, http://git-wip-us.apache.org/repos/asf/carbondata/blob/cf666c17/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java b/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java index f7f71b3..e06c82e 100644 --- a/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java @@ -341,6 +341,7 @@ public final class Dat
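The diff shows the try/finally half of the fix; the duplicate-column half lives in CarbonWriterBuilder and is not quoted above. A hypothetical sketch of such a validation, for illustration only (the SDK's actual check and exception type may differ):

import java.util.HashSet;
import java.util.Set;

final class DuplicateColumnCheckSketch {
  // Reject schemas whose field names collide case-insensitively.
  static void validateNoDuplicates(String[] fieldNames) {
    Set<String> seen = new HashSet<>();
    for (String name : fieldNames) {
      if (!seen.add(name.toLowerCase())) {
        throw new IllegalArgumentException(
            "Duplicate column found in table schema: " + name);
      }
    }
  }
}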
carbondata git commit: [CARBONDATA-2503] Fix data write failure when an empty value is provided for sort columns in the SDK
Repository: carbondata Updated Branches: refs/heads/master e1ef85ac7 -> 9aa3a8c22 [CARBONDATA-2503] Data write fails if empty value is provided for sort columns in sdk is fixed SortColumn with empty value was giving exception This closes #2326 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/9aa3a8c2 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/9aa3a8c2 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/9aa3a8c2 Branch: refs/heads/master Commit: 9aa3a8c22460f58691e0de7dee97dade5a567285 Parents: e1ef85a Author: rahulforallp Authored: Mon May 21 15:17:10 2018 +0530 Committer: manishgupta88 Committed: Tue May 22 17:25:53 2018 +0530 -- .../TestNonTransactionalCarbonTable.scala | 21 +--- .../sdk/file/CarbonWriterBuilder.java | 2 +- 2 files changed, 19 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/9aa3a8c2/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala -- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala index 1c74adc..afb9b2f 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala @@ -322,14 +322,12 @@ class TestNonTransactionalCarbonTable extends QueryTest with BeforeAndAfterAll { Row("robot0", 0, 0.0), Row("robot1", 1, 0.5), Row("robot2", 2, 1.0))) -new File(writerPath).listFiles().map(x => LOGGER.audit(x.getName +" : "+x.lastModified())) FileUtils.deleteDirectory(new File(writerPath)) // Thread.sleep is required because it is possible sometime deletion // and creation of new file can happen at same timestamp. 
Thread.sleep(1000) assert(!new File(writerPath).exists()) buildTestDataWithSameUUID(4, false, null, List("name")) -new File(writerPath).listFiles().map(x => LOGGER.audit(x.getName +" : "+x.lastModified())) checkAnswer(sql("select * from sdkOutputTable"), Seq( Row("robot0", 0, 0.0), Row("robot1", 1, 0.5), @@ -379,9 +377,26 @@ class TestNonTransactionalCarbonTable extends QueryTest with BeforeAndAfterAll { checkExistence(sql("describe formatted sdkOutputTable"), true, "name") +buildTestDataWithSortColumns(List()) +assert(new File(writerPath).exists()) +sql("DROP TABLE IF EXISTS sdkOutputTable") + +// with partition +sql( + s"""CREATE EXTERNAL TABLE sdkOutputTable(name string) PARTITIONED BY (age int) STORED BY + |'carbondata' LOCATION + |'$writerPath' """.stripMargin) + +sql("describe formatted sdkOutputTable").show(false) +sql("select * from sdkOutputTable").show() + +intercept[RuntimeException] { + buildTestDataWithSortColumns(List("")) +} + sql("DROP TABLE sdkOutputTable") // drop table should not delete the files -assert(new File(writerPath).exists()) +assert(!(new File(writerPath).exists())) cleanTestData() } http://git-wip-us.apache.org/repos/asf/carbondata/blob/9aa3a8c2/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java -- diff --git a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java index bf99e05..e846da4 100644 --- a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java +++ b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java @@ -377,7 +377,7 @@ public class CarbonWriterBuilder { } List sortColumnsList = new ArrayList<>(); -if (sortColumns == null) { +if (sortColumns == null || sortColumns.length == 0) { // If sort columns are not specified, default set all dimensions to sort column. // When dimensions are default set to sort column, // Inverted index will be supported by default for sort columns.
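The one-line guard in the builder is easy to miss in the hunk: an empty sort_columns array is now treated the same as an unspecified one, so all dimensions default to sort columns instead of the load failing. As the test shows, a list containing an empty string is still rejected. A trivial sketch of the new condition (the holder class is illustrative):

final class SortColumnsGuardSketch {
  // null or zero-length means "not specified": fall back to default sorting,
  // where every dimension becomes a sort column with inverted index support.
  static boolean useDefaultSortColumns(String[] sortColumns) {
    return sortColumns == null || sortColumns.length == 0;
  }
}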
carbondata git commit: [CARBONDATA-2496] Changed to hadoop bloom implementation and added compress option to compress bloom on disk
Repository: carbondata Updated Branches: refs/heads/master d9534c2c0 -> 77a11107c [CARBONDATA-2496] Changed to hadoop bloom implementation and added compress option to compress bloom on disk This PR removes the Guava bloom filter and adds the Hadoop bloom filter. It also adds a compress option to compress the bloom filter on disk and in memory; the user can use the bloom_compress property to enable/disable compression, and it is enabled by default. Performance was checked by loading 100 million rows with a bloom datamap on a column with a cardinality of 5 million, using 'BLOOM_SIZE'='500' and 'bloom_fpp'='0.001'. This closes #2324 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/77a11107 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/77a11107 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/77a11107 Branch: refs/heads/master Commit: 77a11107c57beebda74925dbb328f7bad6c72136 Parents: d9534c2 Author: ravipesala Authored: Sun May 20 21:52:57 2018 +0530 Committer: manishgupta88 Committed: Tue May 22 14:24:58 2018 +0530 -- .../blockletindex/BlockletDataMapFactory.java | 5 +- .../datamap/bloom/BloomCoarseGrainDataMap.java | 8 +- .../bloom/BloomCoarseGrainDataMapFactory.java | 42 ++-- .../carbondata/datamap/bloom/BloomDMModel.java | 35 -- .../datamap/bloom/BloomDataMapBuilder.java | 12 ++- .../datamap/bloom/BloomDataMapCache.java| 12 +-- .../datamap/bloom/BloomDataMapWriter.java | 60 ++- .../hadoop/util/bloom/CarbonBloomFilter.java| 108 +++ 8 files changed, 225 insertions(+), 57 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/77a11107/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMapFactory.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMapFactory.java b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMapFactory.java index 0188281..318fc6e 100644 --- a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMapFactory.java +++ b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMapFactory.java @@ -18,6 +18,7 @@ package org.apache.carbondata.core.indexstore.blockletindex; import java.io.IOException; import java.util.*; +import java.util.concurrent.ConcurrentHashMap; import org.apache.carbondata.core.cache.Cache; import org.apache.carbondata.core.cache.CacheProvider; @@ -78,7 +79,7 @@ public class BlockletDataMapFactory extends CoarseGrainDataMapFactory private AbsoluteTableIdentifier identifier; // segmentId -> list of index file - private Map> segmentMap = new HashMap<>(); + private Map> segmentMap = new ConcurrentHashMap<>(); private Cache cache; @@ -279,7 +280,7 @@ public class BlockletDataMapFactory extends CoarseGrainDataMapFactory } @Override - public void clear() { + public synchronized void clear() { if (segmentMap.size() > 0) { for (String segmentId : segmentMap.keySet().toArray(new String[segmentMap.size()])) { clear(new Segment(segmentId, null, null)); http://git-wip-us.apache.org/repos/asf/carbondata/blob/77a11107/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java -- diff --git a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java index 09de25e..a5a141c 100644 ---
a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java +++ b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java @@ -44,6 +44,7 @@ import org.apache.carbondata.core.util.CarbonUtil; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.util.bloom.Key; /** * BloomDataCoarseGrainMap is constructed in blocklet level. For each indexed column, @@ -83,7 +84,7 @@ public class BloomCoarseGrainDataMap extends CoarseGrainDataMap { @Override public List prune(FilterResolverIntf filterExp, SegmentProperties segmentProperties, - List partitions) throws IOException { + List partitions) { List hitBlocklets = new ArrayList(); if (filterExp == null) { // null is different from empty here. Empty means after pruning, no blocklet need to scan. @@ -97,8 +98,8 @@ public class BloomCoarseGrainDataM
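With bloom_fpp in play, the usual bloom sizing math applies: for n keys at false-positive probability p, the bit-vector size is m = -n*ln(p)/(ln 2)^2 and the hash count is k = (m/n)*ln 2. A sketch using the Hadoop bloom classes this commit switches to (the hadoop-common API calls are real; whether CarbonData derives the parameters exactly this way is an assumption):

import org.apache.hadoop.util.bloom.BloomFilter;
import org.apache.hadoop.util.bloom.Key;
import org.apache.hadoop.util.hash.Hash;

final class BloomSizingSketch {
  // Build a Hadoop bloom filter sized for n keys at false-positive rate p.
  static BloomFilter build(long n, double p) {
    int m = (int) Math.ceil(-n * Math.log(p) / (Math.log(2) * Math.log(2)));
    int k = Math.max(1, (int) Math.round((double) m / n * Math.log(2)));
    return new BloomFilter(m, k, Hash.MURMUR_HASH);
  }

  public static void main(String[] args) {
    // roughly the scenario tested above: 5 million distinct keys, fpp 0.001
    BloomFilter filter = build(5_000_000L, 0.001);
    filter.add(new Key("some-column-value".getBytes()));
    System.out.println(filter.membershipTest(new Key("some-column-value".getBytes()))); // true
  }
}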
carbondata git commit: [CARBONDATA-2227] Added support to show partition details in describe formatted
Repository: carbondata Updated Branches: refs/heads/master 2ebfab151 -> 604902b9a [CARBONDATA-2227] Added support to show partition details in describe formatted Added detailed information, such as partition location and partition values, to the DESCRIBE FORMATTED command output. Example usage: DESCRIBE FORMATTED <table> PARTITION (partition_col_name=partition_value) This closes #2033 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/604902b9 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/604902b9 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/604902b9 Branch: refs/heads/master Commit: 604902b9a52ec613c1ec025b4dc33657b179895e Parents: 2ebfab1 Author: kunal642 Authored: Mon Mar 5 20:33:06 2018 +0530 Committer: manishgupta88 Committed: Tue May 22 11:37:07 2018 +0530 -- .../describeTable/TestDescribeTable.scala | 20 +++ .../partition/TestDDLForPartitionTable.scala| 2 +- .../table/CarbonDescribeFormattedCommand.scala | 26 +--- .../sql/execution/strategy/DDLStrategy.scala| 1 + 4 files changed, 44 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/604902b9/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/describeTable/TestDescribeTable.scala -- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/describeTable/TestDescribeTable.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/describeTable/TestDescribeTable.scala index fe01598..1e333ee 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/describeTable/TestDescribeTable.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/describeTable/TestDescribeTable.scala @@ -28,6 +28,7 @@ class TestDescribeTable extends QueryTest with BeforeAndAfterAll { override def beforeAll: Unit = { sql("DROP TABLE IF EXISTS Desc1") sql("DROP TABLE IF EXISTS Desc2") +sql("drop table if exists a") sql("CREATE TABLE Desc1(Dec1Col1 String, Dec1Col2 String, Dec1Col3 int, Dec1Col4 double) stored by 'carbondata'") sql("DESC Desc1") sql("DROP TABLE Desc1") @@ -56,9 +57,28 @@ class TestDescribeTable extends QueryTest with BeforeAndAfterAll { assert(sql("desc formatted desc1").count() == 20) } + test("test describe formatted for partition table") { +sql("create table a(a string) partitioned by (b int) stored by 'carbondata'") +sql("insert into a values('a',1)") +sql("insert into a values('a',2)") +val desc = sql("describe formatted a").collect() +assert(desc(desc.indexWhere(_.get(0).toString.contains("#Partition")) + 2).get(0).toString.contains("b")) +val descPar = sql("describe formatted a partition(b=1)").collect +descPar.find(_.get(0).toString.contains("Partition Value:")) match { + case Some(row) => assert(row.get(1).toString.contains("1")) + case None => fail("Partition Value not found in describe formatted") +} +descPar.find(_.get(0).toString.contains("Location:")) match { + case Some(row) => assert(row.get(1).toString.contains("target/warehouse/a/b=1")) + case None => fail("Partition Location not found in describe formatted") +} +assert(descPar.exists(_.toString().contains("Partition Parameters:"))) + } + override def afterAll: Unit = { sql("DROP TABLE Desc1") sql("DROP TABLE Desc2") +sql("drop table if exists a") } }
http://git-wip-us.apache.org/repos/asf/carbondata/blob/604902b9/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/partition/TestDDLForPartitionTable.scala -- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/partition/TestDDLForPartitionTable.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/partition/TestDDLForPartitionTable.scala index 2cbafa8..cafd465 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/partition/TestDDLForPartitionTable.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/partition/TestDDLForPartitionTable.scala @@ -145,7 +145,7 @@ class TestDDLForPartitionTa
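For reference, a minimal usage sketch of the describe formatted partition syntax added by this commit, written in the spark-shell style the tests use; the table and column names here are illustrative, not taken from the patch:

```
sql("CREATE TABLE sales(name string) PARTITIONED BY (country string) STORED BY 'carbondata'")
sql("INSERT INTO sales VALUES('bob','uk')")
// shows Partition Value, Location and Partition Parameters for the chosen partition
sql("DESCRIBE FORMATTED sales PARTITION (country='uk')").show(100, false)
```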
[39/50] [abbrv] carbondata git commit: [CARBONDATA-2475] Support Modular Core for Materialized View DataMap for query matching and rewriting
http://git-wip-us.apache.org/repos/asf/carbondata/blob/2881c6bb/integration/spark-common-test/src/test/resources/tpch/nation.csv -- diff --git a/integration/spark-common-test/src/test/resources/tpch/nation.csv b/integration/spark-common-test/src/test/resources/tpch/nation.csv new file mode 100644 index 000..ed3fd5b --- /dev/null +++ b/integration/spark-common-test/src/test/resources/tpch/nation.csv @@ -0,0 +1,25 @@ +0|ALGERIA|0| haggle. carefully final deposits detect slyly agai| +1|ARGENTINA|1|al foxes promise slyly according to the regular accounts. bold requests alon| +2|BRAZIL|1|y alongside of the pending deposits. carefully special packages are about the ironic forges. slyly special | +3|CANADA|1|eas hang ironic, silent packages. slyly regular packages are furiously over the tithes. fluffily bold| +4|EGYPT|4|y above the carefully unusual theodolites. final dugouts are quickly across the furiously regular d| +5|ETHIOPIA|0|ven packages wake quickly. regu| +6|FRANCE|3|refully final requests. regular, ironi| +7|GERMANY|3|l platelets. regular accounts x-ray: unusual, regular acco| +8|INDIA|2|ss excuses cajole slyly across the packages. deposits print aroun| +9|INDONESIA|2| slyly express asymptotes. regular deposits haggle slyly. carefully ironic hockey players sleep blithely. carefull| +10|IRAN|4|efully alongside of the slyly final dependencies. | +11|IRAQ|4|nic deposits boost atop the quickly final requests? quickly regula| +12|JAPAN|2|ously. final, express gifts cajole a| +13|JORDAN|4|ic deposits are blithely about the carefully regular pa| +14|KENYA|0| pending excuses haggle furiously deposits. pending, express pinto beans wake fluffily past t| +15|MOROCCO|0|rns. blithely bold courts among the closely regular packages use furiously bold platelets?| +16|MOZAMBIQUE|0|s. ironic, unusual asymptotes wake blithely r| +17|PERU|1|platelets. blithely pending dependencies use fluffily across the even pinto beans. carefully silent accoun| +18|CHINA|2|c dependencies. furiously express notornis sleep slyly regular accounts. ideas sleep. depos| +19|ROMANIA|3|ular asymptotes are about the furious multipliers. express dependencies nag above the ironically ironic account| +20|SAUDI ARABIA|4|ts. silent requests haggle. closely express packages sleep across the blithely| +21|VIETNAM|2|hely enticingly express accounts. even, final | +22|RUSSIA|3| requests against the platelets use never according to the quickly regular pint| +23|UNITED KINGDOM|3|eans boost carefully special requests. accounts are. carefull| +24|UNITED STATES|1|y final packages. slow foxes cajole quickly. quickly silent platelets breach ironic accounts. unusual pinto be|
[40/50] [abbrv] carbondata git commit: [CARBONDATA-2475] Support Modular Core for Materialized View DataMap for query matching and rewriting
http://git-wip-us.apache.org/repos/asf/carbondata/blob/2881c6bb/integration/spark-common-test/src/test/resources/tpch/lineitem.csv -- diff --git a/integration/spark-common-test/src/test/resources/tpch/lineitem.csv b/integration/spark-common-test/src/test/resources/tpch/lineitem.csv new file mode 100644 index 000..e792319 --- /dev/null +++ b/integration/spark-common-test/src/test/resources/tpch/lineitem.csv @@ -0,0 +1,1000 @@ +1|155190|7706|1|17|21168.23|0.04|0.02|N|O|1996-03-13|1996-02-12|1996-03-22|DELIVER IN PERSON|TRUCK|egular courts above the| +1|67310|7311|2|36|45983.16|0.09|0.06|N|O|1996-04-12|1996-02-28|1996-04-20|TAKE BACK RETURN|MAIL|ly final dependencies: slyly bold | +1|63700|3701|3|8|13309.60|0.10|0.02|N|O|1996-01-29|1996-03-05|1996-01-31|TAKE BACK RETURN|REG AIR|riously. regular, express dep| +1|2132|4633|4|28|28955.64|0.09|0.06|N|O|1996-04-21|1996-03-30|1996-05-16|NONE|AIR|lites. fluffily even de| +1|24027|1534|5|24|22824.48|0.10|0.04|N|O|1996-03-30|1996-03-14|1996-04-01|NONE|FOB| pending foxes. slyly re| +1|15635|638|6|32|49620.16|0.07|0.02|N|O|1996-01-30|1996-02-07|1996-02-03|DELIVER IN PERSON|MAIL|arefully slyly ex| +2|106170|1191|1|38|44694.46|0.00|0.05|N|O|1997-01-28|1997-01-14|1997-02-02|TAKE BACK RETURN|RAIL|ven requests. deposits breach a| +3|4297|1798|1|45|54058.05|0.06|0.00|R|F|1994-02-02|1994-01-04|1994-02-23|NONE|AIR|ongside of the furiously brave acco| +3|19036|6540|2|49|46796.47|0.10|0.00|R|F|1993-11-09|1993-12-20|1993-11-24|TAKE BACK RETURN|RAIL| unusual accounts. eve| +3|128449|3474|3|27|39890.88|0.06|0.07|A|F|1994-01-16|1993-11-22|1994-01-23|DELIVER IN PERSON|SHIP|nal foxes wake. | +3|29380|1883|4|2|2618.76|0.01|0.06|A|F|1993-12-04|1994-01-07|1994-01-01|NONE|TRUCK|y. fluffily pending d| +3|183095|650|5|28|32986.52|0.04|0.00|R|F|1993-12-14|1994-01-10|1994-01-01|TAKE BACK RETURN|FOB|ages nag slyly pending| +3|62143|9662|6|26|28733.64|0.10|0.02|A|F|1993-10-29|1993-12-18|1993-11-04|TAKE BACK RETURN|RAIL|ges sleep after the caref| +4|88035|5560|1|30|30690.90|0.03|0.08|N|O|1996-01-10|1995-12-14|1996-01-18|DELIVER IN PERSON|REG AIR|- quickly regular packages sleep. idly| +5|108570|8571|1|15|23678.55|0.02|0.04|R|F|1994-10-31|1994-08-31|1994-11-20|NONE|AIR|ts wake furiously | +5|123927|3928|2|26|50723.92|0.07|0.08|R|F|1994-10-16|1994-09-25|1994-10-19|NONE|FOB|sts use slyly quickly special instruc| +5|37531|35|3|50|73426.50|0.08|0.03|A|F|1994-08-08|1994-10-13|1994-08-26|DELIVER IN PERSON|AIR|eodolites. fluffily unusual| +6|139636|2150|1|37|61998.31|0.08|0.03|A|F|1992-04-27|1992-05-15|1992-05-02|TAKE BACK RETURN|TRUCK|p furiously special foxes| +7|182052|9607|1|12|13608.60|0.07|0.03|N|O|1996-05-07|1996-03-13|1996-06-03|TAKE BACK RETURN|FOB|ss pinto beans wake against th| +7|145243|7758|2|9|11594.16|0.08|0.08|N|O|1996-02-01|1996-03-02|1996-02-19|TAKE BACK RETURN|SHIP|es. instructions| +7|94780|9799|3|46|81639.88|0.10|0.07|N|O|1996-01-15|1996-03-27|1996-02-03|COLLECT COD|MAIL| unusual reques| +7|163073|3074|4|28|31809.96|0.03|0.04|N|O|1996-03-21|1996-04-08|1996-04-20|NONE|FOB|. slyly special requests haggl| +7|151894|9440|5|38|73943.82|0.08|0.01|N|O|1996-02-11|1996-02-24|1996-02-18|DELIVER IN PERSON|TRUCK|ns haggle carefully ironic deposits. bl| +7|79251|1759|6|35|43058.75|0.06|0.03|N|O|1996-01-16|1996-02-23|1996-01-22|TAKE BACK RETURN|FOB|jole. 
excuses wake carefully alongside of | +7|157238|2269|7|5|6476.15|0.04|0.02|N|O|1996-02-10|1996-03-26|1996-02-13|NONE|FOB|ithely regula| +32|82704|7721|1|28|47227.60|0.05|0.08|N|O|1995-10-23|1995-08-27|1995-10-26|TAKE BACK RETURN|TRUCK|sleep quickly. req| +32|197921|441|2|32|64605.44|0.02|0.00|N|O|1995-08-14|1995-10-07|1995-08-27|COLLECT COD|AIR|lithely regular deposits. fluffily | +32|44161||3|2|2210.32|0.09|0.02|N|O|1995-08-07|1995-10-07|1995-08-23|DELIVER IN PERSON|AIR| express accounts wake according to the| +32|2743|7744|4|4|6582.96|0.09|0.03|N|O|1995-08-04|1995-10-01|1995-09-03|NONE|REG AIR|e slyly final pac| +32|85811|8320|5|44|79059.64|0.05|0.06|N|O|1995-08-28|1995-08-20|1995-09-14|DELIVER IN PERSON|AIR|symptotes nag according to the ironic depo| +32|11615|4117|6|6|9159.66|0.04|0.03|N|O|1995-07-21|1995-09-23|1995-07-25|COLLECT COD|RAIL| gifts cajole carefully.| +33|61336|8855|1|31|40217.23|0.09|0.04|A|F|1993-10-29|1993-12-19|1993-11-08|COLLECT COD|TRUCK|ng to the furiously ironic package| +33|60519|5532|2|32|47344.32|0.02|0.05|A|F|1993-12-09|1994-01-04|1993-12-28|COLLECT COD|MAIL|gular theodolites| +33|137469|9983|3|5|7532.30|0.05|0.03|A|F|1993-12-09|1993-12-25|1993-12-23|TAKE BACK RETURN|AIR|. stealthily bold exc| +33|33918|3919|4|41|75928.31|0.09|0.00|R|F|1993-11-09|1994-01-24|1993-11-11|TAKE BACK RETURN|MAIL|unusual packages doubt caref| +34|88362|871|1|13|17554.68|0.00|0.07|N|O|1998-10-23|1998-09-14|1998-11-06|NONE|REG AIR|nic accounts. deposits are alon| +34|89414|1923|2|22|30875.02|0.08|0.06|N|O|1998-10-09|1998-10-16|1998-1
[25/50] [abbrv] carbondata git commit: [CARBONDATA-2369] updated the document about AVRO to carbon schema converter
[CARBONDATA-2369] updated the document about AVRO to carbon schema converter updated the document about AVRO to carbon schema converter This closes #2296 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/443b717d Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/443b717d Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/443b717d Branch: refs/heads/spark-2.3 Commit: 443b717d20e0eedf8be75dae177d573e53c050fc Parents: d85fb72 Author: ajantha-bhat Authored: Thu May 10 21:03:28 2018 +0530 Committer: kumarvishal09 Committed: Fri May 11 15:47:17 2018 +0530 -- README.md| 1 + docs/sdk-writer-guide.md | 25 ++--- 2 files changed, 19 insertions(+), 7 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/443b717d/README.md -- diff --git a/README.md b/README.md index 3f45917..4b4577e 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,7 @@ CarbonData is built using Apache Maven, to [build CarbonData](https://github.com * [Cluster Installation and Deployment](https://github.com/apache/carbondata/blob/master/docs/installation-guide.md) * [Configuring Carbondata](https://github.com/apache/carbondata/blob/master/docs/configuration-parameters.md) * [Streaming Ingestion](https://github.com/apache/carbondata/blob/master/docs/streaming-guide.md) +* [SDK Writer Guide](https://github.com/apache/carbondata/blob/master/docs/sdk-writer-guide.md) * [CarbonData Pre-aggregate DataMap](https://github.com/apache/carbondata/blob/master/docs/datamap/preaggregate-datamap-guide.md) * [CarbonData Timeseries DataMap](https://github.com/apache/carbondata/blob/master/docs/datamap/timeseries-datamap-guide.md) * [FAQ](https://github.com/apache/carbondata/blob/master/docs/faq.md) http://git-wip-us.apache.org/repos/asf/carbondata/blob/443b717d/docs/sdk-writer-guide.md -- diff --git a/docs/sdk-writer-guide.md b/docs/sdk-writer-guide.md index 18b583a..9878b71 100644 --- a/docs/sdk-writer-guide.md +++ b/docs/sdk-writer-guide.md @@ -52,6 +52,7 @@ import java.io.IOException; import org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException; import org.apache.carbondata.core.metadata.datatype.DataTypes; +import org.apache.carbondata.sdk.file.AvroCarbonWriter; import org.apache.carbondata.sdk.file.CarbonWriter; import org.apache.carbondata.sdk.file.Field; @@ -75,25 +76,24 @@ public class TestSdkAvro { " \"type\" : \"record\"," + " \"name\" : \"Acme\"," + " \"fields\" : [" -+ "{ \"name\" : \"name\", \"type\" : \"string\" }," ++ "{ \"name\" : \"fname\", \"type\" : \"string\" }," + "{ \"name\" : \"age\", \"type\" : \"int\" }]" + "}"; -String json = "{\"name\":\"bob\", \"age\":10}"; +String json = "{\"fname\":\"bob\", \"age\":10}"; // conversion to GenericData.Record JsonAvroConverter converter = new JsonAvroConverter(); GenericData.Record record = converter.convertToGenericDataRecord( json.getBytes(CharEncoding.UTF_8), new org.apache.avro.Schema.Parser().parse(avroSchema)); -// for sdk schema -Field[] fields = new Field[2]; -fields[0] = new Field("name", DataTypes.STRING); -fields[1] = new Field("age", DataTypes.STRING); +// prepare carbon schema from avro schema +org.apache.carbondata.sdk.file.Schema carbonSchema = +AvroCarbonWriter.getCarbonSchemaFromAvroSchema(avroSchema); try { CarbonWriter writer = CarbonWriter.builder() - .withSchema(new org.apache.carbondata.sdk.file.Schema(fields)) + .withSchema(carbonSchema) .outputPath(path) .buildWriterForAvroInput(); @@ -345,4 +345,15 @@ public Schema(Field[] fields); * 
@return Schema */ public static Schema parseJson(String json); +``` + +### Class org.apache.carbondata.sdk.file.AvroCarbonWriter +``` +/** +* converts avro schema to carbon schema, required by carbonWriter +* +* @param avroSchemaString json formatted avro schema as string +* @return carbon sdk schema +*/ +public static org.apache.carbondata.sdk.file.Schema getCarbonSchemaFromAvroSchema(String avroSchemaString); ``` \ No newline at end of file
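A condensed sketch of the documented flow in Scala, assuming the `avroSchema` string, the output `path` and the converted `record` are prepared exactly as in the guide above:

```
// prepare carbon schema from the avro schema (API added by this change)
val carbonSchema = AvroCarbonWriter.getCarbonSchemaFromAvroSchema(avroSchema)
val writer = CarbonWriter.builder()
  .withSchema(carbonSchema)
  .outputPath(path)
  .buildWriterForAvroInput()
writer.write(record) // GenericData.Record produced by JsonAvroConverter
writer.close()
```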
[45/50] [abbrv] carbondata git commit: [CARBONDATA-2370] Added document for presto multinode setup for carbondata
[CARBONDATA-2370] Added document for presto multinode setup for carbondata Added document for presto multinode setup for carbondata This closes #2199 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/3d23fa69 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/3d23fa69 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/3d23fa69 Branch: refs/heads/spark-2.3 Commit: 3d23fa693a604701e3ab3b574b20f21d089e8b43 Parents: 1c5b526 Author: Geetika Gupta Authored: Fri Apr 20 16:47:35 2018 +0530 Committer: chenliang613 Committed: Mon May 14 21:43:24 2018 +0800 -- .../Presto_Cluster_Setup_For_Carbondata.md | 133 +++ 1 file changed, 133 insertions(+) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/3d23fa69/integration/presto/Presto_Cluster_Setup_For_Carbondata.md -- diff --git a/integration/presto/Presto_Cluster_Setup_For_Carbondata.md b/integration/presto/Presto_Cluster_Setup_For_Carbondata.md new file mode 100644 index 000..082b8fe --- /dev/null +++ b/integration/presto/Presto_Cluster_Setup_For_Carbondata.md @@ -0,0 +1,133 @@ +# Presto Multinode Cluster setup For Carbondata + +## Installing Presto + + 1. Download the 0.187 version of Presto using: + `wget https://repo1.maven.org/maven2/com/facebook/presto/presto-server/0.187/presto-server-0.187.tar.gz` + + 2. Extract the Presto tar file: `tar zxvf presto-server-0.187.tar.gz`. + + 3. Download the Presto CLI for the coordinator and name it presto. + + ``` +wget https://repo1.maven.org/maven2/com/facebook/presto/presto-cli/0.187/presto-cli-0.187-executable.jar + +mv presto-cli-0.187-executable.jar presto + +chmod +x presto + ``` + + ## Create Configuration Files + + 1. Create an `etc` folder in the presto-server-0.187 directory. + 2. Create `config.properties`, `jvm.config`, `log.properties`, and `node.properties` files. + 3. Install uuid to generate a node.id. + + ``` + sudo apt-get install uuid + + uuid + ``` + + +# Contents of your node.properties file + + ``` + node.environment=production + node.id= + node.data-dir=/home/ubuntu/data + ``` + +# Contents of your jvm.config file + + ``` + -server + -Xmx16G + -XX:+UseG1GC + -XX:G1HeapRegionSize=32M + -XX:+UseGCOverheadLimit + -XX:+ExplicitGCInvokesConcurrent + -XX:+HeapDumpOnOutOfMemoryError + -XX:OnOutOfMemoryError=kill -9 %p + ``` + +# Contents of your log.properties file + ``` + com.facebook.presto=INFO + ``` + + The default minimum level is `INFO`. There are four levels: `DEBUG`, `INFO`, `WARN` and `ERROR`. + +## Coordinator Configurations + + # Contents of your config.properties + ``` + coordinator=true + node-scheduler.include-coordinator=false + http-server.http.port=8086 + query.max-memory=50GB + query.max-memory-per-node=2GB + discovery-server.enabled=true + discovery.uri=:8086 + ``` +The options `node-scheduler.include-coordinator=false` and `coordinator=true` indicate that the node is the coordinator and tell the coordinator not to do any of the computation work itself and to use the workers. + +**Note**: We recommend setting `query.max-memory-per-node` to half of the JVM config max memory, though if your workload is highly concurrent, you may want to use a lower value for `query.max-memory-per-node`. + +The two configuration properties should also be related as follows: +if `query.max-memory-per-node=30GB`, +then `query.max-memory=<30GB * number of nodes>`.
+ +## Worker Configurations + +# Contents of your config.properties + + ``` + coordinator=false + http-server.http.port=8086 + query.max-memory=50GB + query.max-memory-per-node=2GB + discovery.uri=:8086 + ``` + +**Note**: `jvm.config` and `node.properties` files are same for all the nodes (worker + coordinator). All the nodes should have different `node.id`. + +## Catalog Configurations + +1. Create a folder named `catalog` in etc directory of presto on all the nodes of the cluster including the coordinator. + +# Configuring Carbondata in Presto +1. Create a file named `carbondata.properties` in the `catalog` folder and set the required properties on all the nodes. + +## Add Plugins + +1. Create a directory named `carbondata` in plugin directory of presto. +2. Copy `carbondata` jars to `plugin/carbondata` directory on all nodes. + +## Start Presto Server on all nodes + +``` +./presto-server-0.187/bin/launcher start +``` +To run it as a background process. + +``` +./presto-server-0.187/bin/launcher run +``` +To run it in foreground. + +## Start Presto CLI +``` +./presto +``` +To connect to carbondata catalog use the following command: + +``` +./presto --server :8086 --catalog carbondata --schema +``` +Execut
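As a hedged sketch of the catalog step above, a `carbondata.properties` file for this Presto version typically carried the connector name and the store location; treat the exact keys as an assumption to verify against the presto guide for your release, and the HDFS path as a placeholder:

```
connector.name=carbondata
carbondata-store=hdfs://<namenode>:<port>/user/hive/warehouse/carbon.store
```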
[43/50] [abbrv] carbondata git commit: [CARBONDATA-2431] Fixed an issue where incremental data added after external table creation was not reflected in select queries.
[CARBONDATA-2431] Fixed an issue where incremental data added after external table creation was not reflected in select queries. This closes #2262 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/f1a6c7cf Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/f1a6c7cf Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/f1a6c7cf Branch: refs/heads/spark-2.3 Commit: f1a6c7cf548cd33ef26bd99f26c7fcf7e367c9c7 Parents: 2881c6b Author: rahulforallp Authored: Thu May 3 14:11:12 2018 +0530 Committer: ravipesala Committed: Mon May 14 11:27:33 2018 +0530 -- .../core/datamap/DataMapStoreManager.java | 19 +-- .../apache/carbondata/core/datamap/Segment.java | 7 + .../LatestFilesReadCommittedScope.java | 32 - .../ReadCommittedIndexFileSnapShot.java | 10 +- .../core/readcommitter/ReadCommittedScope.java | 5 + .../TableStatusReadCommittedScope.java | 13 ++ .../core/statusmanager/SegmentRefreshInfo.java | 65 + .../hadoop/api/CarbonTableInputFormat.java | 10 +- .../TestNonTransactionalCarbonTable.scala | 136 +++ 9 files changed, 282 insertions(+), 15 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/f1a6c7cf/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java index a3be26a..072b86e 100644 --- a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java +++ b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java @@ -43,6 +43,7 @@ import org.apache.carbondata.core.metadata.schema.table.DiskBasedDMSchemaStorage import org.apache.carbondata.core.metadata.schema.table.RelationIdentifier; import org.apache.carbondata.core.mutate.SegmentUpdateDetails; import org.apache.carbondata.core.mutate.UpdateVO; +import org.apache.carbondata.core.statusmanager.SegmentRefreshInfo; import org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager; import org.apache.carbondata.core.util.CarbonProperties; import org.apache.carbondata.core.util.CarbonSessionInfo; @@ -454,7 +455,7 @@ public final class DataMapStoreManager { // This map stores the latest segment refresh time. So in case of update/delete we check the // time against this map. -private Map<String, Long> segmentRefreshTime = new HashMap<>(); +private Map<String, SegmentRefreshInfo> segmentRefreshTime = new HashMap<>(); // This map keeps the manual refresh entries from users. It is mainly used for partition // altering.
@@ -465,23 +466,25 @@ public final class DataMapStoreManager { SegmentUpdateDetails[] updateStatusDetails = statusManager.getUpdateStatusDetails(); for (SegmentUpdateDetails updateDetails : updateStatusDetails) { UpdateVO updateVO = statusManager.getInvalidTimestampRange(updateDetails.getSegmentName()); -segmentRefreshTime.put(updateVO.getSegmentId(), updateVO.getCreatedOrUpdatedTimeStamp()); +segmentRefreshTime.put(updateVO.getSegmentId(), +new SegmentRefreshInfo(updateVO.getCreatedOrUpdatedTimeStamp(), 0)); } } -public boolean isRefreshNeeded(String segmentId, SegmentUpdateStatusManager statusManager) { - UpdateVO updateVO = statusManager.getInvalidTimestampRange(segmentId); +public boolean isRefreshNeeded(Segment seg, UpdateVO updateVo) throws IOException { + SegmentRefreshInfo segmentRefreshInfo = + seg.getSegmentRefreshInfo(updateVo); + String segmentId = seg.getSegmentNo(); if (segmentRefreshTime.get(segmentId) == null) { -segmentRefreshTime.put(segmentId, updateVO.getCreatedOrUpdatedTimeStamp()); +segmentRefreshTime.put(segmentId, segmentRefreshInfo); return true; } if (manualSegmentRefresh.get(segmentId) != null && manualSegmentRefresh.get(segmentId)) { manualSegmentRefresh.put(segmentId, false); return true; } - Long updateTimestamp = updateVO.getLatestUpdateTimestamp(); - boolean isRefresh = - updateTimestamp != null && (updateTimestamp > segmentRefreshTime.get(segmentId)); + + boolean isRefresh = segmentRefreshInfo.compare(segmentRefreshTime.get(segmentId)); if (isRefresh) { segmentRefreshTime.remove(segmentId); } http://git-wip-us.apache.org/repos/asf/carbondata/blob/f1a6c7cf/core/src/main/java/org/apache/carbondata/core/datamap/Segment.java -- diff --git a/core/src/main/java/org/apache/carbondata/c
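Conceptually, the new check compares the cached per-segment refresh info against a freshly computed one. A hypothetical simplification of what SegmentRefreshInfo.compare boils down to (the real class is listed in the diff stats above; the field names here are invented for illustration):

```
// hypothetical sketch only: refresh when the update timestamp advanced
// or when the number of committed files in the segment changed
case class RefreshInfo(updatedTime: Long, fileCount: Int) {
  def needsRefresh(cached: RefreshInfo): Boolean =
    updatedTime > cached.updatedTime || fileCount != cached.fileCount
}
```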
[50/50] [abbrv] carbondata git commit: [CARBONDATA-2465] Improve the carbondata file reliability in data load when direct hdfs write is enabled
[CARBONDATA-2465] Improve the carbondata file reliability in data load when direct hdfs write is enabled Problem: At present, if we enable direct write on HDFS, data is written with a replication of 1, which can cause data loss. Solution: Write with cluster replication. With this change, there is no need to invoke CompleteHdfsBackendThread/completeRemainingHdfsReplicas for the direct hdfs write case. This closes #2235 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/8fe16566 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/8fe16566 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/8fe16566 Branch: refs/heads/spark-2.3 Commit: 8fe165668e2662455991f9de6af817ccc99b81ee Parents: 6297ea0 Author: KanakaKumar Authored: Thu Apr 26 23:39:29 2018 +0530 Committer: kunal642 Committed: Thu May 17 19:42:59 2018 +0530 -- .../apache/carbondata/core/util/CarbonUtil.java | 27 --- .../store/writer/AbstractFactDataWriter.java| 47 ++-- 2 files changed, 14 insertions(+), 60 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/8fe16566/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java index ac0a800..9dc4aa2 100644 --- a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java @@ -2817,33 +2817,6 @@ public final class CarbonUtil { } /** - * This method will complete the remaining hdfs replications - * - * @param fileName hdfs file name - * @param fileType filetype - * @throws CarbonDataWriterException if error occurs - */ - public static void completeRemainingHdfsReplicas(String fileName, FileFactory.FileType fileType) -throws CarbonDataWriterException { -try { - long startTime = System.currentTimeMillis(); - short replication = FileFactory.getDefaultReplication(fileName, fileType); - if (1 == replication) { -return; - } - boolean replicateFlag = FileFactory.setReplication(fileName, fileType, replication); - if (!replicateFlag) { -LOGGER.error("Failed to set replication for " + fileName + " with factor " + replication); - } - LOGGER.info( - "Total copy time (ms) to copy file " + fileName + " is " + (System.currentTimeMillis() - - startTime)); -} catch (IOException e) { - throw new CarbonDataWriterException("Problem while completing remaining HDFS backups", e); -} - } - - /** * This method will read the local carbon data file and write to carbon data file in HDFS * * @param carbonStoreFilePath http://git-wip-us.apache.org/repos/asf/carbondata/blob/8fe16566/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java -- diff --git a/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java b/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java index 6e557cd..8115f97 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java +++ b/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java @@ -176,6 +176,7 @@ public abstract class AbstractFactDataWriter implements CarbonFactDataWriter { CarbonLoadOptionConstants.ENABLE_CARBON_LOAD_DIRECT_WRITE_HDFS, CarbonLoadOptionConstants.ENABLE_CARBON_LOAD_DIRECT_WRITE_HDFS_DEFAULT); this.enableDirectlyWriteData2Hdfs
= "TRUE".equalsIgnoreCase(directlyWriteData2Hdfs); + if (enableDirectlyWriteData2Hdfs) { LOGGER.info("Carbondata will directly write fact data to HDFS."); } else { @@ -274,22 +275,13 @@ public abstract class AbstractFactDataWriter implements CarbonFactDataWriter { protected void commitCurrentFile(boolean copyInCurrentThread) { notifyDataMapBlockEnd(); CarbonUtil.closeStreams(this.fileOutputStream, this.fileChannel); -if (enableDirectlyWriteData2Hdfs) { - if (copyInCurrentThread) { -CarbonUtil.completeRemainingHdfsReplicas(carbonDataFileHdfsPath, -FileFactory.FileType.HDFS); - } else { -executorServiceSubmitList.add(executorService.submit( -new CompleteHdfsBackendThread(carbonDataFileHdfsPath, FileFactory.FileType.HDFS))); - } -} else { +if (!enableDirectlyWriteData2Hdfs) { if (copyInCurrentThread) { CarbonUtil.copyCarbonDataFile
[44/50] [abbrv] carbondata git commit: [CARBONDATA-2468] addition of column to default sort_column is handled
[CARBONDATA-2468] addition of column to default sort_column is handled Issue: default sort_column handling was missing. Solution: a condition was added for default sort_columns. This closes #2293 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/1c5b5265 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/1c5b5265 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/1c5b5265 Branch: refs/heads/spark-2.3 Commit: 1c5b5265921e89f20c5f5b69c135c9a1acc2d1df Parents: f1a6c7c Author: rahulforallp Authored: Thu May 10 16:17:39 2018 +0530 Committer: kumarvishal09 Committed: Mon May 14 17:47:45 2018 +0530 -- .../TestNonTransactionalCarbonTable.scala | 28 +--- .../sdk/file/CarbonWriterBuilder.java | 8 +- 2 files changed, 32 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/1c5b5265/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala -- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala index 58ce5fa..fb9c862 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala @@ -98,9 +98,9 @@ class TestNonTransactionalCarbonTable extends QueryTest with BeforeAndAfterAll { buildTestData(3, false, options) } - def buildTestDataWithSortColumns(): Any = { + def buildTestDataWithSortColumns(sortColumns: List[String]): Any = { FileUtils.deleteDirectory(new File(writerPath)) -buildTestData(3, false, null, List("age", "name")) +buildTestData(3, false, null, sortColumns) } def buildTestData(rows: Int, persistSchema: Boolean, options: util.Map[String, String]): Any = { @@ -302,7 +302,7 @@ class TestNonTransactionalCarbonTable extends QueryTest with BeforeAndAfterAll { } test("test create external table with sort columns") { -buildTestDataWithSortColumns() +buildTestDataWithSortColumns(List("age","name")) assert(new File(writerPath).exists()) sql("DROP TABLE IF EXISTS sdkOutputTable") @@ -316,6 +316,28 @@ class TestNonTransactionalCarbonTable extends QueryTest with BeforeAndAfterAll { checkExistence(sql("describe formatted sdkOutputTable"), true, writerPath) +buildTestDataWithSortColumns(List("age")) +assert(new File(writerPath).exists()) +sql("DROP TABLE IF EXISTS sdkOutputTable") +// with partition +sql( + s"""CREATE EXTERNAL TABLE sdkOutputTable(name string) PARTITIONED BY (age int) STORED BY + |'carbondata' LOCATION + |'$writerPath' """.stripMargin) + +checkExistence(sql("describe formatted sdkOutputTable"), true, "age") + +buildTestDataSingleFile() +assert(new File(writerPath).exists()) +sql("DROP TABLE IF EXISTS sdkOutputTable") +// with partition +sql( + s"""CREATE EXTERNAL TABLE sdkOutputTable(name string) PARTITIONED BY (age int) STORED BY + |'carbondata' LOCATION + |'$writerPath' """.stripMargin) + +checkExistence(sql("describe formatted sdkOutputTable"), true, "name") + sql("DROP TABLE sdkOutputTable") // drop table should not delete the files assert(new File(writerPath).exists())
http://git-wip-us.apache.org/repos/asf/carbondata/blob/1c5b5265/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java -- diff --git a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java index 00ba8a5..1816539 100644 --- a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java +++ b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonWriterBuilder.java @@ -445,6 +445,7 @@ public class CarbonWriterBuilder { "column: " + sortColumn + " specified in sort columns does not exist in schema"); } } +int i = 0; for (Field field : fields) { if (null != field) { int isSortColumn = sortColumnsList.indexOf(field.getFieldName()); @@ -481,9 +482,14 @@ public class CarbonWriterBuilder { ColumnSchema columnSchema = tableSchemaBuilder .addColumn(new StructField(field.get
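For context, a minimal sketch of how sort columns reach this builder code through the SDK; the field names are illustrative, `writerPath` is the output directory as in the tests, and omitting the `sortBy` call exercises the default sort_columns path this change fixes:

```
import org.apache.carbondata.core.metadata.datatype.DataTypes
import org.apache.carbondata.sdk.file.{CarbonWriter, Field, Schema}

val fields = new Array[Field](2)
fields(0) = new Field("name", DataTypes.STRING)
fields(1) = new Field("age", DataTypes.INT)
val writer = CarbonWriter.builder()
  .withSchema(new Schema(fields))
  .sortBy(Array("name")) // drop this line to fall back to the default sort_columns
  .outputPath(writerPath)
  .buildWriterForCSVInput()
```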
[37/50] [abbrv] carbondata git commit: [CARBONDATA-2475] Support Modular Core for Materialized View DataMap for query matching and rewriting
http://git-wip-us.apache.org/repos/asf/carbondata/blob/2881c6bb/integration/spark-common-test/src/test/resources/tpch/region.csv -- diff --git a/integration/spark-common-test/src/test/resources/tpch/region.csv b/integration/spark-common-test/src/test/resources/tpch/region.csv new file mode 100644 index 000..c5ebb63 --- /dev/null +++ b/integration/spark-common-test/src/test/resources/tpch/region.csv @@ -0,0 +1,5 @@ +0|AFRICA|lar deposits. blithely final packages cajole. regular waters are final requests. regular accounts are according to | +1|AMERICA|hs use ironic, even requests. s| +2|ASIA|ges. thinly even pinto beans ca| +3|EUROPE|ly final courts cajole furiously final excuse| +4|MIDDLE EAST|uickly special accounts cajole carefully blithely close requests. carefully final asymptotes haggle furiousl|
[47/50] [abbrv] carbondata git commit: [CARBONDATA-2486][DOC] Update set search mode information in the documentation
[CARBONDATA-2486][DOC] Update set search mode information in the documentation Update set search mode information in the documentation This closes #2312 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/1d302a8b Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/1d302a8b Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/1d302a8b Branch: refs/heads/spark-2.3 Commit: 1d302a8b563c80e762985e10ed2bf9c9917e812d Parents: fc4b7f9 Author: xubo245 <601450...@qq.com> Authored: Wed May 16 20:43:59 2018 +0800 Committer: chenliang613 Committed: Thu May 17 11:55:19 2018 +0800 -- docs/configuration-parameters.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/1d302a8b/docs/configuration-parameters.md -- diff --git a/docs/configuration-parameters.md b/docs/configuration-parameters.md index e39d61b..11cc6ea 100644 --- a/docs/configuration-parameters.md +++ b/docs/configuration-parameters.md @@ -133,7 +133,8 @@ This section provides the details of all the configurations required for CarbonD | carbon.enableMinMax | true | Min max is feature added to enhance query performance. To disable this feature, set it false. | | carbon.dynamicallocation.schedulertimeout | 5 | Specifies the maximum time (unit in seconds) the scheduler can wait for executor to be active. Minimum value is 5 sec and maximum value is 15 sec. | | carbon.scheduler.minregisteredresourcesratio | 0.8 | Specifies the minimum resource (executor) ratio needed for starting the block distribution. The default value is 0.8, which indicates 80% of the requested resource is allocated for starting block distribution. The minimum value is 0.1 min and the maximum value is 1.0. | - + | carbon.search.enabled | false | If set to true, CarbonData will use CarbonReader to do the distributed scan directly instead of going through a compute framework like Spark, thus avoiding compute-framework limitations such as the SQL optimizer and task scheduling overhead. | + * **Global Dictionary Configurations** | Parameter | Default Value | Description |
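A small sketch of setting the documented property programmatically; the key is taken verbatim from the table above, and whether the scan actually switches to the CarbonReader path depends on the build in use:

```
import org.apache.carbondata.core.util.CarbonProperties

// enable the CarbonReader-based distributed scan described above
CarbonProperties.getInstance().addProperty("carbon.search.enabled", "true")
```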
[27/50] [abbrv] carbondata git commit: [CARBONDATA-2474] Support Modular Plan for Materialized View DataMap
http://git-wip-us.apache.org/repos/asf/carbondata/blob/ffddba70/datamap/mv/plan/src/main/scala/org/apache/carbondata/mv/testutil/Tpcds_1_4_QueryBatch.scala -- diff --git a/datamap/mv/plan/src/main/scala/org/apache/carbondata/mv/testutil/Tpcds_1_4_QueryBatch.scala b/datamap/mv/plan/src/main/scala/org/apache/carbondata/mv/testutil/Tpcds_1_4_QueryBatch.scala new file mode 100644 index 000..8262dfa --- /dev/null +++ b/datamap/mv/plan/src/main/scala/org/apache/carbondata/mv/testutil/Tpcds_1_4_QueryBatch.scala @@ -0,0 +1,4293 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.mv.testutil + +object Tpcds_1_4_QueryBatch { + + // should be random generated based on scale + // RC=ulist(random(1, rowcount("store_sales")/5,uniform),5); + val rc = Array(100, 100, 100, 100, 100) + + // Queries the TPCDS 1.4 queries using the qualifcations values in the templates. + + val tpcds1_4Queries = Seq( +("q1", + """ +| WITH customer_total_return AS +| (SELECT sr_customer_sk AS ctr_customer_sk, sr_store_sk AS ctr_store_sk, +| sum(sr_return_amt) AS ctr_total_return +|FROM store_returns, date_dim +|WHERE sr_returned_date_sk = d_date_sk AND d_year = 2000 +|GROUP BY sr_customer_sk, sr_store_sk) +| SELECT c_customer_id +| FROM customer_total_return ctr1, store, customer +| WHERE ctr1.ctr_total_return > +|(SELECT avg(ctr_total_return)*1.2 +| FROM customer_total_return ctr2 +| WHERE ctr1.ctr_store_sk = ctr2.ctr_store_sk) +| AND s_store_sk = ctr1.ctr_store_sk +| AND s_state = 'TN' +| AND ctr1.ctr_customer_sk = c_customer_sk +| ORDER BY c_customer_id LIMIT 100 + """.stripMargin), +("q2", + """ +| WITH wscs as +| (SELECT sold_date_sk, sales_price +| FROM (SELECT ws_sold_date_sk sold_date_sk, ws_ext_sales_price sales_price +|FROM web_sales) x +|UNION ALL +| (SELECT cs_sold_date_sk sold_date_sk, cs_ext_sales_price sales_price +|FROM catalog_sales)), +| wswscs AS +| (SELECT d_week_seq, +|sum(case when (d_day_name='Sunday') then sales_price else null end) sun_sales, +|sum(case when (d_day_name='Monday') then sales_price else null end) mon_sales, +|sum(case when (d_day_name='Tuesday') then sales_price else null end) tue_sales, +|sum(case when (d_day_name='Wednesday') then sales_price else null end) wed_sales, +|sum(case when (d_day_name='Thursday') then sales_price else null end) thu_sales, +|sum(case when (d_day_name='Friday') then sales_price else null end) fri_sales, +|sum(case when (d_day_name='Saturday') then sales_price else null end) sat_sales +| FROM wscs, date_dim +| WHERE d_date_sk = sold_date_sk +| GROUP BY d_week_seq) +| SELECT d_week_seq1 +| ,round(sun_sales1/sun_sales2,2) +| ,round(mon_sales1/mon_sales2,2) +| ,round(tue_sales1/tue_sales2,2) +| ,round(wed_sales1/wed_sales2,2) +| ,round(thu_sales1/thu_sales2,2) +| 
,round(fri_sales1/fri_sales2,2) +| ,round(sat_sales1/sat_sales2,2) +| FROM +| (SELECT wswscs.d_week_seq d_week_seq1 +|,sun_sales sun_sales1 +|,mon_sales mon_sales1 +|,tue_sales tue_sales1 +|,wed_sales wed_sales1 +|,thu_sales thu_sales1 +|,fri_sales fri_sales1 +|,sat_sales sat_sales1 +| FROM wswscs,date_dim +| WHERE date_dim.d_week_seq = wswscs.d_week_seq AND d_year = 2001) y, +| (SELECT wswscs.d_week_seq d_week_seq2 +|,sun_sales sun_sales2 +|,mon_sales mon_sales2 +|,tue_sales tue_sales2 +|,wed_sales wed_sales2 +|,thu_sales thu_sales2 +|,fri_sal
[26/50] [abbrv] carbondata git commit: [CARBONDATA-2474] Support Modular Plan for Materialized View DataMap
http://git-wip-us.apache.org/repos/asf/carbondata/blob/ffddba70/datamap/mv/plan/src/main/scala/org/apache/carbondata/mv/testutil/Tpcds_1_4_Tables.scala -- diff --git a/datamap/mv/plan/src/main/scala/org/apache/carbondata/mv/testutil/Tpcds_1_4_Tables.scala b/datamap/mv/plan/src/main/scala/org/apache/carbondata/mv/testutil/Tpcds_1_4_Tables.scala new file mode 100644 index 000..97772c7 --- /dev/null +++ b/datamap/mv/plan/src/main/scala/org/apache/carbondata/mv/testutil/Tpcds_1_4_Tables.scala @@ -0,0 +1,819 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.mv.testutil + +object Tpcds_1_4_Tables { + val tpcds1_4Tables = Seq[String]( +s""" + |CREATE TABLE catalog_sales ( + | `cs_sold_date_sk` int, + | `cs_sold_time_sk` int, + | `cs_ship_date_sk` int, + | `cs_bill_customer_sk` int, + | `cs_bill_cdemo_sk` int, + | `cs_bill_hdemo_sk` int, + | `cs_bill_addr_sk` int, + | `cs_ship_customer_sk` int, + | `cs_ship_cdemo_sk` int, + | `cs_ship_hdemo_sk` int, + | `cs_ship_addr_sk` int, + | `cs_call_center_sk` int, + | `cs_catalog_page_sk` int, + | `cs_ship_mode_sk` int, + | `cs_warehouse_sk` int, + | `cs_item_sk` int, + | `cs_promo_sk` int, + | `cs_order_number` bigint, + | `cs_quantity` int, + | `cs_wholesale_cost` decimal(7,2), + | `cs_list_price` decimal(7,2), + | `cs_sales_price` decimal(7,2), + | `cs_ext_discount_amt` decimal(7,2), + | `cs_ext_sales_price` decimal(7,2), + | `cs_ext_wholesale_cost` decimal(7,2), + | `cs_ext_list_price` decimal(7,2), + | `cs_ext_tax` decimal(7,2), + | `cs_coupon_amt` decimal(7,2), + | `cs_ext_ship_cost` decimal(7,2), + | `cs_net_paid` decimal(7,2), + | `cs_net_paid_inc_tax` decimal(7,2), + | `cs_net_paid_inc_ship` decimal(7,2), + | `cs_net_paid_inc_ship_tax` decimal(7,2), + | `cs_net_profit` decimal(7,2) + |) + |STORED BY 'org.apache.carbondata.format' + """.stripMargin.trim, +s""" + |CREATE TABLE catalog_returns ( + | `cr_returned_date_sk` int, + | `cr_returned_time_sk` int, + | `cr_item_sk` int, + | `cr_refunded_customer_sk` int, + | `cr_refunded_cdemo_sk` int, + | `cr_refunded_hdemo_sk` int, + | `cr_refunded_addr_sk` int, + | `cr_returning_customer_sk` int, + | `cr_returning_cdemo_sk` int, + | `cr_returning_hdemo_sk` int, + | `cr_returning_addr_sk` int, + | `cr_call_center_sk` int, + | `cr_catalog_page_sk` int, + | `cr_ship_mode_sk` int, + | `cr_warehouse_sk` int, + | `cr_reason_sk` int, + | `cr_order_number` bigint, + | `cr_return_quantity` int, + | `cr_return_amount` decimal(7,2), + | `cr_return_tax` decimal(7,2), + | `cr_return_amt_inc_tax` decimal(7,2), + | `cr_fee` decimal(7,2), + | `cr_return_ship_cost` decimal(7,2), + | `cr_refunded_cash` decimal(7,2), + | `cr_reversed_charge` decimal(7,2), + | `cr_store_credit` decimal(7,2), + | `cr_net_loss` decimal(7,2) + |) + |STORED BY 
'org.apache.carbondata.format' + """.stripMargin.trim, +s""" + |CREATE TABLE inventory ( + | `inv_date_sk` int, + | `inv_item_sk` int, + | `inv_warehouse_sk` int, + | `inv_quantity_on_hand` int + |) + |STORED BY 'org.apache.carbondata.format' + """.stripMargin.trim, +s""" + |CREATE TABLE store_sales ( + | `ss_sold_date_sk` int, + | `ss_sold_time_sk` int, + | `ss_item_sk` int, + | `ss_customer_sk` int, + | `ss_cdemo_sk` int, + | `ss_hdemo_sk` int, + | `ss_addr_sk` int, + | `ss_store_sk` int, + | `ss_promo_sk` int, + | `ss_ticket_number` bigint, + | `ss_quantity` int, + | `ss_wholesale_cost` decimal(7,2), + | `ss_list_price` decimal(7,2), + | `ss_sales_price` decimal(7,2), + | `ss_ext_discount_amt` decimal(7,
[33/50] [abbrv] carbondata git commit: [CARBONDATA-2475] Support Modular Core for Materialized View DataMap for query matching and rewriting
http://git-wip-us.apache.org/repos/asf/carbondata/blob/bf73e9fe/datamap/mv/core/src/main/scala/org/apache/carbondata/mv/rewrite/Utils.scala -- diff --git a/datamap/mv/core/src/main/scala/org/apache/carbondata/mv/rewrite/Utils.scala b/datamap/mv/core/src/main/scala/org/apache/carbondata/mv/rewrite/Utils.scala new file mode 100644 index 000..074d369 --- /dev/null +++ b/datamap/mv/core/src/main/scala/org/apache/carbondata/mv/rewrite/Utils.scala @@ -0,0 +1,358 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.mv.rewrite + +import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeMap, Expression, PredicateHelper} +import org.apache.spark.sql.catalyst.expressions.aggregate._ + +import org.apache.carbondata.mv.plans.modular +import org.apache.carbondata.mv.plans.modular.ModularPlan + +/** + * Utility functions used by mqo matcher to convert our plan to new aggregation code path + */ +private[rewrite] object Utils extends PredicateHelper { + + // use for match qb_2a, qb_2q and sel_3a, sel_3q + private def doMatch( + operator_a: modular.Matchable, + operator_q: modular.Matchable, + alias_m: AttributeMap[Alias]): Option[modular.Matchable] = { +var matchable = true +val matched = operator_q.transformExpressions { + case cnt_q@AggregateExpression(Count(exprs_q), _, false, _) => +operator_a.outputList.find { + case alias: Alias if alias_m.contains(alias.toAttribute) && + alias_m(alias.toAttribute).child.isInstanceOf[AggregateExpression] && + alias_m(alias.toAttribute).child.asInstanceOf[AggregateExpression] + .aggregateFunction.isInstanceOf[Count] => +// case for groupby +val cnt_a = alias_m(alias.toAttribute).child.asInstanceOf[AggregateExpression] +val exprs_a = cnt_a.aggregateFunction.asInstanceOf[Count].children +if (cnt_a.isDistinct != cnt_q.isDistinct || exprs_q.length != exprs_a.length) { + false +} else { + exprs_a.sortBy(_.hashCode()).zip(exprs_q.sortBy(_.hashCode())) +.forall(p => p._1.semanticEquals(p._2)) +} + + case attr: Attribute if alias_m.contains(attr) && + alias_m(attr).child.isInstanceOf[AggregateExpression] && + alias_m(attr).child.asInstanceOf[AggregateExpression] +.aggregateFunction.isInstanceOf[Count] => +val cnt_a = alias_m(attr).child.asInstanceOf[AggregateExpression] +val exprs_a = cnt_a.aggregateFunction.asInstanceOf[Count].children +if (cnt_a.isDistinct != cnt_q.isDistinct || exprs_q.length != exprs_a.length) { + false +} else { + exprs_a.sortBy(_.hashCode()).zip(exprs_q.sortBy(_.hashCode())) +.forall(p => p._1.semanticEquals(p._2)) +} + + case _ => false +}.map { cnt => AggregateExpression( +Sum(cnt.toAttribute), +cnt_q.mode, +isDistinct = false, +cnt_q.resultId) +}.getOrElse { matchable = false; cnt_q } + + case sum_q@AggregateExpression(Sum(expr_q), _, false, _) => 
+operator_a.outputList.find { + case alias: Alias if alias_m.contains(alias.toAttribute) && + alias_m(alias.toAttribute).child.isInstanceOf[AggregateExpression] && + alias_m(alias.toAttribute).child.asInstanceOf[AggregateExpression] + .aggregateFunction.isInstanceOf[Sum] => +val sum_a = alias_m(alias.toAttribute).child.asInstanceOf[AggregateExpression] +val expr_a = sum_a.aggregateFunction.asInstanceOf[Sum].child +if (sum_a.isDistinct != sum_q.isDistinct) { + false +} else { + expr_a.semanticEquals(expr_q) +} + + case attr: Attribute if alias_m.contains(attr) && + ali
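The Count-to-Sum translation above encodes the standard rollup rule: a per-group count stored in the MV can only be combined across coarser groups by summing it, never by re-counting. Roughly, with hypothetical table names:

```
// MV (finer grouping):  SELECT a, b, count(*) AS cnt FROM t GROUP BY a, b
// user query:           SELECT a, count(*) FROM t GROUP BY a
// rewrite over the MV:  SELECT a, sum(cnt) FROM mv GROUP BY a
// i.e. a Count in the MV output surfaces as Sum(cnt) in the rewritten plan,
// which is what the AggregateExpression(Sum(cnt.toAttribute), ...) above builds
```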
[48/50] [abbrv] carbondata git commit: [CARBONDATA-2479] Multiple issue fixes in SDK writer and external table flow
[CARBONDATA-2479] Multiple issue fixes in SDK writer and external table flow [CARBONDATA-2479] Multiple issues: fixed external table path display; fixed default value for array in AVRO; fixed NPE when deleting the folder before the second select query; fixed avro float value precision change issue. This closes #2306 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/cf1b50bc Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/cf1b50bc Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/cf1b50bc Branch: refs/heads/spark-2.3 Commit: cf1b50bcc697be5353be469737a7dacdc57b1d7e Parents: 1d302a8 Author: ajantha-bhat Authored: Mon May 14 15:28:23 2018 +0530 Committer: ravipesala Committed: Thu May 17 18:47:04 2018 +0530 -- .../LatestFilesReadCommittedScope.java | 2 +- .../TestNonTransactionalCarbonTable.scala | 82 ++-- .../table/CarbonDescribeFormattedCommand.scala | 7 +- .../carbondata/sdk/file/AvroCarbonWriter.java | 12 +-- .../carbondata/sdk/file/CarbonReaderTest.java | 2 +- 5 files changed, 85 insertions(+), 20 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/cf1b50bc/core/src/main/java/org/apache/carbondata/core/readcommitter/LatestFilesReadCommittedScope.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/readcommitter/LatestFilesReadCommittedScope.java b/core/src/main/java/org/apache/carbondata/core/readcommitter/LatestFilesReadCommittedScope.java index 2306330..6106174 100644 --- a/core/src/main/java/org/apache/carbondata/core/readcommitter/LatestFilesReadCommittedScope.java +++ b/core/src/main/java/org/apache/carbondata/core/readcommitter/LatestFilesReadCommittedScope.java @@ -138,7 +138,7 @@ public class LatestFilesReadCommittedScope implements ReadCommittedScope { @Override public void takeCarbonIndexFileSnapShot() throws IOException { // Read the current file Path get the list of indexes from the path. CarbonFile file = FileFactory.getCarbonFile(carbonFilePath); -if (file == null) { +if (file.listFiles().length == 0) { // For nonTransactional table, files can be removed at any point of time.
// So cannot assume files will be present throw new IOException("No files are present in the table location :" + carbonFilePath); http://git-wip-us.apache.org/repos/asf/carbondata/blob/cf1b50bc/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala -- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala index 5ab1c60..cc3cbb5 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala @@ -1050,7 +1050,7 @@ class TestNonTransactionalCarbonTable extends QueryTest with BeforeAndAfterAll { | "type": "record", | "fields": [ | { "name": "name", "type": "string"}, -| { "name": "age", "type": "int"}, +| { "name": "age", "type": "float"}, | { "name": "address", "type": { |"type" : "record", "name" : "my_address", |"fields" : [ @@ -1059,11 +1059,11 @@ class TestNonTransactionalCarbonTable extends QueryTest with BeforeAndAfterAll { |]} """.stripMargin -val json = """ {"name":"bob", "age":10, "address" : {"street":"abc", "city":"bang"}} """ +val json = """ {"name":"bob", "age":10.24, "address" : {"street":"abc", "city":"bang"}} """ val fields = new Array[Field](3) fields(0) = new Field("name", DataTypes.STRING) -fields(1) = new Field("age", DataTypes.INT) +fields(1) = new Field("age", DataTypes.DOUBLE) val fld = new util.ArrayList[StructField] fld.add(new StructField("street", DataTypes.STRING)) fld.add(new StructField("city", DataTypes.STRING)) @@ -1340,11 +1340,10 @@ class TestNonTransactionalCarbonTable extends QueryTest with BeforeAndAfterAll { s"""CREATE EXTERNAL TABLE sdkOutputTable STORED BY 'carbondata' LOCATION |'$writerPath' """.stripMargin) - checkAnswer(sql("select * from sdkOutputTable"), Seq( - Row("bob", 10, Row("abc","bang")), - Row("bob", 10, Row("abc","bang")), - Row("bob", 10, Row("abc","bang" +
[30/50] [abbrv] carbondata git commit: [CARBONDATA-2459][DataMap] Add cache for bloom filter datamap
[CARBONDATA-2459][DataMap] Add cache for bloom filter datamap Loading a bloom filter from the bloomindex file is slow. Adding a cache for this procedure should clearly improve query performance. This closes #2300 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/d14c403f Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/d14c403f Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/d14c403f Branch: refs/heads/spark-2.3 Commit: d14c403f6282ca8b574dae2fa5ab77caa5cf3c18 Parents: ffddba7 Author: xuchuanyin Authored: Fri May 11 21:49:43 2018 +0800 Committer: Jacky Li Committed: Sun May 13 02:05:30 2018 +0800 -- .../core/constants/CarbonCommonConstants.java | 13 ++ .../datamap/bloom/BloomCoarseGrainDataMap.java | 108 +++-- .../bloom/BloomCoarseGrainDataMapFactory.java | 4 + .../datamap/bloom/BloomDataMapCache.java| 232 +++ .../datamap/bloom/BloomDataMapWriter.java | 5 +- 5 files changed, 283 insertions(+), 79 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/d14c403f/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java index 56607b9..f3a821b 100644 --- a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java +++ b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java @@ -1729,6 +1729,19 @@ public final class CarbonCommonConstants { // Property to enable parallel datamap loading for a table public static final String CARBON_LOAD_DATAMAPS_PARALLEL = "carbon.load.datamaps.parallel."; + /** + * Cache size in MB for bloom filter datamap. It is an integer and should be greater than 0 + * and it will be used during query. + */ + @CarbonProperty + public static final String CARBON_QUERY_DATAMAP_BLOOM_CACHE_SIZE = + "carbon.query.datamap.bloom.cache.size"; + + /** + * Default cache size in MB for the bloom filter datamap.
+ */ + public static final String CARBON_QUERY_DATAMAP_BLOOM_CACHE_SIZE_DEFAULT_VAL = "512"; + private CarbonCommonConstants() { } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/d14c403f/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java -- diff --git a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java index 725d5cd..09de25e 100644 --- a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java +++ b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java @@ -17,13 +17,10 @@ package org.apache.carbondata.datamap.bloom; -import java.io.DataInputStream; -import java.io.EOFException; +import java.io.File; import java.io.IOException; -import java.io.ObjectInputStream; import java.io.UnsupportedEncodingException; import java.util.ArrayList; -import java.util.HashSet; import java.util.List; import java.util.Set; @@ -45,13 +42,8 @@ import org.apache.carbondata.core.scan.expression.conditional.EqualToExpression; import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf; import org.apache.carbondata.core.util.CarbonUtil; -import com.google.common.collect.ArrayListMultimap; -import com.google.common.collect.Multimap; -import org.apache.commons.lang3.StringUtils; -import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.PathFilter; /** * BloomDataCoarseGrainMap is constructed in blocklet level. For each indexed column, @@ -62,15 +54,16 @@ import org.apache.hadoop.fs.PathFilter; public class BloomCoarseGrainDataMap extends CoarseGrainDataMap { private static final LogService LOGGER = LogServiceFactory.getLogService(BloomCoarseGrainDataMap.class.getName()); + public static final String BLOOM_INDEX_SUFFIX = ".bloomindex"; private Set<String> indexedColumn; private List<BloomDMModel> bloomIndexList; - private Multimap<String, List<BloomDMModel>> indexCol2BloomDMList; - public static final String BLOOM_INDEX_SUFFIX = ".bloomindex"; private String shardName; + private BloomDataMapCache bloomDataMapCache; + private Path indexPath; @Override public void init(DataMapModel dataMapModel) throws IOException { -Path indexPath = FileFactory.getPath(dataMapModel.getFilePath()); +this.indexPath = File
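A minimal sketch of tuning the new cache, using the property key added above (512 MB is the shipped default):

```
import org.apache.carbondata.core.util.CarbonProperties

// key and default come from CarbonCommonConstants in this patch; the value is in MB
CarbonProperties.getInstance()
  .addProperty("carbon.query.datamap.bloom.cache.size", "1024")
```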
[38/50] [abbrv] carbondata git commit: [CARBONDATA-2475] Support Modular Core for Materialized View DataMap for query matching and rewriting
http://git-wip-us.apache.org/repos/asf/carbondata/blob/2881c6bb/integration/spark-common-test/src/test/resources/tpch/orders.csv -- diff --git a/integration/spark-common-test/src/test/resources/tpch/orders.csv b/integration/spark-common-test/src/test/resources/tpch/orders.csv new file mode 100644 index 000..56b3064 --- /dev/null +++ b/integration/spark-common-test/src/test/resources/tpch/orders.csv @@ -0,0 +1,1000 @@ +1|36901|O|173665.47|1996-01-02|5-LOW|Clerk#00951|0|nstructions sleep furiously among | +2|78002|O|46929.18|1996-12-01|1-URGENT|Clerk#00880|0| foxes. pending accounts at the pending, silent asymptot| +3|123314|F|193846.25|1993-10-14|5-LOW|Clerk#00955|0|sly final accounts boost. carefully regular ideas cajole carefully. depos| +4|136777|O|32151.78|1995-10-11|5-LOW|Clerk#00124|0|sits. slyly regular warthogs cajole. regular, regular theodolites acro| +5|44485|F|144659.20|1994-07-30|5-LOW|Clerk#00925|0|quickly. bold deposits sleep slyly. packages use slyly| +6|55624|F|58749.59|1992-02-21|4-NOT SPECIFIED|Clerk#00058|0|ggle. special, final requests are against the furiously specia| +7|39136|O|252004.18|1996-01-10|2-HIGH|Clerk#00470|0|ly special requests | +32|130057|O|208660.75|1995-07-16|2-HIGH|Clerk#00616|0|ise blithely bold, regular requests. quickly unusual dep| +33|66958|F|163243.98|1993-10-27|3-MEDIUM|Clerk#00409|0|uriously. furiously final request| +34|61001|O|58949.67|1998-07-21|3-MEDIUM|Clerk#00223|0|ly final packages. fluffily final deposits wake blithely ideas. spe| +35|127588|O|253724.56|1995-10-23|4-NOT SPECIFIED|Clerk#00259|0|zzle. carefully enticing deposits nag furio| +36|115252|O|68289.96|1995-11-03|1-URGENT|Clerk#00358|0| quick packages are blithely. slyly silent accounts wake qu| +37|86116|F|206680.66|1992-06-03|3-MEDIUM|Clerk#00456|0|kly regular pinto beans. carefully unusual waters cajole never| +38|124828|O|82500.05|1996-08-21|4-NOT SPECIFIED|Clerk#00604|0|haggle blithely. furiously express ideas haggle blithely furiously regular re| +39|81763|O|341734.47|1996-09-20|3-MEDIUM|Clerk#00659|0|ole express, ironic requests: ir| +64|32113|F|39414.99|1994-07-16|3-MEDIUM|Clerk#00661|0|wake fluffily. sometimes ironic pinto beans about the dolphin| +65|16252|P|110643.60|1995-03-18|1-URGENT|Clerk#00632|0|ular requests are blithely pending orbits-- even requests against the deposit| +66|129200|F|103740.67|1994-01-20|5-LOW|Clerk#00743|0|y pending requests integrate| +67|56614|O|169405.01|1996-12-19|4-NOT SPECIFIED|Clerk#00547|0|symptotes haggle slyly around the furiously iron| +68|28547|O|330793.52|1998-04-18|3-MEDIUM|Clerk#00440|0| pinto beans sleep carefully. blithely ironic deposits haggle furiously acro| +69|84487|F|197689.49|1994-06-04|4-NOT SPECIFIED|Clerk#00330|0| depths atop the slyly thin deposits detect among the furiously silent accou| +70|64340|F|113534.42|1993-12-18|5-LOW|Clerk#00322|0| carefully ironic request| +71|3373|O|276992.74|1998-01-24|4-NOT SPECIFIED|Clerk#00271|0| express deposits along the blithely regul| +96|107779|F|68989.90|1994-04-17|2-HIGH|Clerk#00395|0|oost furiously. pinto| +97|21061|F|110512.84|1993-01-29|3-MEDIUM|Clerk#00547|0|hang blithely along the regular accounts. furiously even ideas after the| +98|104480|F|69168.33|1994-09-25|1-URGENT|Clerk#00448|0|c asymptotes. quickly regular packages should have to nag re| +99|88910|F|112126.95|1994-03-13|4-NOT SPECIFIED|Clerk#00973|0|e carefully ironic packages. 
pending| +100|147004|O|187782.63|1998-02-28|4-NOT SPECIFIED|Clerk#00577|0|heodolites detect slyly alongside of the ent| +101|27998|O|124906.11|1996-03-17|3-MEDIUM|Clerk#00419|0|ding accounts above the slyly final asymptote| +102|716|O|164529.10|1997-05-09|2-HIGH|Clerk#00596|0| slyly according to the asymptotes. carefully final packages integrate furious| +103|29101|O|126990.79|1996-06-20|4-NOT SPECIFIED|Clerk#00090|0|ges. carefully unusual instructions haggle quickly regular f| +128|73957|F|66195.16|1992-06-15|1-URGENT|Clerk#00385|0|ns integrate fluffily. ironic asymptotes after the regular excuses nag around | +129|71134|F|261013.14|1992-11-19|5-LOW|Clerk#00859|0|ing tithes. carefully pending deposits boost about the silently express | +130|36964|F|189484.12|1992-05-08|2-HIGH|Clerk#00036|0|le slyly unusual, regular packages? express deposits det| +131|92749|F|130464.09|1994-06-08|3-MEDIUM|Clerk#00625|0|after the fluffily special foxes integrate s| +132|26395|F|144947.21|1993-06-11|3-MEDIUM|Clerk#00488|0|sits are daringly accounts. carefully regular foxes sleep slyly about the| +133|44000|O|114663.57|1997-11-29|1-URGENT|Clerk#00738|0|usly final asymptotes | +134|6199|F|200354.30|1992-05-01|4-NOT SPECIFIED|Clerk#00711|0|lar theodolites boos| +135|60481|O|213713.99|1995-10-21|4-NOT SPECIFIED|Clerk#00
[23/50] [abbrv] carbondata git commit: [CARBONDATA-2460] [CARBONDATA-2461] [CARBONDATA-2462] Fixed bug in AvroCarbonWriter
[CARBONDATA-2460] [CARBONDATA-2461] [CARBONDATA-2462] Fixed bug in AvroCarbonWriter Issue1: If Null type is passed from avro schema then Unsupported data type exception is thrown. Solution1: Ignore column which has NULL data type. Issue2: Array fields were being cast to ArrayList without any instance check. Solution2: Check the instance of Array fields and cast appropriately. This closes #2291 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/3d8b085a Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/3d8b085a Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/3d8b085a Branch: refs/heads/spark-2.3 Commit: 3d8b085a55f551122c7528b6981f1785a44fef3c Parents: 61afa42 Author: kunal642 Authored: Wed May 9 18:32:23 2018 +0530 Committer: kumarvishal09 Committed: Fri May 11 13:38:53 2018 +0530 -- .../TestNonTransactionalCarbonTable.scala | 47 - .../carbondata/sdk/file/AvroCarbonWriter.java | 103 ++- 2 files changed, 122 insertions(+), 28 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/3d8b085a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala -- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala index 376501b..86fda21 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala @@ -32,8 +32,6 @@ import org.apache.carbondata.common.exceptions.sql.MalformedCarbonCommandExcepti import org.apache.carbondata.core.constants.CarbonCommonConstants import org.apache.carbondata.core.datastore.filesystem.CarbonFile import org.apache.carbondata.core.datastore.impl.FileFactory -import org.apache.carbondata.core.util.CarbonUtil -import org.apache.carbondata.sdk.file.AvroCarbonWriter import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil} import scala.collection.JavaConverters._ import scala.collection.mutable @@ -43,7 +41,7 @@ import org.apache.commons.lang.CharEncoding import tech.allegro.schema.json2avro.converter.JsonAvroConverter import org.apache.carbondata.core.metadata.datatype.{DataTypes, StructField} -import org.apache.carbondata.sdk.file.{CarbonWriter, CarbonWriterBuilder, Field, Schema} +import org.apache.carbondata.sdk.file.{AvroCarbonWriter, CarbonWriter, CarbonWriterBuilder, Field, Schema} class TestNonTransactionalCarbonTable extends QueryTest with BeforeAndAfterAll { @@ -51,7 +49,7 @@ class TestNonTransactionalCarbonTable extends QueryTest with BeforeAndAfterAll { var writerPath = new File(this.getClass.getResource("/").getPath + "../." + - "./src/test/resources/SparkCarbonFileFormat/WriterOutput/") +"./target/SparkCarbonFileFormat/WriterOutput/") .getCanonicalPath //getCanonicalPath gives path with \, so code expects /. Need to handle in code ? 
writerPath = writerPath.replace("\\", "/") @@ -1795,6 +1793,47 @@ class TestNonTransactionalCarbonTable extends QueryTest with BeforeAndAfterAll { }.getMessage.toLowerCase.contains("column: name specified in sort columns")) } + test("test if load is passing with NULL type") { +val schema1 = + """{ +| "namespace": "com.apache.schema", +| "type": "record", +| "name": "StudentActivity", +| "fields": [ +| { +| "name": "id", +| "type": "null" +| }, +| { +| "name": "course_details", +| "type": { +| "name": "course_details", +| "type": "record", +| "fields": [ +| { +| "name": "course_struct_course_time", +| "type": "string" +| } +| ] +| } +| } +| ] +|}""".stripMargi
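A minimal sketch of the two fixes above (simplified stand-ins, not the actual AvroCarbonWriter code): a field whose Avro schema type is NULL is skipped instead of raising an unsupported-data-type error, and an array value is instance-checked before it is cast.

import java.util.ArrayList;
import java.util.List;

public class AvroFixSketch {
  // stand-in for org.apache.avro.Schema.Type
  enum AvroType { NULL, ARRAY, STRING }

  // convert one avro field value to its carbon-side representation
  static Object toCarbonObject(AvroType type, Object value) {
    switch (type) {
      case NULL:
        return null; // Issue1 fix: ignore columns declared with the "null" type
      case ARRAY:
        // Issue2 fix: avro may hand back a GenericData.Array or a plain
        // ArrayList, so check the instance before casting
        if (value instanceof List) {
          return new ArrayList<Object>((List<?>) value);
        }
        return null;
      default:
        return value;
    }
  }
}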
[36/50] [abbrv] carbondata git commit: [CARBONDATA-2475] Support Modular Core for Materialized View DataMap for query matching and rewriting
http://git-wip-us.apache.org/repos/asf/carbondata/blob/2881c6bb/integration/spark-common-test/src/test/resources/tpch/supplier.csv -- diff --git a/integration/spark-common-test/src/test/resources/tpch/supplier.csv b/integration/spark-common-test/src/test/resources/tpch/supplier.csv new file mode 100644 index 000..0f20c29 --- /dev/null +++ b/integration/spark-common-test/src/test/resources/tpch/supplier.csv @@ -0,0 +1,1000 @@ +1|Supplier#1| N kD4on9OM Ipw3,gf0JBoQDd7tgrzrddZ|17|27-918-335-1736|5755.94|each slyly above the careful| +2|Supplier#2|89eJ5ksX3ImxJQBvxObC,|5|15-679-861-2259|4032.68| slyly bold instructions. idle dependen| +3|Supplier#3|q1,G3Pj6OjIuUYfUoH18BFTKP5aU9bEV3|1|11-383-516-1199|4192.40|blithely silent requests after the express dependencies are sl| +4|Supplier#4|Bk7ah4CK8SYQTepEmvMkkgMwg|15|25-843-787-7479|4641.08|riously even requests above the exp| +5|Supplier#5|Gcdm2rJRzl5qlTVzc|11|21-151-690-3663|-283.84|. slyly regular pinto bea| +6|Supplier#6|tQxuVm7s7CnK|14|24-696-997-4969|1365.79|final accounts. regular dolphins use against the furiously ironic decoys. | +7|Supplier#7|s,4TicNGB4uO6PaSqNBUq|23|33-990-965-2201|6820.35|s unwind silently furiously regular courts. final requests are deposits. requests wake quietly blit| +8|Supplier#8|9Sq4bBH2FQEmaFOocY45sRTxo6yuoG|17|27-498-742-3860|7627.85|al pinto beans. asymptotes haggl| +9|Supplier#9|1KhUgZegwM3ua7dsYmekYBsK|10|20-403-398-8662|5302.37|s. unusual, even requests along the furiously regular pac| +10|Supplier#00010|Saygah3gYWMp72i PY|24|34-852-489-8585|3891.91|ing waters. regular requests ar| +11|Supplier#00011|JfwTs,LZrV, M,9C|18|28-613-996-1505|3393.08|y ironic packages. slyly ironic accounts affix furiously; ironically unusual excuses across the flu| +12|Supplier#00012|aLIW q0HYd|8|18-179-925-7181|1432.69|al packages nag alongside of the bold instructions. express, daring accounts| +13|Supplier#00013|HK71HQyWoqRWOX8GI FpgAifW,2PoH|3|13-727-620-7813|9107.22|requests engage regularly instructions. furiously special requests ar| +14|Supplier#00014|EXsnO5pTNj4iZRm|15|25-656-247-5058|9189.82|l accounts boost. fluffily bold warhorses wake| +15|Supplier#00015|olXVbNBfVzRqgokr1T,Ie|8|18-453-357-6394|308.56| across the furiously regular platelets wake even deposits. quickly express she| +16|Supplier#00016|YjP5C55zHDXL7LalK27zfQnwejdpin4AMpvh|22|32-822-502-4215|2972.26|ously express ideas haggle quickly dugouts? fu| +17|Supplier#00017|c2d,ESHRSkK3WYnxpgw6aOqN0q|19|29-601-884-9219|1687.81|eep against the furiously bold ideas. fluffily bold packa| +18|Supplier#00018|PGGVE5PWAMwKDZw |16|26-729-551-1115|7040.82|accounts snooze slyly furiously bold | +19|Supplier#00019|edZT3es,nBFD8lBXTGeTl|24|34-278-310-2731|6150.38|refully final foxes across the dogged theodolites sleep slyly abou| +20|Supplier#00020|iybAE,RmTymrZVYaFZva2SH,j|3|13-715-945-6730|530.82|n, ironic ideas would nag blithely about the slyly regular accounts. silent, expr| +21|Supplier#00021|81CavellcrJ0PQ3CPBID0Z0JwyJm0ka5igEs|2|12-253-590-5816|9365.80|d. instructions integrate sometimes slyly pending instructions. accounts nag among the | +22|Supplier#00022|okiiQFk 8lm6EVX6Q0,bEcO|4|14-144-830-2814|-966.20| ironically among the deposits. closely expre| +23|Supplier#00023|ssetugTcXc096qlD7 2TL5crEEeS3zk|9|19-559-422-5776|5926.41|ges could have to are ironic deposits. regular, even request| +24|Supplier#00024|C4nPvLrVmKPPabFCj|0|10-620-939-2254|9170.71|usly pending deposits. 
slyly final accounts run | +25|Supplier#00025|RCQKONXMFnrodzz6w7fObFVV6CUm2q|22|32-431-945-3541|9198.31|ely regular deposits. carefully regular sauternes engage furiously above the regular accounts. idly | +26|Supplier#00026|iV,MHzAx6Z939uzFNkq09M0a1 MBfH7|21|31-758-894-4436|21.18| ideas poach carefully after the blithely bold asymptotes. furiously pending theodoli| +27|Supplier#00027|lC4CjKwNHUr6L4xIpzOBK4NlHkFTg|18|28-708-999-2028|1887.62|s according to the quickly regular hockey playe| +28|Supplier#00028|GBhvoRh,7YIN V|0|10-538-384-8460|-891.99|ld requests across the pinto beans are carefully against the quickly final courts. accounts sleep | +29|Supplier#00029|658tEqXLPvRd6xpFdqC2|1|11-555-705-5922|-811.62|y express ideas play furiously. even accounts sleep fluffily across the accounts. careful| +30|Supplier#00030|84NmC1rmQfO0fj3zkobLT|16|26-940-594-4852|8080.14|ias. carefully silent accounts cajole blithely. pending, special accounts cajole quickly above the f| +31|Supplier#00031|fRJimA7zchyApqRLHcQeocVpP|16|26-515-530-4159|5916.91|into beans wake after the special packages. slyly fluffy requests cajole furio| +32|Supplier#00032|yvoD3TtZSx1skQNCK8agk5bZlZLug|23|33-484-637-7873|3556.47|usly even depths. quickly ironic theodolites s| +33|S
[42/50] [abbrv] carbondata git commit: [CARBONDATA-2475] Support Modular Core for Materialized View DataMap for query matching and rewriting
[CARBONDATA-2475] Support Modular Core for Materialized View DataMap for query matching and rewriting Integrate MV DataMap to Carbon This closes #2302 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/2881c6bb Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/2881c6bb Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/2881c6bb Branch: refs/heads/spark-2.3 Commit: 2881c6bbc17f34c0e17b6483130e70311e41c653 Parents: bf73e9f Author: ravipesala Authored: Sat May 12 10:41:01 2018 +0530 Committer: Jacky Li Committed: Sun May 13 17:08:56 2018 +0800 -- .../datamap/status/DataMapStatusManager.java| 16 + .../mv/rewrite/MVCreateTestCase.scala | 16 + .../mv/rewrite/MVSampleTestCase.scala | 16 + .../carbondata/mv/rewrite/MVTPCDSTestCase.scala | 16 + .../carbondata/mv/rewrite/MVTpchTestCase.scala | 16 + .../carbondata/mv/rewrite/Tpcds_1_4_Suite.scala | 80 -- .../mv/plans/LogicalToModularPlanSuite.scala|8 +- .../carbondata/mv/plans/ModularToSQLSuite.scala |5 +- .../src/test/resources/data_big.csv | 91 ++ .../src/test/resources/tpch/customers.csv | 500 + .../src/test/resources/tpch/lineitem.csv| 1000 ++ .../src/test/resources/tpch/nation.csv | 25 + .../src/test/resources/tpch/orders.csv | 1000 ++ .../src/test/resources/tpch/region.csv |5 + .../src/test/resources/tpch/supplier.csv| 1000 ++ .../apache/spark/sql/hive/CarbonAnalyzer.scala | 19 +- pom.xml |1 + 17 files changed, 3727 insertions(+), 87 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/2881c6bb/core/src/main/java/org/apache/carbondata/core/datamap/status/DataMapStatusManager.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datamap/status/DataMapStatusManager.java b/core/src/main/java/org/apache/carbondata/core/datamap/status/DataMapStatusManager.java index b540146..d0ff589 100644 --- a/core/src/main/java/org/apache/carbondata/core/datamap/status/DataMapStatusManager.java +++ b/core/src/main/java/org/apache/carbondata/core/datamap/status/DataMapStatusManager.java @@ -53,6 +53,22 @@ public class DataMapStatusManager { return storageProvider.getDataMapStatusDetails(); } + /** + * Get enabled datamap status details + * @return + * @throws IOException + */ + public static DataMapStatusDetail[] getEnabledDataMapStatusDetails() throws IOException { +DataMapStatusDetail[] dataMapStatusDetails = storageProvider.getDataMapStatusDetails(); +List statusDetailList = new ArrayList<>(); +for (DataMapStatusDetail statusDetail : dataMapStatusDetails) { + if (statusDetail.getStatus() == DataMapStatus.ENABLED) { +statusDetailList.add(statusDetail); + } +} +return statusDetailList.toArray(new DataMapStatusDetail[statusDetailList.size()]); + } + public static Map readDataMapStatusMap() throws IOException { DataMapStatusDetail[] details = storageProvider.getDataMapStatusDetails(); Map map = new HashMap<>(details.length); http://git-wip-us.apache.org/repos/asf/carbondata/blob/2881c6bb/datamap/mv/core/src/test/scala/org/apache/carbondata/mv/rewrite/MVCreateTestCase.scala -- diff --git a/datamap/mv/core/src/test/scala/org/apache/carbondata/mv/rewrite/MVCreateTestCase.scala b/datamap/mv/core/src/test/scala/org/apache/carbondata/mv/rewrite/MVCreateTestCase.scala index 184fdc1..4b636db 100644 --- a/datamap/mv/core/src/test/scala/org/apache/carbondata/mv/rewrite/MVCreateTestCase.scala +++ b/datamap/mv/core/src/test/scala/org/apache/carbondata/mv/rewrite/MVCreateTestCase.scala @@ -1,3 +1,19 @@ +/* + * Licensed to the Apache Software 
Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package org.apache.carbondata.mv.rewrite import java.io.File
[15/50] [abbrv] carbondata git commit: [CARBONDATA-2401] Date and Timestamp options are not working in SDK
[CARBONDATA-2401] Date and Timestamp options are not working in SDK Issue: Date and Timestamp formats are passed in the SDK options, but the data load fails even when the data matches the specified format. Cause: The load model is overwritten with the defaults. Fix: If the user has passed the options, the load model should use them; otherwise it should fall back to the defaults. This closes #2227 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/b2060c61 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/b2060c61 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/b2060c61 Branch: refs/heads/spark-2.3 Commit: b2060c61104895d5599b1c044b725d56a39748f8 Parents: ceb7c8d Author: BJangir Authored: Wed Apr 25 18:27:58 2018 +0530 Committer: kumarvishal09 Committed: Thu May 10 14:00:59 2018 +0530 -- .../TestNonTransactionalCarbonTable.scala | 39 .../loading/model/CarbonLoadModelBuilder.java | 4 -- 2 files changed, 39 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/b2060c61/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala -- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala index 2f88c40..ca6ac3c 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala @@ -17,6 +17,8 @@ package org.apache.carbondata.spark.testsuite.createTable +import java.sql.Timestamp +import java.io.{File, FileFilter, IOException} import java.io.{File, FileFilter} import java.util @@ -31,6 +33,7 @@ import org.apache.carbondata.core.constants.CarbonCommonConstants import org.apache.carbondata.core.datastore.filesystem.CarbonFile import org.apache.carbondata.core.datastore.impl.FileFactory import org.apache.carbondata.core.util.CarbonUtil +import org.apache.carbondata.sdk.file.{CarbonWriter, CarbonWriterBuilder, Field, Schema} import org.apache.carbondata.sdk.file.{AvroCarbonWriter, CarbonWriter, Field, Schema} import scala.collection.JavaConverters._ import scala.collection.mutable @@ -39,6 +42,10 @@ import org.apache.avro import org.apache.commons.lang.CharEncoding import tech.allegro.schema.json2avro.converter.JsonAvroConverter +import org.apache.carbondata.core.metadata.datatype.{DataTypes, StructField} +import org.apache.carbondata.sdk.file.{CarbonWriter, CarbonWriterBuilder, Field, Schema} + + class TestNonTransactionalCarbonTable extends QueryTest with BeforeAndAfterAll { var writerPath = new File(this.getClass.getResource("/").getPath @@ -669,6 +676,38 @@ class TestNonTransactionalCarbonTable extends QueryTest with BeforeAndAfterAll { cleanTestData() } + test("test custom format for date and timestamp in sdk") { + +cleanTestData() +var options = Map("dateformat" -> "dd-MM-yyyy" ,"timestampformat" -> "dd-MM-yyyy HH:mm:ss").asJava + +val fields: Array[Field] = new Array[Field](4) +fields(0) = new Field("stringField", DataTypes.STRING) +fields(1) = new Field("intField", DataTypes.INT) +fields(2) = new Field("mydate", DataTypes.DATE) +fields(3) = new Field("mytime", DataTypes.TIMESTAMP) + +val
builder: CarbonWriterBuilder = CarbonWriter.builder.withSchema(new Schema(fields)) + .outputPath(writerPath).isTransactionalTable(false).withLoadOptions(options) + +val writer: CarbonWriter = builder.buildWriterForCSVInput +writer.write(Array("babu","1","02-01-2002","02-01-2002 01:01:00")); +writer.close() + +assert(new File(writerPath).exists()) + +sql("DROP TABLE IF EXISTS sdkOutputTable") +sql( + s"""CREATE EXTERNAL TABLE sdkOutputTable STORED BY 'carbondata' LOCATION + |'$writerPath' """.stripMargin) + +checkAnswer(sql("select * from sdkOutputTable"), Seq( + Row("babu", 1, java.sql.Date.valueOf("2002-01-02"),Timestamp.valueOf("2002-01-02 01:01:00.0" +sql("DROP TABLE sdkOutputTable") +cleanTestData() + + } + test("test huge data write with one batch having bad record") { val exception = http://git-wip-us.apache.org/repos/asf/carbondata/blob/b2060c61/processing/src/main/java/org/apache/carbondata/processing/loading/model/CarbonLoadModelBuilder.java --
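The intent of the CarbonLoadModelBuilder change, as a hedged sketch (the class and method names here are illustrative, and the CarbonCommonConstants date-format keys are assumed to be the usual ones; only the precedence rule itself comes from the commit): a format supplied through the writer options must win, and the system default is consulted only when the user passed nothing.

import java.util.Map;
import org.apache.carbondata.core.constants.CarbonCommonConstants;
import org.apache.carbondata.core.util.CarbonProperties;

public class DateFormatPrecedenceSketch {
  // user-supplied option first; configured/default format only as fallback
  static String resolveDateFormat(Map<String, String> options) {
    String dateFormat = options.get("dateformat");
    if (dateFormat == null || dateFormat.isEmpty()) {
      dateFormat = CarbonProperties.getInstance().getProperty(
          CarbonCommonConstants.CARBON_DATE_FORMAT,
          CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT);
    }
    return dateFormat;
  }
}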
[24/50] [abbrv] carbondata git commit: [CARBONDATA-2471] Added support for No Dictionary Complex type for Double, Decimal, Date type in SDK
[CARBONDATA-2471] Added support for No Dictionary Complex type for Double, Decimal, Date type in SDK Added support for No Dictionary Complex type for Double, Decimal, Date type in SDK This closes #2297 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/d85fb72e Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/d85fb72e Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/d85fb72e Branch: refs/heads/spark-2.3 Commit: d85fb72e2f24107769c7b5ce7d454d52cbaee49d Parents: 3d8b085 Author: kumarvishal09 Authored: Thu May 10 22:52:09 2018 +0530 Committer: ravipesala Committed: Fri May 11 15:38:27 2018 +0530 -- .../scan/complextypes/PrimitiveQueryType.java | 18 +- .../apache/carbondata/core/util/ByteUtil.java | 8 + .../carbondata/core/util/DataTypeUtil.java | 18 ++ ...ransactionalCarbonTableWithComplexType.scala | 232 +++ .../command/carbonTableSchemaCommon.scala | 9 +- .../processing/datatypes/PrimitiveDataType.java | 29 ++- 6 files changed, 297 insertions(+), 17 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/d85fb72e/core/src/main/java/org/apache/carbondata/core/scan/complextypes/PrimitiveQueryType.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/complextypes/PrimitiveQueryType.java b/core/src/main/java/org/apache/carbondata/core/scan/complextypes/PrimitiveQueryType.java index 2db590b..edae4da 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/complextypes/PrimitiveQueryType.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/complextypes/PrimitiveQueryType.java @@ -22,13 +22,16 @@ import java.io.IOException; import java.nio.ByteBuffer; import org.apache.carbondata.core.cache.dictionary.Dictionary; +import org.apache.carbondata.core.constants.CarbonCommonConstants; import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk; import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator; import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryKeyGeneratorFactory; import org.apache.carbondata.core.keygenerator.mdkey.Bits; import org.apache.carbondata.core.metadata.datatype.DataType; +import org.apache.carbondata.core.metadata.datatype.DataTypes; import org.apache.carbondata.core.scan.filter.GenericQueryType; import org.apache.carbondata.core.scan.processor.RawBlockletColumnChunks; +import org.apache.carbondata.core.util.ByteUtil; import org.apache.carbondata.core.util.DataTypeUtil; public class PrimitiveQueryType extends ComplexQueryType implements GenericQueryType { @@ -46,6 +49,8 @@ public class PrimitiveQueryType extends ComplexQueryType implements GenericQuery private boolean isDictionary; + private DirectDictionaryGenerator directDictGenForDate; + public PrimitiveQueryType(String name, String parentname, int blockIndex, DataType dataType, int keySize, Dictionary dictionary, boolean isDirectDictionary) { @@ -57,6 +62,8 @@ public class PrimitiveQueryType extends ComplexQueryType implements GenericQuery this.parentname = parentname; this.isDirectDictionary = isDirectDictionary; this.isDictionary = (dictionary != null && isDirectDictionary == false); +this.directDictGenForDate = + DirectDictionaryKeyGeneratorFactory.getDirectDictionaryGenerator(DataTypes.DATE); } @Override public void addChildren(GenericQueryType children) { @@ -116,7 +123,16 @@ public class PrimitiveQueryType extends ComplexQueryType implements GenericQuery int size = dataBuffer.getInt(); byte[]
value = new byte[size]; dataBuffer.get(value, 0, size); - actualData = DataTypeUtil.getDataBasedOnDataTypeForNoDictionaryColumn(value, this.dataType); + if (dataType == DataTypes.DATE) { +if (value.length == 0) { + actualData = null; +} else { + actualData = this.directDictGenForDate.getValueFromSurrogate( + ByteUtil.toInt(value, 0, CarbonCommonConstants.INT_SIZE_IN_BYTE)); +} + } else { +actualData = DataTypeUtil.getDataBasedOnDataTypeForNoDictionaryColumn(value, this.dataType); + } } else { // Dictionary Column byte[] data = new byte[keySize]; http://git-wip-us.apache.org/repos/asf/carbondata/blob/d85fb72e/core/src/main/java/org/apache/carbondata/core/util/ByteUtil.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/util/ByteUtil.java b/core/src/main/java/org/apache/carbondata/core/util/ByteUtil.java index 52fc3c3..661384c 100644 --- a/core/src/main/java/org/apache/carbond
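Distilled from the PrimitiveQueryType diff above, the decode path for a DATE child of a complex column (a sketch only; the surrounding dispatch logic is omitted): an empty byte array means null, and anything else holds a 4-byte direct-dictionary surrogate key that is converted back through the date generator.

import org.apache.carbondata.core.constants.CarbonCommonConstants;
import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator;
import org.apache.carbondata.core.util.ByteUtil;

public class DateChildDecodeSketch {
  // decode one DATE child value read from the LV stream
  static Object decodeDateChild(byte[] value, DirectDictionaryGenerator dateGen) {
    if (value.length == 0) {
      return null; // empty bytes encode a null date
    }
    int surrogate = ByteUtil.toInt(value, 0, CarbonCommonConstants.INT_SIZE_IN_BYTE);
    return dateGen.getValueFromSurrogate(surrogate);
  }
}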
[49/50] [abbrv] carbondata git commit: [CARBONDATA-2477] Fixed No dictionary Complex type with double/date/decimal data type
[CARBONDATA-2477] Fixed No dictionary Complex type with double/date/decimal data type Problem: SDK create table with a No Dictionary complex type fails when a complex type child contains a double/date/decimal data type. Solution: The complex type validation was disallowing double/date/decimal data; that restriction is removed. Also changed the no dictionary complex type storage format: the length is now stored in a short instead of an int, to reduce storage space. This closes #2304 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/6297ea0b Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/6297ea0b Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/6297ea0b Branch: refs/heads/spark-2.3 Commit: 6297ea0b4092539fa0aa2c6f772d6984850c6110 Parents: cf1b50b Author: kumarvishal09 Authored: Mon May 14 14:17:38 2018 +0530 Committer: ravipesala Committed: Thu May 17 19:05:30 2018 +0530 -- .../carbondata/core/datastore/ColumnType.java | 14 ++- .../core/datastore/page/ColumnPage.java | 82 -- .../core/datastore/page/ComplexColumnPage.java | 16 +++- .../core/datastore/page/LazyColumnPage.java | 13 ++- .../datastore/page/SafeFixLengthColumnPage.java | 25 +- .../datastore/page/SafeVarLengthColumnPage.java | 21 + .../page/UnsafeFixLengthColumnPage.java | 39 - .../datastore/page/VarLengthColumnPageBase.java | 90 ++-- .../page/encoding/ColumnPageEncoder.java| 9 +- .../scan/complextypes/PrimitiveQueryType.java | 4 +- .../core/scan/complextypes/StructQueryType.java | 8 +- .../apache/carbondata/core/util/ByteUtil.java | 9 ++ ...ransactionalCarbonTableWithComplexType.scala | 76 - .../processing/datatypes/ArrayDataType.java | 7 ++ .../processing/datatypes/GenericDataType.java | 4 + .../processing/datatypes/PrimitiveDataType.java | 17 ++-- .../processing/datatypes/StructDataType.java| 30 +++ .../carbondata/processing/store/TablePage.java | 6 +- .../sdk/file/CarbonWriterBuilder.java | 9 -- 19 files changed, 407 insertions(+), 72 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/6297ea0b/core/src/main/java/org/apache/carbondata/core/datastore/ColumnType.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/ColumnType.java b/core/src/main/java/org/apache/carbondata/core/datastore/ColumnType.java index f98307b..8bbf12d 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/ColumnType.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/ColumnType.java @@ -31,7 +31,13 @@ public enum ColumnType { COMPLEX, // measure column, numerical data type - MEASURE; + MEASURE, + + COMPLEX_STRUCT, + + COMPLEX_ARRAY, + + COMPLEX_PRIMITIVE; public static ColumnType valueOf(int ordinal) { if (ordinal == GLOBAL_DICTIONARY.ordinal()) { @@ -44,6 +50,12 @@ public enum ColumnType { return COMPLEX; } else if (ordinal == MEASURE.ordinal()) { return MEASURE; +} else if (ordinal == COMPLEX_STRUCT.ordinal()) { + return COMPLEX_STRUCT; +} else if (ordinal == COMPLEX_ARRAY.ordinal()) { + return COMPLEX_ARRAY; +} else if (ordinal == COMPLEX_PRIMITIVE.ordinal()) { + return COMPLEX_PRIMITIVE; } else { throw new RuntimeException("create ColumnType with invalid ordinal: " + ordinal); } http://git-wip-us.apache.org/repos/asf/carbondata/blob/6297ea0b/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java index
68269fb..69ed437 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java @@ -22,6 +22,7 @@ import java.math.BigDecimal; import java.util.BitSet; import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datastore.ColumnType; import org.apache.carbondata.core.datastore.TableSpec; import org.apache.carbondata.core.datastore.compression.Compressor; import org.apache.carbondata.core.datastore.compression.CompressorFactory; @@ -153,6 +154,19 @@ public abstract class ColumnPage { } } + private static ColumnPage createFixLengthByteArrayPage(TableSpec.ColumnSpec columnSpec, + DataType dataType, int pageSize, int eachValueSize) { +if (unsafe) { + try { +return new UnsafeFixLengthColumnPage(columnSpec, dataType
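The storage-format change called out in the commit message above, as a minimal self-contained sketch: complex-type no-dictionary values are length-value (LV) encoded, and the length prefix shrinks from a 4-byte int to a 2-byte short, saving two bytes per value (with the implied cap of a single child value at Short.MAX_VALUE bytes).

import java.nio.ByteBuffer;

public class ShortLengthLvSketch {
  // write one value with a 2-byte length prefix (previously putInt)
  static byte[] encodeLV(byte[] value) {
    ByteBuffer buffer = ByteBuffer.allocate(2 + value.length);
    buffer.putShort((short) value.length);
    buffer.put(value);
    return buffer.array();
  }

  // read it back: short length first, then that many bytes
  static byte[] decodeLV(ByteBuffer buffer) {
    byte[] value = new byte[buffer.getShort()];
    buffer.get(value);
    return value;
  }
}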
[32/50] [abbrv] carbondata git commit: [CARBONDATA-2475] Support Modular Core for Materialized View DataMap for query matching and rewriting
http://git-wip-us.apache.org/repos/asf/carbondata/blob/bf73e9fe/datamap/mv/core/src/test/scala/org/apache/carbondata/mv/rewrite/MVTpchTestCase.scala -- diff --git a/datamap/mv/core/src/test/scala/org/apache/carbondata/mv/rewrite/MVTpchTestCase.scala b/datamap/mv/core/src/test/scala/org/apache/carbondata/mv/rewrite/MVTpchTestCase.scala new file mode 100644 index 000..89813b5 --- /dev/null +++ b/datamap/mv/core/src/test/scala/org/apache/carbondata/mv/rewrite/MVTpchTestCase.scala @@ -0,0 +1,247 @@ +package org.apache.carbondata.mv.rewrite + +import java.io.File + +import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.execution.datasources.LogicalRelation +import org.apache.spark.sql.test.util.QueryTest +import org.scalatest.BeforeAndAfterAll + +import org.apache.carbondata.core.constants.CarbonCommonConstants +import org.apache.carbondata.core.util.CarbonProperties + +class MVTpchTestCase extends QueryTest with BeforeAndAfterAll { + + override def beforeAll { +drop() +val projectPath = new File(this.getClass.getResource("/").getPath + "../../../../../") + .getCanonicalPath.replaceAll("", "/") +val integrationPath = s"$projectPath/integration" +val resourcesPath = s"$integrationPath/spark-common-test/src/test/resources" + +sql(s"""create table if not exists LINEITEM( L_SHIPDATE date, L_SHIPMODE string, L_SHIPINSTRUCT string, L_RETURNFLAG string, L_RECEIPTDATE date, L_ORDERKEY INT , L_PARTKEY INT , L_SUPPKEY string, L_LINENUMBER int, L_QUANTITY double, L_EXTENDEDPRICE double, L_DISCOUNT double, L_TAX double, L_LINESTATUS string, L_COMMITDATE date, L_COMMENT string) STORED BY 'org.apache.carbondata.format'""") +sql(s"""create table if not exists ORDERS( O_ORDERDATE date, O_ORDERPRIORITY string, O_ORDERSTATUS string, O_ORDERKEY int, O_CUSTKEY string, O_TOTALPRICE double, O_CLERK string, O_SHIPPRIORITY int, O_COMMENT string) STORED BY 'org.apache.carbondata.format'""") +sql(s"""create table if not exists CUSTOMER( C_MKTSEGMENT string, C_NATIONKEY string, C_CUSTKEY string, C_NAME string, C_ADDRESS string, C_PHONE string, C_ACCTBAL double, C_COMMENT string) STORED BY 'org.apache.carbondata.format'""") +sql(s"""create table if not exists REGION( R_NAME string, R_REGIONKEY string, R_COMMENT string) STORED BY 'org.apache.carbondata.format'""") +sql(s"""create table if not exists NATION ( N_NAME string, N_NATIONKEY string, N_REGIONKEY string, N_COMMENT string) STORED BY 'org.apache.carbondata.format'""") +sql(s"""create table if not exists SUPPLIER(S_COMMENT string,S_SUPPKEY string,S_NAME string, S_ADDRESS string, S_NATIONKEY string, S_PHONE string, S_ACCTBAL double) STORED BY 'org.apache.carbondata.format'""") + +sql(s"""load data inpath "$resourcesPath/tpch/lineitem.csv" into table lineitem options('DELIMITER'='|','FILEHEADER'='L_ORDERKEY,L_PARTKEY,L_SUPPKEY,L_LINENUMBER,L_QUANTITY,L_EXTENDEDPRICE,L_DISCOUNT,L_TAX,L_RETURNFLAG,L_LINESTATUS,L_SHIPDATE,L_COMMITDATE,L_RECEIPTDATE,L_SHIPINSTRUCT,L_SHIPMODE,L_COMMENT')""") +sql(s"""load data inpath "$resourcesPath/tpch/orders.csv" into table ORDERS options('DELIMITER'='|','FILEHEADER'='O_ORDERKEY,O_CUSTKEY,O_ORDERSTATUS,O_TOTALPRICE,O_ORDERDATE,O_ORDERPRIORITY,O_CLERK,O_SHIPPRIORITY,O_COMMENT')""") +sql(s"""load data inpath "$resourcesPath/tpch/customers.csv" into table CUSTOMER options('DELIMITER'='|','FILEHEADER'='C_CUSTKEY,C_NAME,C_ADDRESS,C_NATIONKEY,C_PHONE,C_ACCTBAL,C_MKTSEGMENT,C_COMMENT')""") +sql(s"""load data inpath 
"$resourcesPath/tpch/region.csv" into table REGION options('DELIMITER'='|','FILEHEADER'='R_REGIONKEY,R_NAME,R_COMMENT')""") +sql(s"""load data inpath "$resourcesPath/tpch/nation.csv" into table NATION options('DELIMITER'='|','FILEHEADER'='N_NATIONKEY,N_NAME,N_REGIONKEY,N_COMMENT')""") +sql(s"""load data inpath "$resourcesPath/tpch/supplier.csv" into table SUPPLIER options('DELIMITER'='|','FILEHEADER'='S_SUPPKEY,S_NAME,S_ADDRESS,S_NATIONKEY,S_PHONE,S_ACCTBAL,S_COMMENT')""") + + +sql(s"""create table if not exists LINEITEM1( L_SHIPDATE date, L_SHIPMODE string, L_SHIPINSTRUCT string, L_RETURNFLAG string, L_RECEIPTDATE date, L_ORDERKEY INT , L_PARTKEY INT , L_SUPPKEY string, L_LINENUMBER int, L_QUANTITY double, L_EXTENDEDPRICE double, L_DISCOUNT double, L_TAX double, L_LINESTATUS string, L_COMMITDATE date, L_COMMENT string) STORED BY 'org.apache.carbondata.format'""") +sql(s"""create table if not exists ORDERS1( O_ORDERDATE date, O_ORDERPRIORITY string, O_ORDERSTATUS string, O_ORDERKEY int, O_CUSTKEY string, O_TOTALPRICE double, O_CLERK string, O_SHIPPRIORITY int, O_COMMENT string) STORED BY 'org.apache.carbondata.format'""") +sql(s"""create table if not exists CUSTOMER1(
[41/50] [abbrv] carbondata git commit: [CARBONDATA-2475] Support Modular Core for Materialized View DataMap for query matching and rewriting
http://git-wip-us.apache.org/repos/asf/carbondata/blob/2881c6bb/integration/spark-common-test/src/test/resources/tpch/customers.csv -- diff --git a/integration/spark-common-test/src/test/resources/tpch/customers.csv b/integration/spark-common-test/src/test/resources/tpch/customers.csv new file mode 100644 index 000..7e46e5f --- /dev/null +++ b/integration/spark-common-test/src/test/resources/tpch/customers.csv @@ -0,0 +1,500 @@ +1|Customer#1|IVhzIApeRb ot,c,E|15|25-989-741-2988|711.56|BUILDING|to the even, regular platelets. regular, ironic epitaphs nag e| +2|Customer#2|XSTf4,NCwDVaWNe6tEgvwfmRchLXak|13|23-768-687-3665|121.65|AUTOMOBILE|l accounts. blithely ironic theodolites integrate boldly: caref| +3|Customer#3|MG9kdTD2WBHm|1|11-719-748-3364|7498.12|AUTOMOBILE| deposits eat slyly ironic, even instructions. express foxes detect slyly. blithely even accounts abov| +4|Customer#4|XxVSJsLAGtn|4|14-128-190-5944|2866.83|MACHINERY| requests. final, regular ideas sleep final accou| +5|Customer#5|KvpyuHCplrB84WgAiGV6sYpZq7Tj|3|13-750-942-6364|794.47|HOUSEHOLD|n accounts will have to unwind. foxes cajole accor| +6|Customer#6|sKZz0CsnMD7mp4Xd0YrBvx,LREYKUWAh yVn|20|30-114-968-4951|7638.57|AUTOMOBILE|tions. even deposits boost according to the slyly bold packages. final accounts cajole requests. furious| +7|Customer#7|TcGe5gaZNgVePxU5kRrvXBfkasDTea|18|28-190-982-9759|9561.95|AUTOMOBILE|ainst the ironic, express theodolites. express, even pinto beans among the exp| +8|Customer#8|I0B10bB0AymmC, 0PrRYBCP1yGJ8xcBPmWhl5|17|27-147-574-9335|6819.74|BUILDING|among the slyly regular theodolites kindle blithely courts. carefully even theodolites haggle slyly along the ide| +9|Customer#9|xKiAFTjUsCuxfeleNqefumTrjS|8|18-338-906-3675|8324.07|FURNITURE|r theodolites according to the requests wake thinly excuses: pending requests haggle furiousl| +10|Customer#00010|6LrEaV6KR6PLVcgl2ArL Q3rqzLzcT1 v2|5|15-741-346-9870|2753.54|HOUSEHOLD|es regular deposits haggle. fur| +11|Customer#00011|PkWS 3HlXqwTuzrKg633BEi|23|33-464-151-3439|-272.60|BUILDING|ckages. requests sleep slyly. quickly even pinto beans promise above the slyly regular pinto beans. | +12|Customer#00012|9PWKuhzT4Zr1Q|13|23-791-276-1263|3396.49|HOUSEHOLD| to the carefully final braids. blithely regular requests nag. ironic theodolites boost quickly along| +13|Customer#00013|nsXQu0oVjD7PM659uC3SRSp|3|13-761-547-5974|3857.34|BUILDING|ounts sleep carefully after the close frays. carefully bold notornis use ironic requests. blithely| +14|Customer#00014|KXkletMlL2JQEA |1|11-845-129-3851|5266.30|FURNITURE|, ironic packages across the unus| +15|Customer#00015|YtWggXoOLdwdo7b0y,BZaGUQMLJMX1Y,EC,6Dn|23|33-687-542-7601|2788.52|HOUSEHOLD| platelets. regular deposits detect asymptotes. blithely unusual packages nag slyly at the fluf| +16|Customer#00016|cYiaeMLZSMAOQ2 d0W,|10|20-781-609-3107|4681.03|FURNITURE|kly silent courts. thinly regular theodolites sleep fluffily after | +17|Customer#00017|izrh 6jdqtp2eqdtbkswDD8SG4SzXruMfIXyR7|2|12-970-682-3487|6.34|AUTOMOBILE|packages wake! blithely even pint| +18|Customer#00018|3txGO AiuFux3zT0Z9NYaFRnZt|6|16-155-215-1315|5494.43|BUILDING|s sleep. carefully even instructions nag furiously alongside of t| +19|Customer#00019|uc,3bHIx84H,wdrmLOjVsiqXCq2tr|18|28-396-526-5053|8914.71|HOUSEHOLD| nag. furiously careful packages are slyly at the accounts. 
furiously regular in| +20|Customer#00020|JrPk8Pqplj4Ne|22|32-957-234-8742|7603.40|FURNITURE|g alongside of the special excuses-- fluffily enticing packages wake | +21|Customer#00021|XYmVpr9yAHDEn|8|18-902-614-8344|1428.25|MACHINERY| quickly final accounts integrate blithely furiously u| +22|Customer#00022|QI6p41,FNs5k7RZoCCVPUTkUdYpB|3|13-806-545-9701|591.98|MACHINERY|s nod furiously above the furiously ironic ideas. | +23|Customer#00023|OdY W13N7Be3OC5MpgfmcYss0Wn6TKT|3|13-312-472-8245|3332.02|HOUSEHOLD|deposits. special deposits cajole slyly. fluffily special deposits about the furiously | +24|Customer#00024|HXAFgIAyjxtdqwimt13Y3OZO 4xeLe7U8PqG|13|23-127-851-8031|9255.67|MACHINERY|into beans. fluffily final ideas haggle fluffily| +25|Customer#00025|Hp8GyFQgGHFYSilH5tBfe|12|22-603-468-3533|7133.70|FURNITURE|y. accounts sleep ruthlessly according to the regular theodolites. unusual instructions sleep. ironic, final| +26|Customer#00026|8ljrc5ZeMl7UciP|22|32-363-455-4837|5182.05|AUTOMOBILE|c requests use furiously ironic requests. slyly ironic dependencies us| +27|Customer#00027|IS8GIyxpBrLpMT0u7|3|13-137-193-2709|5679.84|BUILDING| about the carefully ironic pinto beans. accoun| +28|Customer#00028|iVyg0daQ,Tha8x2WPWA9m2529m|8|18-774-241-1462|1007.18|FURNITURE| along the regular deposits. furiously final pac| +29|Customer#00029|sJ5adtfyA
[31/50] [abbrv] carbondata git commit: [CARBONDATA-2475] Support Modular Core for Materialized View DataMap for query matching and rewriting
http://git-wip-us.apache.org/repos/asf/carbondata/blob/bf73e9fe/datamap/mv/core/src/test/scala/org/apache/carbondata/mv/rewrite/matching/TestTPCDS_1_4_Batch.scala -- diff --git a/datamap/mv/core/src/test/scala/org/apache/carbondata/mv/rewrite/matching/TestTPCDS_1_4_Batch.scala b/datamap/mv/core/src/test/scala/org/apache/carbondata/mv/rewrite/matching/TestTPCDS_1_4_Batch.scala new file mode 100644 index 000..074bf00 --- /dev/null +++ b/datamap/mv/core/src/test/scala/org/apache/carbondata/mv/rewrite/matching/TestTPCDS_1_4_Batch.scala @@ -0,0 +1,2496 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.mv.rewrite.matching + +object TestTPCDS_1_4_Batch { + val tpcds_1_4_testCases = Seq( + // sequence of triples. each triple denotes (MV, user query, rewritten query) + // test case 1: test SELECT-SELECT-EXACT_MATCH with simple SELECT (extract from q45) + ("case_1", + """ +|SELECT i_item_id, i_item_sk +|FROM item +|WHERE i_item_sk IN (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) +""".stripMargin.trim, + """ +|SELECT i_item_id +|FROM item +|WHERE i_item_sk IN (2, 3, 5, 7, 11, 13, 17, 19) +""".stripMargin.trim, + """ +|SELECT +|FROM +|WHERE +""".stripMargin.trim), + // test case 2: test SELECT-SELECT-EXACT_MATCH with SELECT containing join (derive from q64) + ("case_2", + """ +|SELECT cs1.product_name, cs1.store_name, cs1.store_zip, cs1.b_street_number, +| cs1.b_streen_name, cs1.b_city, cs1.b_zip, cs1.c_street_number, cs1.c_street_name, +| cs1.c_city, cs1.c_zip, cs1.syear, cs1.cnt, cs1.s1, cs1.s2, cs1.s3, cs2.s1, +| cs2.s2, cs2.s3, cs2.syear, cs2.cnt +|FROM cross_sales cs1,cross_sales cs2 +|WHERE cs1.item_sk=cs2.item_sk AND +| cs1.syear = 1999 AND +| cs2.syear = 1999 + 1 AND +| cs2.cnt <= cs1.cnt AND +| cs1.store_name = cs2.store_name AND +| cs1.store_zip = cs2.store_zip +""".stripMargin.trim, + """ +|SELECT cs1.product_name, cs1.store_name, cs1.store_zip, cs1.b_street_number, +| cs1.b_streen_name, cs1.b_city, cs1.b_zip, cs1.c_street_number, cs1.c_street_name, +| cs1.c_city, cs1.c_zip, cs1.syear, cs1.cnt, cs1.s1, cs1.s2, cs1.s3, cs2.s1, +| cs2.s2, cs2.s3 +|FROM cross_sales cs1,cross_sales cs2 +|WHERE cs1.item_sk=cs2.item_sk AND +| cs1.syear = 1999 AND +| cs2.syear = 1999 + 1 AND +| cs2.cnt <= cs1.cnt AND +| cs1.store_name = cs2.store_name AND +| cs1.store_zip = cs2.store_zip +|ORDER BY cs1.product_name, cs1.store_name, cs2.cnt +""".stripMargin.trim, + """ +|SELECT +|FROM +|WHERE +""".stripMargin.trim), + // test case 3: test simple SELECT with GROUPBY (from q99) + ("case_3", + """ +|SELECT count(ss_sold_date_sk) as not_null_total, +| max(ss_sold_date_sk) as max_ss_sold_date_sk, +| max(ss_sold_time_sk) as max_ss_sold_time_sk, +| ss_item_sk, +| ss_store_sk +|FROM store_sales +|GROUP BY ss_item_sk, ss_store_sk 
+""".stripMargin.trim, + """ +|SELECT count(ss_sold_date_sk) as not_null_total, +| max(ss_sold_date_sk) as max_ss_sold_date_sk, +| ss_item_sk, +| ss_store_sk +|FROM store_sales +|GROUP BY ss_item_sk, ss_store_sk +""".stripMargin.trim, + """ +|SELECT gen_subsumer_0.`not_null_total`, +| gen_subsumer_0.`max_ss_sold_date_sk`, +| gen_subsumer_0.`ss_item_sk`, +| gen_subsumer_0.`ss_store_sk` +|FROM +| (SELECT count(`ss_sold_date_sk`) AS `not_null_total`, max(`ss_sold_date_sk`) AS `max_ss_sold_date_sk`, max(`ss_sold_time_sk`) AS `max_ss_sold_time_sk`, `ss_item_sk`, `ss_store_sk` +| FROM store_sales +| GROUP BY `ss_item_sk`, `ss_
[35/50] [abbrv] carbondata git commit: [CARBONDATA-2475] Support Modular Core for Materialized View DataMap for query matching and rewriting
http://git-wip-us.apache.org/repos/asf/carbondata/blob/2881c6bb/integration/spark2/src/main/spark2.2/org/apache/spark/sql/hive/CarbonAnalyzer.scala -- diff --git a/integration/spark2/src/main/spark2.2/org/apache/spark/sql/hive/CarbonAnalyzer.scala b/integration/spark2/src/main/spark2.2/org/apache/spark/sql/hive/CarbonAnalyzer.scala index 88beb68..dfb89fd 100644 --- a/integration/spark2/src/main/spark2.2/org/apache/spark/sql/hive/CarbonAnalyzer.scala +++ b/integration/spark2/src/main/spark2.2/org/apache/spark/sql/hive/CarbonAnalyzer.scala @@ -20,15 +20,32 @@ import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.analysis.Analyzer import org.apache.spark.sql.catalyst.catalog.SessionCatalog import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.util.CarbonReflectionUtils class CarbonAnalyzer(catalog: SessionCatalog, conf: SQLConf, sparkSession: SparkSession, analyzer: Analyzer) extends Analyzer(catalog, conf) { + + val mvPlan = try { +CarbonReflectionUtils.createObject( + "org.apache.carbondata.mv.datamap.MVAnalyzerRule", + sparkSession)._1.asInstanceOf[Rule[LogicalPlan]] + } catch { +case e: Exception => + null + } + override def execute(plan: LogicalPlan): LogicalPlan = { var logicalPlan = analyzer.execute(plan) logicalPlan = CarbonPreAggregateDataLoadingRules(sparkSession).apply(logicalPlan) -CarbonPreAggregateQueryRules(sparkSession).apply(logicalPlan) +logicalPlan = CarbonPreAggregateQueryRules(sparkSession).apply(logicalPlan) +if (mvPlan != null) { + mvPlan.apply(logicalPlan) +} else { + logicalPlan +} } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/carbondata/blob/2881c6bb/pom.xml -- diff --git a/pom.xml b/pom.xml index 7273c76..e9551c0 100644 --- a/pom.xml +++ b/pom.xml @@ -640,6 +640,7 @@ mv datamap/mv/plan +datamap/mv/core
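The reflective lookup in the CarbonAnalyzer diff above keeps the MV module optional at compile and run time. The same fallback pattern in plain Java (a sketch; it assumes MVAnalyzerRule exposes a single SparkSession constructor, as the CarbonReflectionUtils.createObject call implies):

import org.apache.spark.sql.SparkSession;

public class OptionalRuleLoaderSketch {
  // returns the MV analyzer rule, or null when the MV jar is absent
  static Object loadMvRule(SparkSession session) {
    try {
      return Class.forName("org.apache.carbondata.mv.datamap.MVAnalyzerRule")
          .getDeclaredConstructor(SparkSession.class)
          .newInstance(session);
    } catch (ReflectiveOperationException e) {
      return null; // optional module missing: analysis skips MV rewriting
    }
  }
}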
[34/50] [abbrv] carbondata git commit: [CARBONDATA-2475] Support Modular Core for Materialized View DataMap for query matching and rewriting
[CARBONDATA-2475] Support Modular Core for Materialized View DataMap for query matching and rewriting Support Modular Core for Materialized View DataMap This closes #2302 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/bf73e9fe Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/bf73e9fe Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/bf73e9fe Branch: refs/heads/spark-2.3 Commit: bf73e9fe77523e23be46e7597e2c990e855401e5 Parents: d14c403 Author: ravipesala Authored: Sat May 12 22:49:19 2018 +0530 Committer: Jacky Li Committed: Sun May 13 17:08:19 2018 +0800 -- datamap/mv/core/pom.xml | 169 ++ .../carbondata/mv/datamap/MVAnalyzerRule.scala | 105 + .../mv/datamap/MVDataMapProvider.scala | 125 + .../apache/carbondata/mv/datamap/MVHelper.scala | 377 +++ .../apache/carbondata/mv/datamap/MVState.scala | 55 + .../mv/rewrite/DefaultMatchMaker.scala | 647 + .../carbondata/mv/rewrite/MatchConditions.scala | 28 + .../carbondata/mv/rewrite/MatchMaker.scala | 47 + .../carbondata/mv/rewrite/Navigator.scala | 196 ++ .../carbondata/mv/rewrite/QueryRewrite.scala| 53 + .../mv/rewrite/SummaryDatasetCatalog.scala | 168 ++ .../apache/carbondata/mv/rewrite/Utils.scala| 358 +++ .../mv/rewrite/MVCreateTestCase.scala | 676 + .../mv/rewrite/MVSampleTestCase.scala | 167 ++ .../carbondata/mv/rewrite/MVTPCDSTestCase.scala | 146 + .../carbondata/mv/rewrite/MVTpchTestCase.scala | 247 ++ .../SelectSelectExactChildrenSuite.scala| 76 + .../carbondata/mv/rewrite/Tpcds_1_4_Suite.scala | 80 + .../mv/rewrite/matching/TestSQLBatch.scala | 214 ++ .../rewrite/matching/TestTPCDS_1_4_Batch.scala | 2496 ++ 20 files changed, 6430 insertions(+) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/bf73e9fe/datamap/mv/core/pom.xml -- diff --git a/datamap/mv/core/pom.xml b/datamap/mv/core/pom.xml new file mode 100644 index 000..99a8e22 --- /dev/null +++ b/datamap/mv/core/pom.xml @@ -0,0 +1,169 @@ + + +http://maven.apache.org/POM/4.0.0"; xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"; xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd";> + + 4.0.0 + + +org.apache.carbondata +carbondata-parent +1.4.0-SNAPSHOT +../../../pom.xml + + + carbondata-mv-core + Apache CarbonData :: Materialized View Core + + +${basedir}/../../../dev + + + + + org.apache.carbondata + carbondata-mv-plan + ${project.version} + + + org.apache.carbondata + carbondata-spark2 + ${project.version} + + + org.scalatest + scalatest_${scala.binary.version} + test + + + + +src/test/scala + + +maven-compiler-plugin + + 1.8 + 1.8 + + + +org.apache.maven.plugins +maven-surefire-plugin +2.18 + + + false + ${project.build.directory}/surefire-reports + -Xmx3g -XX:MaxPermSize=512m -XX:ReservedCodeCacheSize=512m + +true + + false + false + + + + +org.apache.maven.plugins +maven-checkstyle-plugin +2.17 + + true + + + +org.scala-tools +maven-scala-plugin +2.15.2 + + +compile + + compile + +compile + + +testCompile + + testCompile + +test + + +process-resources + + compile + + + + + +org.apache.maven.plugins +maven-enforcer-plugin +1.4.1 + + true + + + +com.ning.maven.plugins +maven-duplicate-finder-plugin + + true + + + +org.scalatest +scalatest-maven-plugin +1.0 + + + ${project.build.directory}/surefire-reports + . + false + CarbonTestSuite.txt + -ea -Xmx3g -XX:MaxPermSize=512m -XX:ReservedCodeCacheSize=512m + + + + + +true + + + + +test + + test + + + + + + + + +sdvtest + + true + + + + http://git-wip-us.apache.org/
[14/50] [abbrv] carbondata git commit: [CARBONDATA-2464] Fixed OOM issue in case of Complex type
[CARBONDATA-2464] Fixed OOM issue in case of Complex type Problem: Queries on Complex type columns fail with OOM. Root Cause: Complex type child column (no-dictionary) values are written in LV format; while reading, the length is read first and the data is then read based on that length. The byte-array-to-int conversion returns a wrong length value, so the reader tries to allocate a huge memory chunk, and since that much Unsafe memory is not available it fails with OOM. Code issue: While converting the byte array to an int, the byte values are not masked, which yields a wrong integer value. Solution: Mask each byte before left shifting the bits. This closes #2288 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/ceb7c8dd Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/ceb7c8dd Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/ceb7c8dd Branch: refs/heads/spark-2.3 Commit: ceb7c8dd1ced457c7ce34f016abf30102e4931a9 Parents: cc0cbba Author: kumarvishal09 Authored: Wed May 9 17:04:21 2018 +0530 Committer: Jacky Li Committed: Thu May 10 15:27:32 2018 +0800 -- .../main/java/org/apache/carbondata/core/util/ByteUtil.java| 4 ++-- .../java/org/apache/carbondata/core/util/ByteUtilTest.java | 6 ++ 2 files changed, 8 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/ceb7c8dd/core/src/main/java/org/apache/carbondata/core/util/ByteUtil.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/util/ByteUtil.java b/core/src/main/java/org/apache/carbondata/core/util/ByteUtil.java index d1c16bb..52fc3c3 100644 --- a/core/src/main/java/org/apache/carbondata/core/util/ByteUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/util/ByteUtil.java @@ -517,8 +517,8 @@ public final class ByteUtil { } public static int toInt(byte[] bytes, int offset) { -return (((int)bytes[offset]) << 24) + (((int)bytes[offset + 1]) << 16) + -(((int)bytes[offset + 2]) << 8) + bytes[offset + 3]; +return (((int)bytes[offset] & 0xff) << 24) + (((int)bytes[offset + 1] & 0xff) << 16) + +(((int)bytes[offset + 2] & 0xff) << 8) + ((int)bytes[offset + 3] & 0xff); } public static void setInt(byte[] data, int offset, int value) { http://git-wip-us.apache.org/repos/asf/carbondata/blob/ceb7c8dd/core/src/test/java/org/apache/carbondata/core/util/ByteUtilTest.java -- diff --git a/core/src/test/java/org/apache/carbondata/core/util/ByteUtilTest.java b/core/src/test/java/org/apache/carbondata/core/util/ByteUtilTest.java index d51e184..d93aa49 100644 --- a/core/src/test/java/org/apache/carbondata/core/util/ByteUtilTest.java +++ b/core/src/test/java/org/apache/carbondata/core/util/ByteUtilTest.java @@ -113,6 +113,12 @@ public class ByteUtilTest extends TestCase { prepareBuffers(); assertFalse(UnsafeComparer.INSTANCE.compareTo(buff1, buff2) < 0); } +@Test +public void testIntConversion() { +byte[] data = new byte[4]; +ByteUtil.setInt(data, 0, 968); +assertEquals(ByteUtil.toInt(data, 0), 968); +} @Test public void testEqualToCase() {
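Why the mask matters, as a self-contained demonstration (demo code, not part of the commit): a Java byte is signed, so any byte of 0x80 or above sign-extends to a negative int during the widening cast and corrupts the reassembled length.

public class SignExtensionDemo {
  // the buggy variant: no masking before the shifts
  static int toIntUnmasked(byte[] b, int offset) {
    return (((int) b[offset]) << 24) + (((int) b[offset + 1]) << 16)
        + (((int) b[offset + 2]) << 8) + b[offset + 3];
  }

  // the fixed variant: mask each byte, then shift
  static int toIntMasked(byte[] b, int offset) {
    return (((int) b[offset] & 0xff) << 24) + (((int) b[offset + 1] & 0xff) << 16)
        + (((int) b[offset + 2] & 0xff) << 8) + ((int) b[offset + 3] & 0xff);
  }

  public static void main(String[] args) {
    byte[] data = {0, 0, 0x03, (byte) 0xc8}; // big-endian encoding of 968
    System.out.println(toIntUnmasked(data, 0)); // 712: 0xc8 sign-extended to -56
    System.out.println(toIntMasked(data, 0));   // 968: the correct length
  }
}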
[16/50] [abbrv] carbondata git commit: [CARBONDATA-2435] Remove SDK dependency on spark jars.
[CARBONDATA-2435] Remove SDK dependency on spark jars. [CARBONDATA-2435] Remove SDK dependency on spark jars. Problem and cause: When the SDK writer is used in a standalone cluster without spark jars, an exception is thrown while initializing the reverse dictionary cache. Solution: The carbon SDK does not support dictionary encoding, and this spark dependency exists only for dictionary encoding. Move the spark-dependent code inside the dictionary encoding if block, so the SDK flow never has to access a spark class. This closes #2289 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/ff5166ef Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/ff5166ef Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/ff5166ef Branch: refs/heads/spark-2.3 Commit: ff5166ef78c42ca0819d3d9fa439aa56d32953eb Parents: b2060c6 Author: ajantha-bhat Authored: Wed May 9 18:07:56 2018 +0530 Committer: ravipesala Committed: Thu May 10 16:23:12 2018 +0530 -- .../processing/datatypes/PrimitiveDataType.java | 12 ++ .../impl/DictionaryFieldConverterImpl.java | 12 ++ .../converter/impl/FieldEncoderFactory.java | 25 +++- .../converter/impl/RowConverterImpl.java| 13 ++ .../InputProcessorStepWithNoConverterImpl.java | 2 +- 5 files changed, 28 insertions(+), 36 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/ff5166ef/processing/src/main/java/org/apache/carbondata/processing/datatypes/PrimitiveDataType.java -- diff --git a/processing/src/main/java/org/apache/carbondata/processing/datatypes/PrimitiveDataType.java b/processing/src/main/java/org/apache/carbondata/processing/datatypes/PrimitiveDataType.java index dee8968..e34c184 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/datatypes/PrimitiveDataType.java +++ b/processing/src/main/java/org/apache/carbondata/processing/datatypes/PrimitiveDataType.java @@ -28,6 +28,8 @@ import java.util.Map; import org.apache.carbondata.common.logging.LogService; import org.apache.carbondata.common.logging.LogServiceFactory; import org.apache.carbondata.core.cache.Cache; +import org.apache.carbondata.core.cache.CacheProvider; +import org.apache.carbondata.core.cache.CacheType; import org.apache.carbondata.core.cache.dictionary.Dictionary; import org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier; import org.apache.carbondata.core.constants.CarbonCommonConstants; @@ -135,7 +137,6 @@ public class PrimitiveDataType implements GenericDataType { * @param parentname * @param columnId * @param carbonDimension - * @param cache * @param absoluteTableIdentifier * @param client * @param useOnePass * @param * @param isEmptyBadRecords */ public PrimitiveDataType(CarbonColumn carbonColumn, String parentname, String columnId, - CarbonDimension carbonDimension, Cache cache, - AbsoluteTableIdentifier absoluteTableIdentifier, DictionaryClient client, Boolean useOnePass, - Map localCache, String nullFormat, Boolean isEmptyBadRecords) { + CarbonDimension carbonDimension, AbsoluteTableIdentifier absoluteTableIdentifier, + DictionaryClient client, Boolean useOnePass, Map localCache, + String nullFormat, Boolean isEmptyBadRecords) { this.name = carbonColumn.getColName(); this.parentname = parentname; this.columnId = columnId; @@ -163,6 +164,9 @@ public class PrimitiveDataType implements GenericDataType { dictionaryGenerator = new DirectDictionary(DirectDictionaryKeyGeneratorFactory
.getDirectDictionaryGenerator(carbonDimension.getDataType())); } else if (carbonDimension.hasEncoding(Encoding.DICTIONARY)) { +CacheProvider cacheProvider = CacheProvider.getInstance(); +Cache cache = +cacheProvider.createCache(CacheType.REVERSE_DICTIONARY); Dictionary dictionary = null; if (useOnePass) { if (CarbonUtil.isFileExistsForGivenColumn(identifier)) { http://git-wip-us.apache.org/repos/asf/carbondata/blob/ff5166ef/processing/src/main/java/org/apache/carbondata/processing/loading/converter/impl/DictionaryFieldConverterImpl.java -- diff --git a/processing/src/main/java/org/apache/carbondata/processing/loading/converter/impl/DictionaryFieldConverterImpl.java b/processing/src/main/java/org/apache/carbondata/processing/loading/converter/impl/DictionaryFieldConverterImpl.java index 1fb4086..0757f8a 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/loading/converter/impl/DictionaryFieldCo
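The shape of the fix, condensed from the PrimitiveDataType diff above (a sketch; error handling and the actual dictionary lookup are omitted): cache creation moves inside the dictionary-encoding branch, so an SDK writer flow, which never produces dictionary-encoded columns, never triggers the spark-dependent cache initialization.

import org.apache.carbondata.core.cache.Cache;
import org.apache.carbondata.core.cache.CacheProvider;
import org.apache.carbondata.core.cache.CacheType;
import org.apache.carbondata.core.metadata.encoder.Encoding;
import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension;

public class LazyDictionaryCacheSketch {
  static void initDictionary(CarbonDimension dimension) {
    if (dimension.hasEncoding(Encoding.DICTIONARY)) {
      // only the dictionary path ever builds the reverse dictionary cache
      Cache cache = CacheProvider.getInstance()
          .createCache(CacheType.REVERSE_DICTIONARY);
      // ... reverse dictionary lookup continues here
    }
  }
}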
[21/50] [abbrv] carbondata git commit: [CARBONDATA-2452] [CARBONDATA-2451] [CARBONDATA-2450] [CARBONDATA-2453] Fixed issues related to complex types
[CARBONDATA-2452] [CARBONDATA-2451] [CARBONDATA-2450] [CARBONDATA-2453] Fixed issues related to complex types

Issue 1: Dictionary encoding was being added to complex types in the SDK case, which led to data load failure.
Issue 2: Sort columns were not being validated against the table schema.
Issue 3: Bad record handling was missing for complex types.
Issue 4: The parent name was not being prepended to the field name before checking for duplicates, which threw a duplicate column exception.

This closes #2278

Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/6b70b7e4
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/6b70b7e4
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/6b70b7e4

Branch: refs/heads/spark-2.3
Commit: 6b70b7e47b05a612ccb5a5ad01ee2d5a05ffa600
Parents: 8e7fceb
Author: kunal642
Authored: Mon May 7 20:58:21 2018 +0530
Committer: kumarvishal09
Committed: Fri May 11 03:27:36 2018 +0530
--
 .../schema/table/TableSchemaBuilder.java        |  21 +-
 .../complexType/TestComplexTypeQuery.scala      |   2 +
 .../TestNonTransactionalCarbonTable.scala       | 410 +--
 .../processing/datatypes/ArrayDataType.java     |  11 +-
 .../processing/datatypes/GenericDataType.java   |   3 +-
 .../processing/datatypes/PrimitiveDataType.java |  41 +-
 .../processing/datatypes/StructDataType.java    |  11 +-
 .../loading/DataLoadProcessBuilder.java         |   9 +
 .../impl/ComplexFieldConverterImpl.java         |   2 +-
 .../DirectDictionaryFieldConverterImpl.java     |   1 -
 .../loading/model/CarbonLoadModelBuilder.java   |  15 +-
 .../InputProcessorStepWithNoConverterImpl.java  |  32 +-
 .../sdk/file/CarbonWriterBuilder.java           |  24 +-
 13 files changed, 524 insertions(+), 58 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/carbondata/blob/6b70b7e4/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java
--
diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java
index b078400..03d03f8 100644
--- a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java
+++ b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java
@@ -122,7 +122,13 @@ public class TableSchemaBuilder {
   private ColumnSchema addColumn(StructField field, String parentName, AtomicInteger valIndex,
       boolean isSortColumn, boolean isComplexChild) {
     Objects.requireNonNull(field);
-    checkRepeatColumnName(field);
+    if (isComplexChild) {
+      // if field is complex then append parent name to the child field to check
+      // if any other field with same name exists
+      checkRepeatColumnName(field, parentName);
+    } else {
+      checkRepeatColumnName(field);
+    }
     ColumnSchema newColumn = new ColumnSchema();
     if (parentName != null) {
       newColumn.setColumnName(parentName + "." + field.getFieldName());
@@ -156,7 +162,7 @@ public class TableSchemaBuilder {
       // SO, this will not have any impact.
       newColumn.setColumnUniqueId(field.getFieldName());
       newColumn.setColumnReferenceId(newColumn.getColumnUniqueId());
-      newColumn.setEncodingList(createEncoding(field.getDataType(), isSortColumn));
+      newColumn.setEncodingList(createEncoding(field.getDataType(), isSortColumn, isComplexChild));
       if (field.getDataType().isComplexType()) {
         if (field.getDataType().getName().equalsIgnoreCase("ARRAY")) {
           newColumn.setNumberOfChild(1);
@@ -209,6 +215,12 @@ public class TableSchemaBuilder {
   /**
    * Throw exception if {@param field} name is repeated
    */
+  private void checkRepeatColumnName(StructField field, String parentName) {
+    checkRepeatColumnName(
+        new StructField(parentName + "." + field.getFieldName(), field.getDataType(),
+            field.getChildren()));
+  }
+
   private void checkRepeatColumnName(StructField field) {
     for (ColumnSchema column : sortColumns) {
       if (column.getColumnName().equalsIgnoreCase(field.getFieldName())) {
@@ -234,9 +246,10 @@ public class TableSchemaBuilder {
     }
   }

-  private List createEncoding(DataType dataType, boolean isSortColumn) {
+  private List createEncoding(DataType dataType, boolean isSortColumn,
+      boolean isComplexChild) {
     List encodings = new LinkedList<>();
-    if (dataType == DataTypes.TIMESTAMP || dataType == DataTypes.DATE) {
+    if (dataType == DataTypes.DATE && !isComplexChild) {
       encodings.add(Encoding.DIRECT_DICTIONARY);
       encodings.add(Encoding.DICTIONARY);
     }

http://git-wip-us.apache.
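To make Issue 4 concrete, here is a minimal, self-contained sketch of why the parent name must be prepended before the duplicate check: children of different structs may legally share a short name, while two top-level columns may not. The class and method below are hypothetical illustrations, not the TableSchemaBuilder API; the case-insensitive comparison mirrors the equalsIgnoreCase check in the diff above.

import java.util.HashSet;
import java.util.Set;

public final class DuplicateColumnCheck {
  private final Set<String> seen = new HashSet<>();

  // parentName is null for top-level fields, e.g. "structCol" for that struct's children
  public void check(String fieldName, String parentName) {
    String qualified = parentName == null ? fieldName : parentName + "." + fieldName;
    // Set.add returns false when the qualified name was already registered
    if (!seen.add(qualified.toLowerCase())) {
      throw new IllegalArgumentException("Duplicate column found: " + qualified);
    }
  }

  public static void main(String[] args) {
    DuplicateColumnCheck check = new DuplicateColumnCheck();
    check.check("id", null);          // top-level "id"
    check.check("id", "structCol");   // ok: qualifies to "structCol.id"
    check.check("id", null);          // throws: duplicate top-level "id"
  }
}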
[19/50] [abbrv] carbondata git commit: [CARBONDATA-2442][CARBONDATA-2469] Fixed: multiple issues in sdk writer and external table
[CARBONDATA-2442][CARBONDATA-2469] Fixed: multiple issues in sdk writer and external table

problem1: When the output of two SDK writers with different schemas is placed in the same folder for reading, the result is not as expected and contains many null values.
root cause: When multiple carbondata and index files are placed in the same folder, the table schema is inferred from the first file; there is no validation comparing that schema against the schema of every other index file.
solution: Compare the table schema with the schema of every other index file and throw an exception on any mismatch.

problem2: An external table must show its own location instead of the default store location.
solution: For external tables, show the carbon table path instead of the default store location in describe formatted.

This closes #2273

Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/1dfbcfcc
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/1dfbcfcc
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/1dfbcfcc

Branch: refs/heads/spark-2.3
Commit: 1dfbcfccca5302fd02c31d2c2386cafa75b1f10c
Parents: fe436c3
Author: ajantha-bhat
Authored: Sat May 5 16:59:44 2018 +0530
Committer: kunal642
Committed: Thu May 10 23:02:43 2018 +0530
--
 .../schema/table/column/ColumnSchema.java       |  30 ++
 .../LatestFilesReadCommittedScope.java          |   5 +
 .../apache/carbondata/core/util/CarbonUtil.java |  64 +---
 .../hadoop/api/CarbonTableInputFormat.java      |  43
 .../createTable/TestCreateExternalTable.scala   |   2 +
 .../TestNonTransactionalCarbonTable.scala       | 103 ++-
 .../table/CarbonDescribeFormattedCommand.scala  |   4 +-
 7 files changed, 233 insertions(+), 18 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/carbondata/blob/1dfbcfcc/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/column/ColumnSchema.java
--
diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/column/ColumnSchema.java b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/column/ColumnSchema.java
index edede18..1f05f63 100644
--- a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/column/ColumnSchema.java
+++ b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/column/ColumnSchema.java
@@ -343,6 +343,36 @@ public class ColumnSchema implements Serializable, Writable {
   }

   /**
+   * method to compare columnSchema on other parameters
+   * along with just column name and column data type
+   * @param obj
+   * @return
+   */
+  public boolean equalsWithStrictCheck(Object obj) {
+    if (!this.equals(obj)) {
+      return false;
+    }
+    ColumnSchema other = (ColumnSchema) obj;
+    if (!columnUniqueId.equals(other.columnUniqueId) ||
+        (isDimensionColumn != other.isDimensionColumn) ||
+        (scale != other.scale) ||
+        (precision != other.precision) ||
+        (isSortColumn != other.isSortColumn)) {
+      return false;
+    }
+    if (encodingList.size() != other.encodingList.size()) {
+      return false;
+    }
+    for (int i = 0; i < encodingList.size(); i++) {
+      if (encodingList.get(i).compareTo(other.encodingList.get(i)) != 0) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /**
    * @return the dataType
    */
   public DataType getDataType() {

http://git-wip-us.apache.org/repos/asf/carbondata/blob/1dfbcfcc/core/src/main/java/org/apache/carbondata/core/readcommitter/LatestFilesReadCommittedScope.java
--
diff --git a/core/src/main/java/org/apache/carbondata/core/readcommitter/LatestFilesReadCommittedScope.java b/core/src/main/java/org/apache/carbondata/core/readcommitter/LatestFilesReadCommittedScope.java
index 3f870b8..8abf537 100644
--- a/core/src/main/java/org/apache/carbondata/core/readcommitter/LatestFilesReadCommittedScope.java
+++ b/core/src/main/java/org/apache/carbondata/core/readcommitter/LatestFilesReadCommittedScope.java
@@ -122,6 +122,11 @@ public class LatestFilesReadCommittedScope implements ReadCommittedScope {
   @Override public void takeCarbonIndexFileSnapShot() throws IOException {
     // Read the current file Path get the list of indexes from the path.
     CarbonFile file = FileFactory.getCarbonFile(carbonFilePath);
+    if (file == null) {
+      // For nonTransactional table, files can be removed at any point of time.
+      // So cannot assume files will be present
+      throw new IOException("No files are present in the table location :" + carbonFilePath);
+    }
     Map<String, List<String>> indexFileStore = new HashMap<>();
     if (file.isDirectory()) {
       CarbonFile[] carbonIndexFiles = SegmentIndexFileStore.getCarbonIndexF
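A hedged sketch of the validation loop that problem1's solution describes, using the equalsWithStrictCheck method added above. The driver class and the error message are illustrative assumptions, not the actual CarbonTableInputFormat code from this commit.

import java.io.IOException;
import java.util.List;

import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema;

public final class IndexFileSchemaValidator {
  // The schema inferred from the first file must strictly match the schema carried by
  // every other index file; otherwise reading a mixed-schema folder produces nulls.
  public static void validate(List<ColumnSchema> tableSchema, List<ColumnSchema> indexSchema)
      throws IOException {
    if (tableSchema.size() != indexSchema.size()) {
      throw new IOException("Index file schema does not match the inferred table schema");
    }
    for (int i = 0; i < tableSchema.size(); i++) {
      // equalsWithStrictCheck (added above) also compares unique id, dimension flag,
      // scale, precision, sort-column flag and the full encoding list
      if (!tableSchema.get(i).equalsWithStrictCheck(indexSchema.get(i))) {
        throw new IOException("Index file schema does not match the inferred table schema");
      }
    }
  }
}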
[28/50] [abbrv] carbondata git commit: [CARBONDATA-2474] Support Modular Plan for Materialized View DataMap
http://git-wip-us.apache.org/repos/asf/carbondata/blob/ffddba70/datamap/mv/plan/src/main/scala/org/apache/carbondata/mv/plans/util/BirdcageOptimizer.scala
--
diff --git a/datamap/mv/plan/src/main/scala/org/apache/carbondata/mv/plans/util/BirdcageOptimizer.scala b/datamap/mv/plan/src/main/scala/org/apache/carbondata/mv/plans/util/BirdcageOptimizer.scala
new file mode 100644
index 000..6363089
--- /dev/null
+++ b/datamap/mv/plan/src/main/scala/org/apache/carbondata/mv/plans/util/BirdcageOptimizer.scala
@@ -0,0 +1,199 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.mv.plans.util
+
+import org.apache.spark.sql.catalyst.analysis._
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.optimizer._
+import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, _}
+import org.apache.spark.sql.catalyst.rules.{RuleExecutor, _}
+import org.apache.spark.sql.internal.SQLConf
+
+object BirdcageOptimizer extends RuleExecutor[LogicalPlan] {
+
+  val conf = new SQLConf()
+    .copy(SQLConf.CASE_SENSITIVE -> true, SQLConf.STARSCHEMA_DETECTION -> true)
+  protected val fixedPoint = FixedPoint(conf.optimizerMaxIterations)
+
+  def batches: Seq[Batch] = {
+    // Technically some of the rules in Finish Analysis are not optimizer rules and belong more
+    // in the analyzer, because they are needed for correctness (e.g. ComputeCurrentTime).
+    // However, because we also use the analyzer to canonicalized queries (for view definition),
+    // we do not eliminate subqueries or compute current time in the analyzer.
+    Batch(
+      "Finish Analysis", Once,
+      EliminateSubqueryAliases,
+      EliminateView,
+      ReplaceExpressions,
+      ComputeCurrentTime,
+      // GetCurrentDatabase(sessionCatalog),
+      RewriteDistinctAggregates,
+      ReplaceDeduplicateWithAggregate) ::
+    //
+    // Optimizer rules start here
+    //
+    // - Do the first call of CombineUnions before starting the major Optimizer rules,
+    //   since it can reduce the number of iteration and the other rules could add/move
+    //   extra operators between two adjacent Union operators.
+    // - Call CombineUnions again in Batch("Operator Optimizations"),
+    //   since the other rules might make two separate Unions operators adjacent.
+    Batch(
+      "Union", Once,
+      CombineUnions) ::
+    Batch(
+      "Pullup Correlated Expressions", Once,
+      PullupCorrelatedPredicates) ::
+    Batch(
+      "Subquery", Once,
+      OptimizeSubqueries) ::
+    Batch(
+      "Replace Operators", fixedPoint,
+      ReplaceIntersectWithSemiJoin,
+      ReplaceExceptWithAntiJoin,
+      ReplaceDistinctWithAggregate) ::
+    Batch(
+      "Aggregate", fixedPoint,
+      RemoveLiteralFromGroupExpressions,
+      RemoveRepetitionFromGroupExpressions) ::
+    Batch(
+      "Operator Optimizations", fixedPoint, Seq(
+      // Operator push down
+      PushProjectionThroughUnion,
+      ReorderJoin(conf),
+      EliminateOuterJoin(conf),
+      PushPredicateThroughJoin,
+      PushDownPredicate,
+      // LimitPushDown(conf),
+      ColumnPruning,
+      // InferFiltersFromConstraints(conf),
+      // Operator combine
+      CollapseRepartition,
+      CollapseProject,
+      CollapseWindow,
+      CombineFilters,
+      CombineLimits,
+      CombineUnions,
+      // Constant folding and strength reduction
+      NullPropagation(conf),
+      FoldablePropagation,
+      // OptimizeIn(conf),
+      ConstantFolding,
+      ReorderAssociativeOperator,
+      LikeSimplification,
+      BooleanSimplification,
+      SimplifyConditionals,
+      RemoveDispensableExpressions,
+      SimplifyBinaryComparison,
+      // PruneFilters(conf),
+      EliminateSorts,
+      SimplifyCasts,
+      SimplifyCaseConversionExpressions,
+      RewriteCorrelatedScalarSubquery,
+      EliminateSerializa
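The optimizer above is a Catalyst RuleExecutor: each Batch of rules runs either Once or repeatedly to a FixedPoint bounded by conf.optimizerMaxIterations. As a language-neutral illustration of that fixed-point idea, here is a minimal plain-Java sketch; the types and names are hypothetical and are not the Catalyst API.

import java.util.List;
import java.util.function.UnaryOperator;

final class FixedPointExecutor<P> {
  private final int maxIterations;

  FixedPointExecutor(int maxIterations) {
    this.maxIterations = maxIterations;
  }

  // Apply every rule in order, then repeat the whole batch until the plan stops
  // changing (a fixed point) or the iteration budget is exhausted.
  P execute(P plan, List<UnaryOperator<P>> rules) {
    P current = plan;
    for (int i = 0; i < maxIterations; i++) {
      P before = current;
      for (UnaryOperator<P> rule : rules) {
        current = rule.apply(current); // each rule returns an equivalent, cheaper plan
      }
      if (current.equals(before)) {
        break; // fixed point reached: no rule fired in this round
      }
    }
    return current;
  }
}

A Once batch is the degenerate case with maxIterations = 1, which is why correctness-critical rewrites (like the "Finish Analysis" batch above) are run exactly once while cost-reducing rewrites iterate to a fixed point.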