[2/2] incubator-carbondata git commit: [CARBONDATA-100] Implement BigInt value compression. This closes #338
[CARBONDATA-100]Implement BigInt value compression This closes #338 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/c95e565e Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/c95e565e Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/c95e565e Branch: refs/heads/master Commit: c95e565e471dc1352a169368cc7d06e68abc5599 Parents: 7213ac0 20af74b Author: jackylk Authored: Thu Dec 1 19:42:52 2016 +0800 Committer: jackylk Committed: Thu Dec 1 19:42:52 2016 +0800 -- .../core/compression/BigIntCompressor.java | 110 + .../core/compression/DoubleCompressor.java | 242 +++ .../core/compression/ValueCompressor.java | 103 .../compression/type/UnCompressDefaultLong.java | 11 +- .../compression/type/UnCompressMaxMinByte.java | 39 ++- .../type/UnCompressMaxMinByteForLong.java | 8 +- .../type/UnCompressMaxMinDefault.java | 13 +- .../type/UnCompressMaxMinDefaultLong.java | 9 +- .../compression/type/UnCompressMaxMinFloat.java | 10 +- .../compression/type/UnCompressMaxMinInt.java | 36 ++- .../compression/type/UnCompressMaxMinLong.java | 37 ++- .../compression/type/UnCompressMaxMinShort.java | 36 ++- .../compression/type/UnCompressNoneByte.java| 39 ++- .../compression/type/UnCompressNoneDefault.java | 10 +- .../compression/type/UnCompressNoneFloat.java | 11 +- .../compression/type/UnCompressNoneInt.java | 29 ++- .../compression/type/UnCompressNoneLong.java| 39 ++- .../compression/type/UnCompressNoneShort.java | 38 ++- ...ractHeavyCompressedDoubleArrayDataStore.java | 17 +- .../core/util/ValueCompressionUtil.java | 80 -- .../core/util/ValueCompressionUtilTest.java | 8 +- .../store/CarbonFactDataHandlerColumnar.java| 2 - 22 files changed, 844 insertions(+), 83 deletions(-) --
[1/2] incubator-carbondata git commit: Implement BigInt value compression
Repository: incubator-carbondata Updated Branches: refs/heads/master 7213ac057 -> c95e565e4 Implement BigInt value compression fix for review comment Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/20af74ba Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/20af74ba Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/20af74ba Branch: refs/heads/master Commit: 20af74ba719141002c68bc5e82bc05131e47a2d9 Parents: 7213ac0 Author: Ashok Kumar Authored: Sun Jul 24 00:43:58 2016 +0530 Committer: jackylk Committed: Thu Dec 1 19:38:55 2016 +0800 -- .../core/compression/BigIntCompressor.java | 110 + .../core/compression/DoubleCompressor.java | 242 +++ .../core/compression/ValueCompressor.java | 103 .../compression/type/UnCompressDefaultLong.java | 11 +- .../compression/type/UnCompressMaxMinByte.java | 39 ++- .../type/UnCompressMaxMinByteForLong.java | 8 +- .../type/UnCompressMaxMinDefault.java | 13 +- .../type/UnCompressMaxMinDefaultLong.java | 9 +- .../compression/type/UnCompressMaxMinFloat.java | 10 +- .../compression/type/UnCompressMaxMinInt.java | 36 ++- .../compression/type/UnCompressMaxMinLong.java | 37 ++- .../compression/type/UnCompressMaxMinShort.java | 36 ++- .../compression/type/UnCompressNoneByte.java| 39 ++- .../compression/type/UnCompressNoneDefault.java | 10 +- .../compression/type/UnCompressNoneFloat.java | 11 +- .../compression/type/UnCompressNoneInt.java | 29 ++- .../compression/type/UnCompressNoneLong.java| 39 ++- .../compression/type/UnCompressNoneShort.java | 38 ++- ...ractHeavyCompressedDoubleArrayDataStore.java | 17 +- .../core/util/ValueCompressionUtil.java | 80 -- .../core/util/ValueCompressionUtilTest.java | 8 +- .../store/CarbonFactDataHandlerColumnar.java| 2 - 22 files changed, 844 insertions(+), 83 deletions(-) -- 
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/20af74ba/core/src/main/java/org/apache/carbondata/core/compression/BigIntCompressor.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/compression/BigIntCompressor.java b/core/src/main/java/org/apache/carbondata/core/compression/BigIntCompressor.java new file mode 100644 index 000..7b9e52f --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/compression/BigIntCompressor.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.carbondata.core.compression; + +import org.apache.carbondata.core.datastorage.store.dataholder.CarbonWriteDataHolder; +import org.apache.carbondata.core.util.ValueCompressionUtil.DataType; + +/** + * It compresses big int data + */ +public class BigIntCompressor extends ValueCompressor { + + @Override protected Object compressNonDecimalMaxMin(DataType changedDataType, + CarbonWriteDataHolder dataHolder, int decimal, Object max) { +// in case if bigint, decimal will be 0 +return compressMaxMin(changedDataType, dataHolder, max); + } + + @Override + protected Object compressNonDecimal(DataType changedDataType, CarbonWriteDataHolder dataHolder, + int decimal) { +// in case if bigint, decimal will be 0 +return compressNone(changedDataType, dataHolder); + } + + @Override + protected Object compressMaxMin(DataType changedDataType, CarbonWriteDataHolder dataHolder, + Object max) { +long maxValue = (long) max; +long[] value = dataHolder.getWritableLongValues(); +int i = 0; +switch (changedDataType) { + case DATA_BYTE: +byte[] result = new byte[value.length]; +for (int j = 0; j < value.length; j++) { + result[i] = (byte) (maxValue - value[j]); + i++; +} +return result; + case DATA_SHORT: +short[] shortResul
[2/2] incubator-carbondata git commit: [CARBONDATA-478][SPARK2] Spark2 module should have a different SparkRowReadSupportImpl from spark1. This closes #377
[CARBONDATA-478][SPARK2]Spark2 module should have different SparkRowReadSupportImpl with spark1 This closes #377 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/997af85d Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/997af85d Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/997af85d Branch: refs/heads/master Commit: 997af85dc1b2caaccaf523afc75d483ba4d6a84a Parents: c95e565 9961f53 Author: jackylk Authored: Thu Dec 1 20:27:43 2016 +0800 Committer: jackylk Committed: Thu Dec 1 20:27:43 2016 +0800 -- .../readsupport/SparkRowReadSupportImpl.java| 76 .../readsupport/SparkRowReadSupportImpl.java| 70 ++ .../readsupport/SparkRowReadSupportImpl.java| 57 +++ 3 files changed, 127 insertions(+), 76 deletions(-) --
[1/2] incubator-carbondata git commit: fixLatedecoderIssueForSpark2
Repository: incubator-carbondata Updated Branches: refs/heads/master c95e565e4 -> 997af85dc fixLatedecoderIssueForSpark2 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/9961f537 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/9961f537 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/9961f537 Branch: refs/heads/master Commit: 9961f537f2229985b09d22204c827638b4e4a397 Parents: c95e565 Author: QiangCai Authored: Thu Dec 1 19:32:04 2016 +0800 Committer: jackylk Committed: Thu Dec 1 20:26:40 2016 +0800 -- .../readsupport/SparkRowReadSupportImpl.java| 76 .../readsupport/SparkRowReadSupportImpl.java| 70 ++ .../readsupport/SparkRowReadSupportImpl.java| 57 +++ 3 files changed, 127 insertions(+), 76 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/9961f537/integration/spark-common/src/main/java/org/apache/carbondata/spark/readsupport/SparkRowReadSupportImpl.java -- diff --git a/integration/spark-common/src/main/java/org/apache/carbondata/spark/readsupport/SparkRowReadSupportImpl.java b/integration/spark-common/src/main/java/org/apache/carbondata/spark/readsupport/SparkRowReadSupportImpl.java deleted file mode 100644 index 4b1958d..000 --- a/integration/spark-common/src/main/java/org/apache/carbondata/spark/readsupport/SparkRowReadSupportImpl.java +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - *http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.carbondata.spark.readsupport; - -import java.sql.Timestamp; - -import org.apache.carbondata.core.carbon.AbsoluteTableIdentifier; -import org.apache.carbondata.core.carbon.metadata.datatype.DataType; -import org.apache.carbondata.core.carbon.metadata.encoder.Encoding; -import org.apache.carbondata.core.carbon.metadata.schema.table.column.CarbonColumn; -import org.apache.carbondata.core.util.DataTypeUtil; -import org.apache.carbondata.hadoop.readsupport.impl.AbstractDictionaryDecodedReadSupport; - -import org.apache.spark.sql.Row; -import org.apache.spark.sql.catalyst.expressions.GenericRow; -import org.apache.spark.unsafe.types.UTF8String; - -public class SparkRowReadSupportImpl extends AbstractDictionaryDecodedReadSupport { - - @Override public void initialize(CarbonColumn[] carbonColumns, - AbsoluteTableIdentifier absoluteTableIdentifier) { -super.initialize(carbonColumns, absoluteTableIdentifier); -//can initialize and generate schema here. 
- } - - @Override public Row readRow(Object[] data) { -for (int i = 0; i < dictionaries.length; i++) { - if (dictionaries[i] != null) { -data[i] = DataTypeUtil - .getDataBasedOnDataType(dictionaries[i].getDictionaryValueForKey((int) data[i]), -dataTypes[i]); -switch (dataTypes[i]) { - case STRING: -data[i] = UTF8String.fromString(data[i].toString()); -break; - case TIMESTAMP: -data[i] = new Timestamp((long) data[i] / 1000); -break; - case LONG: -data[i] = data[i]; -break; - default: -} - } - else if (carbonColumns[i].hasEncoding(Encoding.DIRECT_DICTIONARY)) { -//convert the long to timestamp in case of direct dictionary column -if (DataType.TIMESTAMP == carbonColumns[i].getDataType()) { - data[i] = new Timestamp((long) data[i] / 1000); -} - } -// else if(dataTypes[i].equals(DataType.INT)) { -//data[i] = ((Long)(data[i])).intValue(); -// } -//else if(dataTypes[i].equals(DataType.SHORT)) { -//data[i] = ((Double)(data[i])).shortValue(); -// } -} -return new GenericRow(data); - } -} http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/9961f537/integration/spark/src/main/java/org/apache/ca
[2/2] incubator-carbondata git commit: [CARBONDATA-368] Insert into carbon table feature. This closes #366
[CARBONDATA-368]Insert into carbon table feature This closes #366 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/90bc3669 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/90bc3669 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/90bc3669 Branch: refs/heads/master Commit: 90bc3669974f136c117b9c3424dc2ebc8bd91738 Parents: 997af85 5f1abef Author: jackylk Authored: Thu Dec 1 23:29:28 2016 +0800 Committer: jackylk Committed: Thu Dec 1 23:29:28 2016 +0800 -- .../ThriftWrapperSchemaConverterImpl.java | 2 + .../metadata/schema/table/CarbonTable.java | 80 - .../schema/table/column/CarbonColumn.java | 14 +- .../schema/table/column/CarbonDimension.java| 7 +- .../schema/table/column/CarbonMeasure.java | 6 +- .../schema/table/column/ColumnSchema.java | 8 + .../core/constants/CarbonCommonConstants.java | 9 + .../carbondata/scan/filter/FilterUtilTest.java | 8 +- format/src/main/thrift/schema.thrift| 7 +- .../execution/command/carbonTableSchema.scala | 15 +- .../spark/sql/CarbonCatalystOperators.scala | 21 +++ .../org/apache/spark/sql/CarbonContext.scala| 2 +- .../spark/sql/CarbonDatasourceRelation.scala| 43 - .../scala/org/apache/spark/sql/CarbonScan.scala | 43 ++--- .../org/apache/spark/sql/CarbonSqlParser.scala | 31 ++-- .../execution/command/carbonTableSchema.scala | 22 +++ .../sql/hive/CarbonPreInsertionCasts.scala | 59 ++ .../spark/sql/hive/CarbonStrategies.scala | 4 + .../spark/sql/optimizer/CarbonOptimizer.scala | 1 + .../spark/src/test/resources/shortolap.csv | 5 + .../TestLoadDataWithSingleQuotechar.scala | 8 +- .../InsertIntoCarbonTableTestCase.scala | 178 +++ .../TestNoInvertedIndexLoadAndQuery.scala | 2 +- 23 files changed, 500 insertions(+), 75 deletions(-) --
[1/2] incubator-carbondata git commit: Insert into carbon table new
Repository: incubator-carbondata Updated Branches: refs/heads/master 997af85dc -> 90bc36699 Insert into carbon table new Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/5f1abef7 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/5f1abef7 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/5f1abef7 Branch: refs/heads/master Commit: 5f1abef794d22110f2b40fc0fabad19c7f215f0b Parents: 997af85 Author: ashok.blend Authored: Mon Nov 28 21:20:45 2016 -0800 Committer: jackylk Committed: Thu Dec 1 23:28:40 2016 +0800 -- .../ThriftWrapperSchemaConverterImpl.java | 2 + .../metadata/schema/table/CarbonTable.java | 80 - .../schema/table/column/CarbonColumn.java | 14 +- .../schema/table/column/CarbonDimension.java| 7 +- .../schema/table/column/CarbonMeasure.java | 6 +- .../schema/table/column/ColumnSchema.java | 8 + .../core/constants/CarbonCommonConstants.java | 9 + .../carbondata/scan/filter/FilterUtilTest.java | 8 +- format/src/main/thrift/schema.thrift| 7 +- .../execution/command/carbonTableSchema.scala | 15 +- .../spark/sql/CarbonCatalystOperators.scala | 21 +++ .../org/apache/spark/sql/CarbonContext.scala| 2 +- .../spark/sql/CarbonDatasourceRelation.scala| 43 - .../scala/org/apache/spark/sql/CarbonScan.scala | 43 ++--- .../org/apache/spark/sql/CarbonSqlParser.scala | 31 ++-- .../execution/command/carbonTableSchema.scala | 22 +++ .../sql/hive/CarbonPreInsertionCasts.scala | 59 ++ .../spark/sql/hive/CarbonStrategies.scala | 4 + .../spark/sql/optimizer/CarbonOptimizer.scala | 1 + .../spark/src/test/resources/shortolap.csv | 5 + .../TestLoadDataWithSingleQuotechar.scala | 8 +- .../InsertIntoCarbonTableTestCase.scala | 178 +++ .../TestNoInvertedIndexLoadAndQuery.scala | 2 +- 23 files changed, 500 insertions(+), 75 deletions(-) -- 
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/5f1abef7/core/src/main/java/org/apache/carbondata/core/carbon/metadata/converter/ThriftWrapperSchemaConverterImpl.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/carbon/metadata/converter/ThriftWrapperSchemaConverterImpl.java b/core/src/main/java/org/apache/carbondata/core/carbon/metadata/converter/ThriftWrapperSchemaConverterImpl.java index 8a53895..c1020e3 100644 --- a/core/src/main/java/org/apache/carbondata/core/carbon/metadata/converter/ThriftWrapperSchemaConverterImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/carbon/metadata/converter/ThriftWrapperSchemaConverterImpl.java @@ -167,6 +167,7 @@ public class ThriftWrapperSchemaConverterImpl implements SchemaConverter { thriftColumnSchema.setColumnProperties(wrapperColumnSchema.getColumnProperties()); thriftColumnSchema.setInvisible(wrapperColumnSchema.isInvisible()); thriftColumnSchema.setColumnReferenceId(wrapperColumnSchema.getColumnReferenceId()); + thriftColumnSchema.setSchemaOrdinal(wrapperColumnSchema.getSchemaOrdinal()); return thriftColumnSchema; } @@ -339,6 +340,7 @@ public class ThriftWrapperSchemaConverterImpl implements SchemaConverter { wrapperColumnSchema.setColumnProperties(externalColumnSchema.getColumnProperties()); wrapperColumnSchema.setInvisible(externalColumnSchema.isInvisible()); wrapperColumnSchema.setColumnReferenceId(externalColumnSchema.getColumnReferenceId()); + wrapperColumnSchema.setSchemaOrdinal(externalColumnSchema.getSchemaOrdinal()); return wrapperColumnSchema; } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/5f1abef7/core/src/main/java/org/apache/carbondata/core/carbon/metadata/schema/table/CarbonTable.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/carbon/metadata/schema/table/CarbonTable.java b/core/src/main/java/org/apache/carbondata/core/carbon/metadata/schema/table/CarbonTable.java index c0a7032..d3e2e62 100644 --- 
a/core/src/main/java/org/apache/carbondata/core/carbon/metadata/schema/table/CarbonTable.java +++ b/core/src/main/java/org/apache/carbondata/core/carbon/metadata/schema/table/CarbonTable.java @@ -21,7 +21,10 @@ package org.apache.carbondata.core.carbon.metadata.schema.table; import java.io.Serializable; import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; import java.util.HashMap; +import java.util.Iterator; import java.util.List; import java.util.Map; @@ -30,6 +33,7 @@ import org.apache.carbondata.common.logging.LogServiceFact
[1/2] incubator-carbondata git commit: fix compatibility
Repository: incubator-carbondata Updated Branches: refs/heads/master 5406cee1b -> 72900c553 fix compatibility Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/7904716b Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/7904716b Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/7904716b Branch: refs/heads/master Commit: 7904716b9396b9ba660e4cb08ef0cba1821f3166 Parents: 5406cee Author: jackylk Authored: Fri Dec 2 12:10:58 2016 +0800 Committer: jackylk Committed: Fri Dec 2 12:10:58 2016 +0800 -- .../apache/carbondata/core/carbon/ColumnarFormatVersion.java| 5 + 1 file changed, 5 insertions(+) -- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/7904716b/core/src/main/java/org/apache/carbondata/core/carbon/ColumnarFormatVersion.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/carbon/ColumnarFormatVersion.java b/core/src/main/java/org/apache/carbondata/core/carbon/ColumnarFormatVersion.java index bef345c..8a1b8eb 100644 --- a/core/src/main/java/org/apache/carbondata/core/carbon/ColumnarFormatVersion.java +++ b/core/src/main/java/org/apache/carbondata/core/carbon/ColumnarFormatVersion.java @@ -39,9 +39,14 @@ public enum ColumnarFormatVersion { public static ColumnarFormatVersion valueOf(short version) { switch (version) { + case 0: +// before multiple reader support, for existing carbon file, it is version 1 +return V1; case 1: +// after multiple reader support, user can write new file with version 1 return V1; case 2: +// after multiple reader support, user can write new file with version 2 return V2; default: throw new IllegalArgumentException("invalid format version: " + version);
[2/2] incubator-carbondata git commit: Fix compatibility. This closes #380
Fix compatibility This closes #380 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/72900c55 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/72900c55 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/72900c55 Branch: refs/heads/master Commit: 72900c553e923fb6403d21990d1dbf13c2c957de Parents: 5406cee 7904716 Author: jackylk Authored: Fri Dec 2 12:13:49 2016 +0800 Committer: jackylk Committed: Fri Dec 2 12:13:49 2016 +0800 -- .../apache/carbondata/core/carbon/ColumnarFormatVersion.java| 5 + 1 file changed, 5 insertions(+) --
[2/2] incubator-carbondata git commit: [CARBONDATA-481][SPARK2] Fix late decoder and support whole stage code gen. This closes #379
[CARBONDATA-481][SPARK2]fix late decoder and support whole stage code gen This closes #379 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/f47bbc2c Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/f47bbc2c Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/f47bbc2c Branch: refs/heads/master Commit: f47bbc2c23c330c6c30f768efb0f18ea610d5e30 Parents: 72900c5 0776187 Author: jackylk Authored: Fri Dec 2 12:27:26 2016 +0800 Committer: jackylk Committed: Fri Dec 2 12:27:26 2016 +0800 -- conf/dataload.properties.template | 4 +- examples/spark2/src/main/resources/data.csv | 20 +- .../carbondata/examples/CarbonExample.scala | 36 +- .../sql/CarbonDatasourceHadoopRelation.scala| 6 +- .../spark/sql/CarbonDictionaryDecoder.scala | 151 +++- .../execution/CarbonLateDecodeStrategy.scala| 345 ++- .../sql/optimizer/CarbonLateDecodeRule.scala| 101 +- 7 files changed, 533 insertions(+), 130 deletions(-) --
[1/2] incubator-carbondata git commit: fix bug in late decode optimizer and strategy
Repository: incubator-carbondata Updated Branches: refs/heads/master 72900c553 -> f47bbc2c2 fix bug in late decode optimizer and strategy Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/07761876 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/07761876 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/07761876 Branch: refs/heads/master Commit: 07761876e45bb76d9932fd2009108c722b718280 Parents: 72900c5 Author: QiangCai Authored: Fri Dec 2 07:50:08 2016 +0800 Committer: jackylk Committed: Fri Dec 2 12:27:02 2016 +0800 -- conf/dataload.properties.template | 4 +- examples/spark2/src/main/resources/data.csv | 20 +- .../carbondata/examples/CarbonExample.scala | 36 +- .../sql/CarbonDatasourceHadoopRelation.scala| 6 +- .../spark/sql/CarbonDictionaryDecoder.scala | 151 +++- .../execution/CarbonLateDecodeStrategy.scala| 345 ++- .../sql/optimizer/CarbonLateDecodeRule.scala| 101 +- 7 files changed, 533 insertions(+), 130 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/07761876/conf/dataload.properties.template -- diff --git a/conf/dataload.properties.template b/conf/dataload.properties.template index d5e9d6a..cfafb4c 100644 --- a/conf/dataload.properties.template +++ b/conf/dataload.properties.template @@ -18,14 +18,14 @@ #carbon store path # you should change to the code path of your local machine -carbon.storelocation=/Users/jackylk/code/incubator-carbondata/examples/spark2/target/store +carbon.storelocation=/home/david/Documents/incubator-carbondata/examples/spark2/target/store #true: use kettle to load data #false: use new flow to load data use_kettle=true # you should change to the code path of your local machine -carbon.kettle.home=/Users/jackylk/code/incubator-carbondata/processing/carbonplugins +carbon.kettle.home=/home/david/Documents/incubator-carbondata/processing/carbonplugins #csv delimiter 
character delimiter=, http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/07761876/examples/spark2/src/main/resources/data.csv -- diff --git a/examples/spark2/src/main/resources/data.csv b/examples/spark2/src/main/resources/data.csv index 5d3169e..83ea3b3 100644 --- a/examples/spark2/src/main/resources/data.csv +++ b/examples/spark2/src/main/resources/data.csv @@ -1,11 +1,11 @@ shortField,intField,bigintField,doubleField,stringField,timestampField -1, 10, 100, 48.4, spark, 2015/4/23 -5, 17, 140, 43.4, spark, 2015/7/27 -1, 11, 100, 44.4, flink, 2015/5/23 -1, 10, 150, 43.4, spark, 2015/7/24 -1, 10, 100, 47.4, spark, 2015/7/23 -3, 14, 160, 43.4, hive, 2015/7/26 -2, 10, 100, 43.4, impala, 2015/7/23 -1, 10, 100, 43.4, spark, 2015/5/23 -4, 16, 130, 42.4, impala, 2015/7/23 -1, 10, 100, 43.4, spark, 2015/7/23 +1,10,100,48.4,spark,2015/4/23 +5,17,140,43.4,spark,2015/7/27 +1,11,100,44.4,flink,2015/5/23 +1,10,150,43.4,spark,2015/7/24 +1,10,100,47.4,spark,2015/7/23 +3,14,160,43.4,hive,2015/7/26 +2,10,100,43.4,impala,2015/7/23 +1,10,100,43.4,spark,2015/5/23 +4,16,130,42.4,impala,2015/7/23 +1,10,100,43.4,spark,2015/7/23 http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/07761876/examples/spark2/src/main/scala/org/apache/carbondata/examples/CarbonExample.scala -- diff --git a/examples/spark2/src/main/scala/org/apache/carbondata/examples/CarbonExample.scala b/examples/spark2/src/main/scala/org/apache/carbondata/examples/CarbonExample.scala index 75fdd1c..d3a7e86 100644 --- a/examples/spark2/src/main/scala/org/apache/carbondata/examples/CarbonExample.scala +++ b/examples/spark2/src/main/scala/org/apache/carbondata/examples/CarbonExample.scala @@ -26,7 +26,7 @@ object CarbonExample { def main(args: Array[String]): Unit = { // to run the example, plz change this path to your local machine path -val rootPath = "/Users/jackylk/code/incubator-carbondata" +val rootPath = "/home/david/Documents/incubator-carbondata" val spark = SparkSession .builder() 
.master("local") @@ -38,10 +38,10 @@ object CarbonExample { spark.sparkContext.setLogLevel("WARN") // Drop table -spark.sql("DROP TABLE IF EXISTS carbon_table") -spark.sql("DROP TABLE IF EXISTS csv_table") - -// Create table +//spark.sql("DROP TABLE IF EXISTS carbon_table") +//spark.sql("DROP TABLE IF EXISTS csv_table") +// +//// Create table spark.sql( s"""
[2/2] incubator-carbondata git commit: [CARBONDATA-471] Optimized no kettle flow and fixed issues in cluster. This closes #333
[CARBONDATA-471]Optimized no kettle flow and fixed issues in cluster This closes #333 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/e7e370ca Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/e7e370ca Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/e7e370ca Branch: refs/heads/master Commit: e7e370cac28d3db35c8e5ec6ab4d3fb62af749f4 Parents: f47bbc2 63434fa Author: jackylk Authored: Fri Dec 2 16:08:14 2016 +0800 Committer: jackylk Committed: Fri Dec 2 16:08:14 2016 +0800 -- .../carbondata/common/CarbonIterator.java | 14 +++ .../AbstractDetailQueryResultIterator.java | 2 +- .../carbondata/hadoop/csv/CSVInputFormat.java | 42 +--- .../recorditerator/RecordReaderIterator.java| 31 +- .../spark/rdd/NewCarbonDataLoadRDD.scala| 40 --- .../processing/iterator/CarbonIterator.java | 38 --- .../processing/newflow/DataLoadExecutor.java| 5 +- .../newflow/DataLoadProcessBuilder.java | 8 +- .../sort/impl/ParallelReadMergeSorterImpl.java | 90 .../newflow/steps/InputProcessorStepImpl.java | 105 ++- .../sortandgroupby/sortdata/SortDataRows.java | 40 +++ 11 files changed, 268 insertions(+), 147 deletions(-) --
[1/2] incubator-carbondata git commit: Optimize data loading
Repository: incubator-carbondata Updated Branches: refs/heads/master f47bbc2c2 -> e7e370cac Optimize data loading Handled broadcast fails. Updated as per comments Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/63434fac Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/63434fac Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/63434fac Branch: refs/heads/master Commit: 63434fac5f4dc2d7eb9d03819401e243744a5f48 Parents: f47bbc2 Author: ravipesala Authored: Sun Nov 27 17:39:36 2016 +0530 Committer: jackylk Committed: Fri Dec 2 15:52:02 2016 +0800 -- .../carbondata/common/CarbonIterator.java | 14 +++ .../AbstractDetailQueryResultIterator.java | 2 +- .../carbondata/hadoop/csv/CSVInputFormat.java | 42 +--- .../recorditerator/RecordReaderIterator.java| 31 +- .../spark/rdd/NewCarbonDataLoadRDD.scala| 40 --- .../processing/iterator/CarbonIterator.java | 38 --- .../processing/newflow/DataLoadExecutor.java| 5 +- .../newflow/DataLoadProcessBuilder.java | 8 +- .../sort/impl/ParallelReadMergeSorterImpl.java | 90 .../newflow/steps/InputProcessorStepImpl.java | 105 ++- .../sortandgroupby/sortdata/SortDataRows.java | 40 +++ 11 files changed, 268 insertions(+), 147 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/63434fac/common/src/main/java/org/apache/carbondata/common/CarbonIterator.java -- diff --git a/common/src/main/java/org/apache/carbondata/common/CarbonIterator.java b/common/src/main/java/org/apache/carbondata/common/CarbonIterator.java index 9141bcd..b1a5b5a 100644 --- a/common/src/main/java/org/apache/carbondata/common/CarbonIterator.java +++ b/common/src/main/java/org/apache/carbondata/common/CarbonIterator.java @@ -35,4 +35,18 @@ public abstract class CarbonIterator implements Iterator { throw new UnsupportedOperationException("remove"); } + /** + * Initialize the iterator + */ + public void 
initialize() { +// sub classes can overwrite to provide initialize logic to this method + } + + /** + * Close the resources + */ + public void close() { +// sub classes can overwrite to provide close logic to this method + } + } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/63434fac/core/src/main/java/org/apache/carbondata/scan/result/iterator/AbstractDetailQueryResultIterator.java -- diff --git a/core/src/main/java/org/apache/carbondata/scan/result/iterator/AbstractDetailQueryResultIterator.java b/core/src/main/java/org/apache/carbondata/scan/result/iterator/AbstractDetailQueryResultIterator.java index c8c61b0..07ccab4 100644 --- a/core/src/main/java/org/apache/carbondata/scan/result/iterator/AbstractDetailQueryResultIterator.java +++ b/core/src/main/java/org/apache/carbondata/scan/result/iterator/AbstractDetailQueryResultIterator.java @@ -114,7 +114,7 @@ public abstract class AbstractDetailQueryResultIterator extends CarbonIterator { DataRefNodeFinder finder = new BTreeDataRefNodeFinder(blockInfo.getEachColumnValueSize()); DataRefNode startDataBlock = finder .findFirstDataBlock(blockInfo.getDataBlock().getDataRefNode(), blockInfo.getStartKey()); - while (startDataBlock.nodeNumber() != blockInfo.getStartBlockletIndex()) { + while (startDataBlock.nodeNumber() < blockInfo.getStartBlockletIndex()) { startDataBlock = startDataBlock.getNextDataRefNode(); } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/63434fac/hadoop/src/main/java/org/apache/carbondata/hadoop/csv/CSVInputFormat.java -- diff --git a/hadoop/src/main/java/org/apache/carbondata/hadoop/csv/CSVInputFormat.java b/hadoop/src/main/java/org/apache/carbondata/hadoop/csv/CSVInputFormat.java index 3ea96ac..ca27673 100644 --- a/hadoop/src/main/java/org/apache/carbondata/hadoop/csv/CSVInputFormat.java +++ b/hadoop/src/main/java/org/apache/carbondata/hadoop/csv/CSVInputFormat.java @@ -66,6 +66,8 @@ public class CSVInputFormat extends FileInputFormat createRecordReader(InputSplit 
inputSplit, @@ -85,10 +87,10 @@ public class CSVInputFormat extends FileInputFormat { @@ -163,8 +176,9 @@ public class CSVInputFormat extends FileInputFormathttp://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/63434fac/hadoop/src/main/java/org/apache/carbondata/hadoop/csv/recorditerator/RecordReaderIterator.java -- diff --g
[1/2] incubator-carbondata git commit: fix bug for reading dataframe concurrently
Repository: incubator-carbondata Updated Branches: refs/heads/master e7e370cac -> 9dd09659a fix bug for reading dataframe concurrently Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/9dcdf7de Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/9dcdf7de Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/9dcdf7de Branch: refs/heads/master Commit: 9dcdf7de6bde64d1c800fd268f2099d2278e8f33 Parents: e7e370c Author: QiangCai Authored: Fri Dec 2 17:41:23 2016 +0800 Committer: QiangCai Committed: Fri Dec 2 17:46:56 2016 +0800 -- .../carbondata/spark/rdd/CarbonDataLoadRDD.scala | 15 ++- 1 file changed, 14 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/9dcdf7de/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonDataLoadRDD.scala -- diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonDataLoadRDD.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonDataLoadRDD.scala index 319d85c..5d6a663 100644 --- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonDataLoadRDD.scala +++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonDataLoadRDD.scala @@ -18,6 +18,7 @@ package org.apache.carbondata.spark.rdd import java.lang.Long +import java.nio.ByteBuffer import java.text.SimpleDateFormat import java.util import java.util.UUID @@ -548,10 +549,22 @@ class DataFrameLoaderRDD[K, V]( class PartitionIterator(partitionIter: Iterator[DataLoadPartitionWrap[Row]], carbonLoadModel: CarbonLoadModel, context: TaskContext) extends JavaRddIterator[JavaRddIterator[Array[String]]] { + val serializer = SparkEnv.get.closureSerializer.newInstance() + var serializeBuffer: ByteBuffer = null def hasNext: Boolean = partitionIter.hasNext + def next: 
JavaRddIterator[Array[String]] = { val value = partitionIter.next -new RddIterator(value.rdd.iterator(value.partition, context), +// The rdd (which come from Hive Table) don't support to read dataframe concurrently. +// So here will create different rdd instance for each thread. +val newInstance = { + if (serializeBuffer == null) { +serializeBuffer = serializer.serialize[RDD[Row]](value.rdd) + } + serializeBuffer.rewind() + serializer.deserialize[RDD[Row]](serializeBuffer) +} +new RddIterator(newInstance.iterator(value.partition, context), carbonLoadModel, context) }
[2/2] incubator-carbondata git commit: [CARBONDATA-486]fix bug for reading dataframe concurrently This closes #382
[CARBONDATA-486]fix bug for reading dataframe concurrently This closes #382 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/9dd09659 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/9dd09659 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/9dd09659 Branch: refs/heads/master Commit: 9dd09659a3bb5ee7e636098dcde54699aac67844 Parents: e7e370c 9dcdf7d Author: jackylk Authored: Fri Dec 2 21:43:52 2016 +0800 Committer: jackylk Committed: Fri Dec 2 21:43:52 2016 +0800 -- .../carbondata/spark/rdd/CarbonDataLoadRDD.scala | 15 ++- 1 file changed, 14 insertions(+), 1 deletion(-) --
[2/2] incubator-carbondata git commit: [CARBONDATA-459] Block distribution is wrong in case of dynamic allocation=true This closes #362
[CARBONDATA-459] Block distribution is wrong in case of dynamic allocation=true This closes #362 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/cffcb998 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/cffcb998 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/cffcb998 Branch: refs/heads/master Commit: cffcb998a273af310f134e41f3ade892b19b03bc Parents: 9dd0965 ff7793b Author: jackylk Authored: Sat Dec 3 02:44:50 2016 +0800 Committer: jackylk Committed: Sat Dec 3 02:44:50 2016 +0800 -- .../core/constants/CarbonCommonConstants.java | 23 +++ .../carbondata/core/util/CarbonProperties.java | 23 +++ .../carbondata/spark/rdd/CarbonMergerRDD.scala | 7 +- .../spark/sql/hive/DistributionUtil.scala | 155 --- .../org/apache/spark/sql/CarbonContext.scala| 2 - 5 files changed, 181 insertions(+), 29 deletions(-) --
[1/2] incubator-carbondata git commit: Problem: Block distribution is wrong in case of dynamic allocation=true
Repository: incubator-carbondata Updated Branches: refs/heads/master 9dd09659a -> cffcb998a Problem: Block distribution is wrong in case of dynamic allocation=true Analysis: In case when dynamic allocation is true and configured max executors are more than the initial executors then carbon is not able to request the max number of executors configured. Due to this resources are getting under utilized and case when number of blocks increases, the distribution of blocks is limited to the number of nodes and the number of tasks launched are less. This leads to under utilization of resources and hence impacts the query and load performance. Fix: Request for starting the maximum number of configured executors in case dynamic allocation is true. Impact area: Query and data load flow performance due to under utilization of resources. Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/ff7793be Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/ff7793be Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/ff7793be Branch: refs/heads/master Commit: ff7793beb079847a57a5d3d5b33a37c1976e53fb Parents: 9dd0965 Author: manishgupta88 Authored: Mon Nov 28 15:37:11 2016 +0530 Committer: jackylk Committed: Sat Dec 3 02:37:28 2016 +0800 -- .../core/constants/CarbonCommonConstants.java | 23 +++ .../carbondata/core/util/CarbonProperties.java | 23 +++ .../carbondata/spark/rdd/CarbonMergerRDD.scala | 7 +- .../spark/sql/hive/DistributionUtil.scala | 155 --- .../org/apache/spark/sql/CarbonContext.scala| 2 - 5 files changed, 181 insertions(+), 29 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/ff7793be/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java 
b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java index 1ac2ba1..29dbbd8 100644 --- a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java +++ b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java @@ -919,6 +919,29 @@ public final class CarbonCommonConstants { * maximum length of column */ public static final int DEFAULT_COLUMN_LENGTH = 10; + /** + * Maximum waiting time (in seconds) for a query for requested executors to be started + */ + public static final String CARBON_EXECUTOR_STARTUP_TIMEOUT = + "carbon.max.executor.startup.timeout"; + + /** + * default value for executor start up waiting time out + */ + public static final String CARBON_EXECUTOR_WAITING_TIMEOUT_DEFAULT = "5"; + + /** + * Max value. If value configured by user is more than this than this value will value will be + * considered + */ + public static final int CARBON_EXECUTOR_WAITING_TIMEOUT_MAX = 60; + + /** + * time for which thread will sleep and check again if the requested number of executors + * have been started + */ + public static final int CARBON_EXECUTOR_STARTUP_THREAD_SLEEP_TIME = 250; + private CarbonCommonConstants() { } } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/ff7793be/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java b/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java index f4ec63d..3657215 100644 --- a/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java +++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java @@ -85,6 +85,7 @@ public final class CarbonProperties { validateHighCardinalityThreshold(); validateHighCardinalityInRowCountPercentage(); validateCarbonDataFileVersion(); +validateExecutorStartUpTime(); } private void validateBadRecordsLocation() { @@ -539,4 +540,26 @@ public 
final class CarbonProperties { return defaultVal; } + /** + * This method will validate and set the value for executor start up waiting time out + */ + private void validateExecutorStartUpTime() { +int executorStartUpTimeOut = 0; +try { + executorStartUpTimeOut = Integer.parseInt(carbonProperties + .getProperty(CarbonCommonConstants.CARBON_EXECUTOR_STARTUP_TIMEOUT, + CarbonCommonConstants.CARBON_EXECUTOR_WAITING_TIMEOUT_DEFAULT)); + // If value configured by user is more than max value of time out then consider
[1/2] incubator-carbondata git commit: fix spark2 compilation
Repository: incubator-carbondata Updated Branches: refs/heads/master cffcb998a -> d5f409840 fix spark2 compilation Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/223cf9aa Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/223cf9aa Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/223cf9aa Branch: refs/heads/master Commit: 223cf9aa7f226705cf947b972f128d0c16604fc8 Parents: cffcb99 Author: jackylk Authored: Fri Dec 2 22:09:33 2016 +0800 Committer: jackylk Committed: Sat Dec 3 08:37:53 2016 +0800 -- .../spark/rdd/NewCarbonDataLoadRDD.scala| 39 1 file changed, 23 insertions(+), 16 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/223cf9aa/integration/spark2/src/main/scala/org/apache/carbondata/spark/rdd/NewCarbonDataLoadRDD.scala -- diff --git a/integration/spark2/src/main/scala/org/apache/carbondata/spark/rdd/NewCarbonDataLoadRDD.scala b/integration/spark2/src/main/scala/org/apache/carbondata/spark/rdd/NewCarbonDataLoadRDD.scala index 914cdab..05ba3ac 100644 --- a/integration/spark2/src/main/scala/org/apache/carbondata/spark/rdd/NewCarbonDataLoadRDD.scala +++ b/integration/spark2/src/main/scala/org/apache/carbondata/spark/rdd/NewCarbonDataLoadRDD.scala @@ -32,11 +32,12 @@ import org.apache.spark.{Partition, SparkContext, TaskContext} import org.apache.spark.rdd.RDD import org.apache.spark.sql.execution.command.Partitioner +import org.apache.carbondata.common.CarbonIterator import org.apache.carbondata.common.logging.LogServiceFactory import org.apache.carbondata.common.logging.impl.StandardLogService import org.apache.carbondata.core.constants.CarbonCommonConstants import org.apache.carbondata.core.load.{BlockDetails, LoadMetadataDetails} -import org.apache.carbondata.core.util.CarbonTimeStatisticsFactory +import org.apache.carbondata.core.util.{CarbonProperties, CarbonTimeStatisticsFactory} import 
org.apache.carbondata.hadoop.csv.CSVInputFormat import org.apache.carbondata.hadoop.csv.recorditerator.RecordReaderIterator import org.apache.carbondata.processing.model.CarbonLoadModel @@ -168,9 +169,13 @@ class NewCarbonDataLoadRDD[K, V]( throw e } - def getInputIterators: Array[util.Iterator[Array[AnyRef]]] = { + def getInputIterators: Array[CarbonIterator[Array[AnyRef]]] = { val attemptId = new TaskAttemptID(jobTrackerId, id, TaskType.MAP, theSplit.index, 0) -val configuration: Configuration = confBroadcast.value.value +var configuration: Configuration = confBroadcast.value.value +// Broadcast fails in some cases +if (configuration == null) { + configuration = new Configuration() +} configureCSVInputFormat(configuration) val hadoopAttemptContext = new TaskAttemptContextImpl(configuration, attemptId) val format = new CSVInputFormat @@ -195,10 +200,11 @@ class NewCarbonDataLoadRDD[K, V]( partitionID, split.partitionBlocksDetail.length) val readers = split.partitionBlocksDetail.map(format.createRecordReader(_, hadoopAttemptContext)) - readers.zipWithIndex.foreach { case (reader, index) => -reader.initialize(split.partitionBlocksDetail(index), hadoopAttemptContext) + readers.zipWithIndex.map { case (reader, index) => +new RecordReaderIterator(reader, + split.partitionBlocksDetail(index), + hadoopAttemptContext) } - readers.map(new RecordReaderIterator(_)) } else { // for node partition val split = theSplit.asInstanceOf[CarbonNodePartition] @@ -220,21 +226,22 @@ class NewCarbonDataLoadRDD[K, V]( StandardLogService.setThreadName(blocksID, null) val readers = split.nodeBlocksDetail.map(format.createRecordReader(_, hadoopAttemptContext)) - readers.zipWithIndex.foreach { case (reader, index) => -reader.initialize(split.nodeBlocksDetail(index), hadoopAttemptContext) + readers.zipWithIndex.map { case (reader, index) => +new RecordReaderIterator(reader, split.nodeBlocksDetail(index), hadoopAttemptContext) } - readers.map(new RecordReaderIterator(_)) } } def 
configureCSVInputFormat(configuration: Configuration): Unit = { -CSVInputFormat.setCommentCharacter(carbonLoadModel.getCommentChar, configuration) -CSVInputFormat.setCSVDelimiter(carbonLoadModel.getCsvDelimiter, configuration) -CSVInputFormat.setEscapeCharacter(carbonLoadModel.getEscapeChar,
[2/2] incubator-carbondata git commit: [CARBONDATA-487] fix spark2 compilation This closes #383
[CARBONDATA-487] fix spark2 compilation This closes #383 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/d5f40984 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/d5f40984 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/d5f40984 Branch: refs/heads/master Commit: d5f4098404f3122c1d39cc9fecfa61281931ab40 Parents: cffcb99 223cf9a Author: jackylk Authored: Sat Dec 3 08:40:50 2016 +0800 Committer: jackylk Committed: Sat Dec 3 08:40:50 2016 +0800 -- .../spark/rdd/NewCarbonDataLoadRDD.scala| 39 1 file changed, 23 insertions(+), 16 deletions(-) --
[1/2] incubator-carbondata git commit: insertinto for spark2
Repository: incubator-carbondata Updated Branches: refs/heads/master d5f409840 -> 5e0a07221 insertinto for spark2 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/c1882f29 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/c1882f29 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/c1882f29 Branch: refs/heads/master Commit: c1882f29239f3d4fdf77a8ea51f8a5abe3fc955b Parents: d5f4098 Author: QiangCai Authored: Sat Dec 3 01:53:32 2016 +0800 Committer: jackylk Committed: Sat Dec 3 09:43:03 2016 +0800 -- examples/spark2/src/main/resources/data.csv | 3 +- .../carbondata/examples/CarbonExample.scala | 114 +++ .../sql/CarbonDatasourceHadoopRelation.scala| 18 ++- .../execution/command/carbonTableSchema.scala | 26 - .../apache/spark/sql/hive/CarbonMetastore.scala | 2 +- 5 files changed, 109 insertions(+), 54 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/c1882f29/examples/spark2/src/main/resources/data.csv -- diff --git a/examples/spark2/src/main/resources/data.csv b/examples/spark2/src/main/resources/data.csv index 83ea3b3..fcdf3c1 100644 --- a/examples/spark2/src/main/resources/data.csv +++ b/examples/spark2/src/main/resources/data.csv @@ -1,4 +1,3 @@ -shortField,intField,bigintField,doubleField,stringField,timestampField 1,10,100,48.4,spark,2015/4/23 5,17,140,43.4,spark,2015/7/27 1,11,100,44.4,flink,2015/5/23 @@ -8,4 +7,4 @@ shortField,intField,bigintField,doubleField,stringField,timestampField 2,10,100,43.4,impala,2015/7/23 1,10,100,43.4,spark,2015/5/23 4,16,130,42.4,impala,2015/7/23 -1,10,100,43.4,spark,2015/7/23 +1,10,100,43.4,spark,2015/7/23 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/c1882f29/examples/spark2/src/main/scala/org/apache/carbondata/examples/CarbonExample.scala -- diff --git 
a/examples/spark2/src/main/scala/org/apache/carbondata/examples/CarbonExample.scala b/examples/spark2/src/main/scala/org/apache/carbondata/examples/CarbonExample.scala index d3a7e86..59cc4e9 100644 --- a/examples/spark2/src/main/scala/org/apache/carbondata/examples/CarbonExample.scala +++ b/examples/spark2/src/main/scala/org/apache/carbondata/examples/CarbonExample.scala @@ -17,31 +17,49 @@ package org.apache.spark.sql.examples +import java.io.File + +import org.apache.commons.io.FileUtils import org.apache.spark.sql.SparkSession -import org.apache.spark.util.TableLoader import org.apache.carbondata.core.constants.CarbonCommonConstants +import org.apache.carbondata.core.util.CarbonProperties object CarbonExample { def main(args: Array[String]): Unit = { -// to run the example, plz change this path to your local machine path -val rootPath = "/home/david/Documents/incubator-carbondata" +val rootPath = new File(this.getClass.getResource("/").getPath ++ "../../../..").getCanonicalPath +val storeLocation = s"$rootPath/examples/spark2/target/store" +val warehouse = s"$rootPath/examples/spark2/target/warehouse" +val metastoredb = s"$rootPath/examples/spark2/target/metastore_db" + +// clean data folder +if (true) { + val clean = (path: String) => FileUtils.deleteDirectory(new File(path)) + clean(storeLocation) + clean(warehouse) + clean(metastoredb) +} + val spark = SparkSession .builder() .master("local") .appName("CarbonExample") .enableHiveSupport() -.config(CarbonCommonConstants.STORE_LOCATION, - s"$rootPath/examples/spark2/target/store") +.config("carbon.kettle.home", + s"$rootPath/processing/carbonplugins") +.config("carbon.storelocation", storeLocation) +.config("spark.sql.warehouse.dir", warehouse) +.config("javax.jdo.option.ConnectionURL", + s"jdbc:derby:;databaseName=$metastoredb;create=true") .getOrCreate() spark.sparkContext.setLogLevel("WARN") -// Drop table -//spark.sql("DROP TABLE IF EXISTS carbon_table") -//spark.sql("DROP TABLE IF EXISTS csv_table") -// 
-//// Create table +CarbonProperties.getInstance() +.addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "/MM/dd") + +// Create table spark.sql( s""" | CREATE TABLE carbon_table( @@ -49,47 +67,47 @@ object CarbonExam
[2/2] incubator-carbondata git commit: [CARBONDATA-488][SPARK2]add InsertInto feature for spark2 This closes #384
[CARBONDATA-488][SPARK2]add InsertInto feature for spark2 This closes #384 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/5e0a0722 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/5e0a0722 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/5e0a0722 Branch: refs/heads/master Commit: 5e0a07221e663af9c38de2c642dfa93cd1039ea2 Parents: d5f4098 c1882f2 Author: jackylk Authored: Sat Dec 3 09:51:14 2016 +0800 Committer: jackylk Committed: Sat Dec 3 09:51:14 2016 +0800 -- examples/spark2/src/main/resources/data.csv | 3 +- .../carbondata/examples/CarbonExample.scala | 114 +++ .../sql/CarbonDatasourceHadoopRelation.scala| 18 ++- .../execution/command/carbonTableSchema.scala | 26 - .../apache/spark/sql/hive/CarbonMetastore.scala | 2 +- 5 files changed, 109 insertions(+), 54 deletions(-) --
[2/2] incubator-carbondata git commit: [CARBONDATA-489] Fix spark2 decimal issue This closes #386
[CARBONDATA-489] Fix spark2 decimal issue This closes #386 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/151962af Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/151962af Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/151962af Branch: refs/heads/master Commit: 151962afbe3a9738498f4225896c5cb165cef8f7 Parents: 5e0a072 7f54160 Author: jackylk Authored: Sun Dec 4 02:12:05 2016 +0800 Committer: jackylk Committed: Sun Dec 4 02:12:05 2016 +0800 -- .../datastorage/store/impl/FileFactory.java | 32 .../org/apache/spark/sql/CarbonSource.scala | 18 +-- .../org/apache/spark/sql/TableCreator.scala | 5 ++- .../apache/spark/sql/hive/CarbonMetastore.scala | 14 +++-- .../carbondata/CarbonDataSourceSuite.scala | 15 + pom.xml | 6 ++-- 6 files changed, 65 insertions(+), 25 deletions(-) --
[1/2] incubator-carbondata git commit: fix spark2 decimal
Repository: incubator-carbondata Updated Branches: refs/heads/master 5e0a07221 -> 151962afb fix spark2 decimal code clean comment fix comment fix Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/7f54160f Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/7f54160f Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/7f54160f Branch: refs/heads/master Commit: 7f54160f6ff6a584519121bff2536d3ed38c5026 Parents: 5e0a072 Author: wangfei Authored: Sat Dec 3 12:44:07 2016 +0800 Committer: jackylk Committed: Sun Dec 4 02:10:41 2016 +0800 -- .../datastorage/store/impl/FileFactory.java | 32 .../org/apache/spark/sql/CarbonSource.scala | 18 +-- .../org/apache/spark/sql/TableCreator.scala | 5 ++- .../apache/spark/sql/hive/CarbonMetastore.scala | 14 +++-- .../carbondata/CarbonDataSourceSuite.scala | 15 + pom.xml | 6 ++-- 6 files changed, 65 insertions(+), 25 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/7f54160f/core/src/main/java/org/apache/carbondata/core/datastorage/store/impl/FileFactory.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datastorage/store/impl/FileFactory.java b/core/src/main/java/org/apache/carbondata/core/datastorage/store/impl/FileFactory.java index a94d3f1..c540920 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastorage/store/impl/FileFactory.java +++ b/core/src/main/java/org/apache/carbondata/core/datastorage/store/impl/FileFactory.java @@ -397,6 +397,38 @@ public final class FileFactory { } } + public static boolean deleteFile(String filePath, FileType fileType) throws IOException { +filePath = filePath.replace("\\", "/"); +switch (fileType) { + case HDFS: + case ALLUXIO: + case VIEWFS: +Path path = new Path(filePath); +FileSystem fs = path.getFileSystem(configuration); +return fs.delete(path, true); + + case LOCAL: + default: +File file = new 
File(filePath); +return deleteAllFilesOfDir(file); +} + } + + public static boolean deleteAllFilesOfDir(File path) { +if (!path.exists()) { + return true; +} +if (path.isFile()) { + return path.delete(); +} +File[] files = path.listFiles(); +for (int i = 0; i < files.length; i++) { + deleteAllFilesOfDir(files[i]); +} +return path.delete(); + } + + public static boolean mkdirs(String filePath, FileType fileType) throws IOException { filePath = filePath.replace("\\", "/"); switch (fileType) { http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/7f54160f/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonSource.scala -- diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonSource.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonSource.scala index fb87ba2..b14a95c 100644 --- a/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonSource.scala +++ b/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonSource.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.execution.CarbonLateDecodeStrategy import org.apache.spark.sql.execution.command.{CreateTable, Field} import org.apache.spark.sql.optimizer.CarbonLateDecodeRule import org.apache.spark.sql.sources._ -import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.types.{DecimalType, StructType} import org.apache.carbondata.core.constants.CarbonCommonConstants import org.apache.carbondata.spark.CarbonOption @@ -114,20 +114,18 @@ class CarbonSource extends CreatableRelationProvider } catch { case ex: NoSuchTableException => val fields = dataSchema.map { col => - val column = col.name val dataType = Option(col.dataType.toString) - val name = Option(col.name) // This is to parse complex data types - val x = col.name + ' ' + col.dataType - val f: Field = Field(column, dataType, name, None, null) + val f: Field = Field(col.name, dataType, Option(col.name), None, null) // the data type of the decimal type will be like decimal(10,0) // 
so checking the start of the string and taking the precision and scale. // resetting the data type with decimal - if (f.dataType.getOrElse("").startsWith("decimal")) { -val (precision, scale) = TableCreato
[1/2] incubator-carbondata git commit: fix profile issue for idea
Repository: incubator-carbondata Updated Branches: refs/heads/master 151962afb -> bf9478640 fix profile issue for idea Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/eda593fe Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/eda593fe Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/eda593fe Branch: refs/heads/master Commit: eda593fe8ae80e41a4874cd042aedd57c2a4f9b0 Parents: 151962a Author: QiangCai Authored: Sun Dec 4 02:16:18 2016 +0800 Committer: jackylk Committed: Sun Dec 4 10:38:15 2016 +0800 -- examples/spark2/pom.xml | 26 +++ hadoop/pom.xml | 28 - integration/spark-common/pom.xml | 14 --- integration/spark2/pom.xml | 47 --- pom.xml | 4 +-- processing/pom.xml | 18 +++--- 6 files changed, 51 insertions(+), 86 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/eda593fe/examples/spark2/pom.xml -- diff --git a/examples/spark2/pom.xml b/examples/spark2/pom.xml index bfb68d7..0b5d03c 100644 --- a/examples/spark2/pom.xml +++ b/examples/spark2/pom.xml @@ -38,6 +38,32 @@ org.apache.carbondata carbondata-spark2 ${project.version} + + + org.apache.spark + spark-hive-thriftserver_2.10 + + + org.apache.spark + spark-repl_2.10 + + + org.apache.spark + spark-sql_2.10 + + + + + org.apache.spark + spark-sql_${scala.binary.version} + + + org.apache.spark + spark-hive-thriftserver_${scala.binary.version} + + + org.apache.spark + spark-repl_${scala.binary.version} http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/eda593fe/hadoop/pom.xml -- diff --git a/hadoop/pom.xml b/hadoop/pom.xml index b04c4c6..97ee3e1 100644 --- a/hadoop/pom.xml +++ b/hadoop/pom.xml @@ -36,38 +36,10 @@ org.apache.carbondata - carbondata-common - ${project.version} - - - org.apache.carbondata - carbondata-core - ${project.version} - - - org.apache.carbondata - carbondata-format - ${project.version} - - - 
org.apache.carbondata carbondata-processing ${project.version} - org.apache.hadoop - hadoop-common - - - org.apache.hadoop - hadoop-hdfs - - - org.apache.commons - commons-lang3 - 3.3.2 - - junit junit http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/eda593fe/integration/spark-common/pom.xml -- diff --git a/integration/spark-common/pom.xml b/integration/spark-common/pom.xml index d3f42b4..22ba7d5 100644 --- a/integration/spark-common/pom.xml +++ b/integration/spark-common/pom.xml @@ -36,16 +36,6 @@ org.apache.carbondata - carbondata-common - ${project.version} - - - org.apache.carbondata - carbondata-core - ${project.version} - - - org.apache.carbondata carbondata-processing ${project.version} @@ -71,10 +61,6 @@ spark-hive-thriftserver_${scala.binary.version} - org.apache.spark - spark-sql_${scala.binary.version} - - junit junit http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/eda593fe/integration/spark2/pom.xml -- diff --git a/integration/spark2/pom.xml b/integration/spark2/pom.xml index ee56067..c3a35df 100644 --- a/integration/spark2/pom.xml +++ b/integration/spark2/pom.xml @@ -35,46 +35,19 @@ - com.databricks - spark-csv_${scala.binary.version} - ${spark.csv.version} - - - org.apache.carbondata - carbondata-common - ${project.version} - - - org.apache.carbondata - carbondata-core - ${project.version} - - - org.apache.carbondata - carbondata-processing - ${project.version} - - - org.apache.carbondata - carbondata-hadoop - ${project.version} - - org.apache.carbondata carbondata-spark-common ${project.version} - - - org.scala-lang - scala-compiler - - - org.scala-lang - scala-reflect - - - org.scala-lang - scala-libr
[2/2] incubator-carbondata git commit: [CARBONDATA-492]fix a bug of profile spark-2.0 for intellij idea This closes #390
[CARBONDATA-492]fix a bug of profile spark-2.0 for intellij idea This closes #390 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/bf947864 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/bf947864 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/bf947864 Branch: refs/heads/master Commit: bf9478640dd231a540f52c2557c1c6d564810d1e Parents: 151962a eda593f Author: jackylk Authored: Sun Dec 4 10:38:55 2016 +0800 Committer: jackylk Committed: Sun Dec 4 10:38:55 2016 +0800 -- examples/spark2/pom.xml | 26 +++ hadoop/pom.xml | 28 - integration/spark-common/pom.xml | 14 --- integration/spark2/pom.xml | 47 --- pom.xml | 4 +-- processing/pom.xml | 18 +++--- 6 files changed, 51 insertions(+), 86 deletions(-) --
[1/2] incubator-carbondata git commit: do not rely on runnable command
Repository: incubator-carbondata Updated Branches: refs/heads/master bf9478640 -> e7958b61e do not reply on runnable command code clean change spark version to 2.0.2 getclass.getname for logger fix compile issue with spark2.0.2 loginfo storepath in carbonenv revert pom change fix testsuite fix comment remove no use imports Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/3386a26b Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/3386a26b Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/3386a26b Branch: refs/heads/master Commit: 3386a26bd049ccf225c45f451f76fe064548408a Parents: bf94786 Author: wangfei Authored: Sat Dec 3 20:40:39 2016 +0800 Committer: jackylk Committed: Sun Dec 4 23:19:32 2016 +0800 -- .../spark/sql/CarbonCatalystOperators.scala | 57 --- .../scala/org/apache/spark/sql/CarbonEnv.scala | 8 +- .../execution/command/carbonTableSchema.scala | 157 +++ .../apache/spark/sql/hive/CarbonMetastore.scala | 18 ++- pom.xml | 8 +- 5 files changed, 43 insertions(+), 205 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/3386a26b/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonCatalystOperators.scala -- diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonCatalystOperators.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonCatalystOperators.scala index c152e0c..88e43fd 100644 --- a/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonCatalystOperators.scala +++ b/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonCatalystOperators.scala @@ -17,54 +17,12 @@ package org.apache.spark.sql -import org.apache.spark.sql.catalyst.{TableIdentifier} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical.{UnaryNode, _} import org.apache.spark.sql.optimizer.{CarbonDecoderRelation} -import 
org.apache.spark.sql.types._ import org.apache.carbondata.spark.CarbonAliasDecoderRelation -/** - * Top command - */ -case class Top(count: Int, topOrBottom: Int, dim: NamedExpression, msr: NamedExpression, -child: LogicalPlan) extends UnaryNode { - def output: Seq[Attribute] = child.output - - override def references: AttributeSet = { -val list = List(dim, msr) -AttributeSet(list.flatMap(_.references)) - } -} - -/** - * Shows Loads in a table - */ -case class ShowLoadsCommand(databaseNameOp: Option[String], table: String, limit: Option[String]) - extends LogicalPlan with Command { - - override def children: Seq[LogicalPlan] = Seq.empty - - override def output: Seq[Attribute] = { -Seq(AttributeReference("SegmentSequenceId", StringType, nullable = false)(), - AttributeReference("Status", StringType, nullable = false)(), - AttributeReference("Load Start Time", TimestampType, nullable = false)(), - AttributeReference("Load End Time", TimestampType, nullable = false)()) - } -} - -/** - * Describe formatted for hive table - */ -case class DescribeFormattedCommand(sql: String, tblIdentifier: TableIdentifier) - extends LogicalPlan with Command { - override def children: Seq[LogicalPlan] = Seq.empty - - override def output: Seq[AttributeReference] = -Seq(AttributeReference("result", StringType, nullable = false)()) -} - case class CarbonDictionaryCatalystDecoder( relations: Seq[CarbonDecoderRelation], profile: CarbonProfile, @@ -81,18 +39,3 @@ abstract class CarbonProfile(attributes: Seq[Attribute]) extends Serializable { case class IncludeProfile(attributes: Seq[Attribute]) extends CarbonProfile(attributes) case class ExcludeProfile(attributes: Seq[Attribute]) extends CarbonProfile(attributes) - -case class CreateDatabase(dbName: String, sql: String) extends LogicalPlan with Command { - override def children: Seq[LogicalPlan] = Seq.empty - override def output: Seq[AttributeReference] = { -Seq() - } -} - -case class DropDatabase(dbName: String, isCascade: Boolean, sql: 
String) -extends LogicalPlan with Command { - override def children: Seq[LogicalPlan] = Seq.empty - override def output: Seq[AttributeReference] = { -Seq() - } -} http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/3386a26b/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonEnv.scala -- diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonEnv.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonEnv.scala index
[2/2] incubator-carbondata git commit: [CARBONDATA-491] spark2 integration: Do not use runnable command in spark2 This closes #389
[CARBONDATA-491] spark2 integration: Do not use runnable command in spark2 This closes #389 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/e7958b61 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/e7958b61 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/e7958b61 Branch: refs/heads/master Commit: e7958b61e80bd88bf0966f33508a1ff65519c070 Parents: bf94786 3386a26 Author: jackylk Authored: Sun Dec 4 23:20:04 2016 +0800 Committer: jackylk Committed: Sun Dec 4 23:20:04 2016 +0800 -- .../spark/sql/CarbonCatalystOperators.scala | 57 --- .../scala/org/apache/spark/sql/CarbonEnv.scala | 8 +- .../execution/command/carbonTableSchema.scala | 157 +++ .../apache/spark/sql/hive/CarbonMetastore.scala | 18 ++- pom.xml | 8 +- 5 files changed, 43 insertions(+), 205 deletions(-) --
[2/2] incubator-carbondata git commit: [CARBONDATA-493]fix bug for insert into select from empty table This closes #396
[CARBONDATA-493]fix bug for insert into select from empty table This closes #396 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/8a673304 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/8a673304 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/8a673304 Branch: refs/heads/master Commit: 8a67330419129266eb8713a1ef0b6fd7fe9bcba0 Parents: bfeb573 a62b99b Author: jackylk Authored: Mon Dec 5 16:30:43 2016 +0800 Committer: jackylk Committed: Mon Dec 5 16:30:43 2016 +0800 -- .../spark/sql/execution/command/carbonTableSchema.scala | 7 +++ .../spark/sql/execution/command/carbonTableSchema.scala | 8 +++- 2 files changed, 14 insertions(+), 1 deletion(-) --
[1/2] incubator-carbondata git commit: fixInsertIntoFromEmptyTable
Repository: incubator-carbondata Updated Branches: refs/heads/master bfeb573dc -> 8a6733041 fixInsertIntoFromEmptyTable Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/a62b99bd Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/a62b99bd Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/a62b99bd Branch: refs/heads/master Commit: a62b99bd9293da9535b9ccfb532ee576c738e71c Parents: bfeb573 Author: QiangCai Authored: Mon Dec 5 15:08:39 2016 +0800 Committer: QiangCai Committed: Mon Dec 5 15:08:39 2016 +0800 -- .../spark/sql/execution/command/carbonTableSchema.scala | 7 +++ .../spark/sql/execution/command/carbonTableSchema.scala | 8 +++- 2 files changed, 14 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/a62b99bd/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala -- diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala index 1b939f7..20bb4a3 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala @@ -367,6 +367,13 @@ case class LoadTable( def run(sqlContext: SQLContext): Seq[Row] = { +if (dataFrame.isDefined) { + val rdd = dataFrame.get.rdd + if (rdd.partitions == null || rdd.partitions.length == 0) { +LOGGER.warn("DataLoading finished. 
No data was loaded.") +return Seq.empty + } +} val dbName = getDB.getDatabaseName(databaseNameOp, sqlContext) if (isOverwriteExist) { http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/a62b99bd/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala -- diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala index 52fc097..f94e396 100644 --- a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala +++ b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala @@ -293,8 +293,14 @@ case class LoadTable( val LOGGER = LogServiceFactory.getLogService(this.getClass.getCanonicalName) - def run(sparkSession: SparkSession): Seq[Row] = { +if (dataFrame.isDefined) { + val rdd = dataFrame.get.rdd + if (rdd.partitions == null || rdd.partitions.length == 0) { +LOGGER.warn("DataLoading finished. No data was loaded.") +return Seq.empty + } +} val dbName = databaseNameOp.getOrElse(sparkSession.catalog.currentDatabase) val identifier = TableIdentifier(tableName, Option(dbName))
[2/4] incubator-carbondata git commit: modify compress interface
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/25b4ba2c/core/src/main/java/org/apache/carbondata/core/datastorage/store/compression/type/UnCompressNoneInt.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datastorage/store/compression/type/UnCompressNoneInt.java b/core/src/main/java/org/apache/carbondata/core/datastorage/store/compression/type/UnCompressNoneInt.java index d12ebe1..6a3655d 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastorage/store/compression/type/UnCompressNoneInt.java +++ b/core/src/main/java/org/apache/carbondata/core/datastorage/store/compression/type/UnCompressNoneInt.java @@ -24,7 +24,7 @@ import java.nio.ByteBuffer; import org.apache.carbondata.common.logging.LogService; import org.apache.carbondata.common.logging.LogServiceFactory; import org.apache.carbondata.core.datastorage.store.compression.Compressor; -import org.apache.carbondata.core.datastorage.store.compression.SnappyCompression; +import org.apache.carbondata.core.datastorage.store.compression.CompressorFactory; import org.apache.carbondata.core.datastorage.store.compression.ValueCompressonHolder; import org.apache.carbondata.core.datastorage.store.dataholder.CarbonReadDataHolder; import org.apache.carbondata.core.util.ValueCompressionUtil; @@ -39,7 +39,7 @@ public class UnCompressNoneInt implements ValueCompressonHolder.UnCompressValue< /** * intCompressor. */ - private static Compressor intCompressor = SnappyCompression.SnappyIntCompression.INSTANCE; + private static Compressor compressor = CompressorFactory.getInstance(); /** * value. 
*/ @@ -70,9 +70,8 @@ public class UnCompressNoneInt implements ValueCompressonHolder.UnCompressValue< } @Override public ValueCompressonHolder.UnCompressValue compress() { -UnCompressNoneByte byte1 = new UnCompressNoneByte(this.actualDataType); -byte1.setValue(intCompressor.compress(value)); - +UnCompressNoneByte byte1 = new UnCompressNoneByte(actualDataType); +byte1.setValue(compressor.compressInt(value)); return byte1; } @@ -95,6 +94,11 @@ public class UnCompressNoneInt implements ValueCompressonHolder.UnCompressValue< @Override public CarbonReadDataHolder getValues(int decimal, Object maxValueObject) { switch (actualDataType) { + case DATA_SHORT: +return unCompressShort(); + case DATA_INT: +return unCompressInt(); + case DATA_LONG: case DATA_BIGINT: return unCompressLong(); default: @@ -102,13 +106,28 @@ public class UnCompressNoneInt implements ValueCompressonHolder.UnCompressValue< } } + private CarbonReadDataHolder unCompressShort() { +CarbonReadDataHolder dataHolder = new CarbonReadDataHolder(); +short[] vals = new short[value.length]; +for (int i = 0; i < vals.length; i++) { + vals[i] = (short)value[i]; +} +dataHolder.setReadableShortValues(vals); +return dataHolder; + } + + private CarbonReadDataHolder unCompressInt() { +CarbonReadDataHolder dataHolder = new CarbonReadDataHolder(); +dataHolder.setReadableIntValues(value); +return dataHolder; + } + private CarbonReadDataHolder unCompressDouble() { CarbonReadDataHolder dataHolderInfoObj = new CarbonReadDataHolder(); double[] vals = new double[value.length]; for (int i = 0; i < vals.length; i++) { vals[i] = value[i]; } - dataHolderInfoObj.setReadableDoubleValues(vals); return dataHolderInfoObj; } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/25b4ba2c/core/src/main/java/org/apache/carbondata/core/datastorage/store/compression/type/UnCompressNoneLong.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datastorage/store/compression/type/UnCompressNoneLong.java 
b/core/src/main/java/org/apache/carbondata/core/datastorage/store/compression/type/UnCompressNoneLong.java index 4661b7a..eebfdeb 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastorage/store/compression/type/UnCompressNoneLong.java +++ b/core/src/main/java/org/apache/carbondata/core/datastorage/store/compression/type/UnCompressNoneLong.java @@ -24,7 +24,7 @@ import java.nio.ByteBuffer; import org.apache.carbondata.common.logging.LogService; import org.apache.carbondata.common.logging.LogServiceFactory; import org.apache.carbondata.core.datastorage.store.compression.Compressor; -import org.apache.carbondata.core.datastorage.store.compression.SnappyCompression; +import org.apache.carbondata.core.datastorage.store.compression.CompressorFactory; import org.apache.carbondata.core.datastorage.store.compression.ValueCompressonHolder; import org.apache.carbondata.core.datastorage.store.dataholder.CarbonReadDataHolder; import org.apache.carbondata.core.util.ValueCompressionUtil; @@ -39,8 +39,7 @@ public
[1/4] incubator-carbondata git commit: modify compress interface
Repository: incubator-carbondata Updated Branches: refs/heads/master 7277355a9 -> b7f3be7e0 http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/25b4ba2c/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdata/CompressedTempSortFileReader.java -- diff --git a/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdata/CompressedTempSortFileReader.java b/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdata/CompressedTempSortFileReader.java index 729e060..46e7df0 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdata/CompressedTempSortFileReader.java +++ b/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdata/CompressedTempSortFileReader.java @@ -21,7 +21,7 @@ package org.apache.carbondata.processing.sortandgroupby.sortdata; import java.io.File; -import org.apache.carbondata.core.datastorage.store.compression.SnappyCompression.SnappyByteCompression; +import org.apache.carbondata.core.datastorage.store.compression.CompressorFactory; public class CompressedTempSortFileReader extends AbstractTempSortFileReader { @@ -45,8 +45,8 @@ public class CompressedTempSortFileReader extends AbstractTempSortFileReader { @Override public Object[][] getRow() { int recordLength = fileHolder.readInt(filePath); int byteArrayLength = fileHolder.readInt(filePath); -byte[] byteArrayFromFile = SnappyByteCompression.INSTANCE -.unCompress(fileHolder.readByteArray(filePath, byteArrayLength)); +byte[] byteArrayFromFile = CompressorFactory.getInstance() +.unCompressByte(fileHolder.readByteArray(filePath, byteArrayLength)); return prepareRecordFromByteBuffer(recordLength, byteArrayFromFile); } } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/25b4ba2c/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdata/CompressedTempSortFileWriter.java -- diff --git 
a/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdata/CompressedTempSortFileWriter.java b/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdata/CompressedTempSortFileWriter.java index 466ab08..0e621c1 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdata/CompressedTempSortFileWriter.java +++ b/processing/src/main/java/org/apache/carbondata/processing/sortandgroupby/sortdata/CompressedTempSortFileWriter.java @@ -24,7 +24,7 @@ import java.io.DataOutputStream; import java.io.IOException; import org.apache.carbondata.core.constants.CarbonCommonConstants; -import org.apache.carbondata.core.datastorage.store.compression.SnappyCompression.SnappyByteCompression; +import org.apache.carbondata.core.datastorage.store.compression.CompressorFactory; import org.apache.carbondata.core.util.CarbonUtil; import org.apache.carbondata.processing.sortandgroupby.exception.CarbonSortKeyAndGroupByException; @@ -65,7 +65,7 @@ public class CompressedTempSortFileWriter extends AbstractTempSortFileWriter { noDictionaryCount, complexDimensionCount); stream.writeInt(records.length); - byte[] byteArray = SnappyByteCompression.INSTANCE.compress(blockDataArray.toByteArray()); + byte[] byteArray = CompressorFactory.getInstance().compressByte(blockDataArray.toByteArray()); stream.writeInt(byteArray.length); stream.write(byteArray); http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/25b4ba2c/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java -- diff --git a/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java b/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java index 279bb63..a94bea5 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java +++ 
b/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java @@ -53,7 +53,7 @@ import org.apache.carbondata.core.carbon.path.CarbonStorePath; import org.apache.carbondata.core.carbon.path.CarbonTablePath; import org.apache.carbondata.core.constants.CarbonCommonConstants; import org.apache.carbondata.core.datastorage.store.columnar.IndexStorage; -import org.apache.carbondata.core.datastorage.store.compression.SnappyCompression.SnappyByteCompression; +import org.apache.carbondata.core.datastorage.store.compression.CompressorFactory; import org.apache.carbondata.core.datastorage.store.filesystem.CarbonFile; import org.apache.carbondata.core.datastorage.store.impl.FileFactory; import org.apache.carbo
[4/4] incubator-carbondata git commit: [CARBONDATA-495] Unify compressor interface This closes #401
[CARBONDATA-495] Unify compressor interface This closes #401 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/b7f3be7e Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/b7f3be7e Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/b7f3be7e Branch: refs/heads/master Commit: b7f3be7e03f7f619143b7ec17e748333d9771d6e Parents: 7277355 25b4ba2 Author: jackylk Authored: Tue Dec 6 12:03:53 2016 +0800 Committer: jackylk Committed: Tue Dec 6 12:03:53 2016 +0800 -- .../reader/dimension/AbstractChunkReader.java | 5 +- ...mpressedDimensionChunkFileBasedReaderV1.java | 2 +- ...mpressedDimensionChunkFileBasedReaderV2.java | 5 +- ...CompressedMeasureChunkFileBasedReaderV1.java | 4 +- ...CompressedMeasureChunkFileBasedReaderV2.java | 7 +- .../core/constants/CarbonCommonConstants.java | 11 + .../store/compression/Compressor.java | 26 +- .../store/compression/CompressorFactory.java| 42 +++ .../store/compression/SnappyCompression.java| 273 --- .../store/compression/SnappyCompressor.java | 153 +++ .../compression/ValueCompressonHolder.java | 43 +-- .../compression/type/UnCompressByteArray.java | 12 +- .../compression/type/UnCompressMaxMinByte.java | 10 +- .../type/UnCompressMaxMinByteForLong.java | 8 +- .../type/UnCompressMaxMinDefault.java | 9 +- .../type/UnCompressMaxMinDefaultLong.java | 7 +- .../compression/type/UnCompressMaxMinFloat.java | 11 +- .../compression/type/UnCompressMaxMinInt.java | 8 +- .../compression/type/UnCompressMaxMinLong.java | 9 +- .../compression/type/UnCompressMaxMinShort.java | 8 +- .../type/UnCompressNonDecimalByte.java | 9 +- .../type/UnCompressNonDecimalDefault.java | 7 +- .../type/UnCompressNonDecimalFloat.java | 7 +- .../type/UnCompressNonDecimalInt.java | 9 +- .../type/UnCompressNonDecimalLong.java | 10 +- .../type/UnCompressNonDecimalMaxMinByte.java| 9 +- .../type/UnCompressNonDecimalMaxMinDefault.java | 10 +- 
.../type/UnCompressNonDecimalMaxMinFloat.java | 7 +- .../type/UnCompressNonDecimalMaxMinInt.java | 9 +- .../type/UnCompressNonDecimalMaxMinLong.java| 7 +- .../type/UnCompressNonDecimalMaxMinShort.java | 9 +- .../type/UnCompressNonDecimalShort.java | 7 +- .../compression/type/UnCompressNoneByte.java| 7 +- .../compression/type/UnCompressNoneDefault.java | 8 +- .../compression/type/UnCompressNoneFloat.java | 13 +- .../compression/type/UnCompressNoneInt.java | 31 ++- .../compression/type/UnCompressNoneLong.java| 9 +- .../compression/type/UnCompressNoneShort.java | 13 +- .../store/dataholder/CarbonReadDataHolder.java | 84 ++ ...HeavyCompressedDoubleArrayDataFileStore.java | 110 .../uncompressed/DoubleArrayDataFileStore.java | 86 -- .../DoubleArrayDataInMemoryStore.java | 5 - .../key/columnar/AbstractColumnarKeyStore.java | 106 --- .../CompressedColumnarFileKeyStore.java | 168 .../CompressedColumnarInMemoryStore.java| 155 --- .../CompressedColumnarKeyStoreUtil.java | 108 .../UnCompressedColumnarFileKeyStore.java | 88 -- .../UnCompressedColumnarInMemoryStore.java | 70 - .../AbstractCompressedSingleArrayStore.java | 119 .../CompressedSingleArrayKeyFileStore.java | 92 --- .../CompressedSingleArrayKeyInMemoryStore.java | 46 .../AbstractSingleArrayKeyStore.java| 107 .../uncompressed/SingleArrayKeyFileStore.java | 104 --- .../SingleArrayKeyInMemoryStore.java| 36 --- .../core/util/CarbonMetadataUtil.java | 6 +- ...ressedDimensionChunkFileBasedReaderTest.java | 18 +- .../sortdata/CompressedTempSortFileReader.java | 6 +- .../sortdata/CompressedTempSortFileWriter.java | 4 +- .../store/writer/AbstractFactDataWriter.java| 4 +- 59 files changed, 412 insertions(+), 1944 deletions(-) --
[3/4] incubator-carbondata git commit: modify compress interface
modify compress interface fix style Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/25b4ba2c Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/25b4ba2c Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/25b4ba2c Branch: refs/heads/master Commit: 25b4ba2c9918c894f44761cf170d85f0a4e2a021 Parents: 7277355 Author: jackylk Authored: Mon Dec 5 23:42:22 2016 +0800 Committer: jackylk Committed: Tue Dec 6 11:26:04 2016 +0800 -- .../reader/dimension/AbstractChunkReader.java | 5 +- ...mpressedDimensionChunkFileBasedReaderV1.java | 2 +- ...mpressedDimensionChunkFileBasedReaderV2.java | 5 +- ...CompressedMeasureChunkFileBasedReaderV1.java | 4 +- ...CompressedMeasureChunkFileBasedReaderV2.java | 7 +- .../core/constants/CarbonCommonConstants.java | 11 + .../store/compression/Compressor.java | 26 +- .../store/compression/CompressorFactory.java| 42 +++ .../store/compression/SnappyCompression.java| 273 --- .../store/compression/SnappyCompressor.java | 153 +++ .../compression/ValueCompressonHolder.java | 43 +-- .../compression/type/UnCompressByteArray.java | 12 +- .../compression/type/UnCompressMaxMinByte.java | 10 +- .../type/UnCompressMaxMinByteForLong.java | 8 +- .../type/UnCompressMaxMinDefault.java | 9 +- .../type/UnCompressMaxMinDefaultLong.java | 7 +- .../compression/type/UnCompressMaxMinFloat.java | 11 +- .../compression/type/UnCompressMaxMinInt.java | 8 +- .../compression/type/UnCompressMaxMinLong.java | 9 +- .../compression/type/UnCompressMaxMinShort.java | 8 +- .../type/UnCompressNonDecimalByte.java | 9 +- .../type/UnCompressNonDecimalDefault.java | 7 +- .../type/UnCompressNonDecimalFloat.java | 7 +- .../type/UnCompressNonDecimalInt.java | 9 +- .../type/UnCompressNonDecimalLong.java | 10 +- .../type/UnCompressNonDecimalMaxMinByte.java| 9 +- .../type/UnCompressNonDecimalMaxMinDefault.java | 10 +- 
.../type/UnCompressNonDecimalMaxMinFloat.java | 7 +- .../type/UnCompressNonDecimalMaxMinInt.java | 9 +- .../type/UnCompressNonDecimalMaxMinLong.java| 7 +- .../type/UnCompressNonDecimalMaxMinShort.java | 9 +- .../type/UnCompressNonDecimalShort.java | 7 +- .../compression/type/UnCompressNoneByte.java| 7 +- .../compression/type/UnCompressNoneDefault.java | 8 +- .../compression/type/UnCompressNoneFloat.java | 13 +- .../compression/type/UnCompressNoneInt.java | 31 ++- .../compression/type/UnCompressNoneLong.java| 9 +- .../compression/type/UnCompressNoneShort.java | 13 +- .../store/dataholder/CarbonReadDataHolder.java | 84 ++ ...HeavyCompressedDoubleArrayDataFileStore.java | 110 .../uncompressed/DoubleArrayDataFileStore.java | 86 -- .../DoubleArrayDataInMemoryStore.java | 5 - .../key/columnar/AbstractColumnarKeyStore.java | 106 --- .../CompressedColumnarFileKeyStore.java | 168 .../CompressedColumnarInMemoryStore.java| 155 --- .../CompressedColumnarKeyStoreUtil.java | 108 .../UnCompressedColumnarFileKeyStore.java | 88 -- .../UnCompressedColumnarInMemoryStore.java | 70 - .../AbstractCompressedSingleArrayStore.java | 119 .../CompressedSingleArrayKeyFileStore.java | 92 --- .../CompressedSingleArrayKeyInMemoryStore.java | 46 .../AbstractSingleArrayKeyStore.java| 107 .../uncompressed/SingleArrayKeyFileStore.java | 104 --- .../SingleArrayKeyInMemoryStore.java| 36 --- .../core/util/CarbonMetadataUtil.java | 6 +- ...ressedDimensionChunkFileBasedReaderTest.java | 18 +- .../sortdata/CompressedTempSortFileReader.java | 6 +- .../sortdata/CompressedTempSortFileWriter.java | 4 +- .../store/writer/AbstractFactDataWriter.java| 4 +- 59 files changed, 412 insertions(+), 1944 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/25b4ba2c/core/src/main/java/org/apache/carbondata/core/carbon/datastore/chunk/reader/dimension/AbstractChunkReader.java -- diff --git 
a/core/src/main/java/org/apache/carbondata/core/carbon/datastore/chunk/reader/dimension/AbstractChunkReader.java b/core/src/main/java/org/apache/carbondata/core/carbon/datastore/chunk/reader/dimension/AbstractChunkReader.java index ced33fe..cfe7079 100644 --- a/core/src/main/java/org/apache/carbondata/core/carbon/datastore/chunk/reader/dimension/AbstractChunkReader.java +++ b/core/src/main/java/org/apache/carbondata/core/carbon
[1/2] incubator-carbondata git commit: Remove unused code in CarbonRow
Repository: incubator-carbondata Updated Branches: refs/heads/master bc8265c9a -> e7ee09c1a Remove unused code in CarbonRow Remove unused code in CarbonRow fix style Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/e14529a2 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/e14529a2 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/e14529a2 Branch: refs/heads/master Commit: e14529a27ba67df87ecc7dd67f856a5f0038fe0b Parents: bc8265c Author: qiuheng Authored: Tue Dec 6 23:07:59 2016 +0800 Committer: jackylk Committed: Wed Dec 7 00:09:02 2016 +0800 -- .../processing/newflow/row/CarbonRow.java | 25 1 file changed, 25 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/e14529a2/processing/src/main/java/org/apache/carbondata/processing/newflow/row/CarbonRow.java -- diff --git a/processing/src/main/java/org/apache/carbondata/processing/newflow/row/CarbonRow.java b/processing/src/main/java/org/apache/carbondata/processing/newflow/row/CarbonRow.java index 68b87a9..daf37fb 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/newflow/row/CarbonRow.java +++ b/processing/src/main/java/org/apache/carbondata/processing/newflow/row/CarbonRow.java @@ -19,7 +19,6 @@ package org.apache.carbondata.processing.newflow.row; -import java.math.BigDecimal; import java.util.Arrays; /** @@ -41,26 +40,6 @@ public class CarbonRow { this.data = data; } - public int getInt(int ordinal) { -return (int) data[ordinal]; - } - - public long getLong(int ordinal) { -return (long) data[ordinal]; - } - - public float getFloat(int ordinal) { -return (float) data[ordinal]; - } - - public double getDouble(int ordinal) { -return (double) data[ordinal]; - } - - public BigDecimal getDecimal(int ordinal) { -return (BigDecimal) data[ordinal]; - } - public String getString(int ordinal) { return (String) data[ordinal]; } @@ -69,10 
+48,6 @@ public class CarbonRow { return data[ordinal]; } - public byte[] getBinary(int ordinal) { -return (byte[]) data[ordinal]; - } - public Object[] getObjectArray(int ordinal) { return (Object[]) data[ordinal]; }
[2/2] incubator-carbondata git commit: Remove unused code in CarbonRow This closes #405
Remove unused code in CarbonRow This closes #405 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/e7ee09c1 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/e7ee09c1 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/e7ee09c1 Branch: refs/heads/master Commit: e7ee09c1aeae6d32df850d01cad9bf37ead0b7d5 Parents: bc8265c e14529a Author: jackylk Authored: Wed Dec 7 00:10:27 2016 +0800 Committer: jackylk Committed: Wed Dec 7 00:10:27 2016 +0800 -- .../processing/newflow/row/CarbonRow.java | 25 1 file changed, 25 deletions(-) --
[1/2] incubator-carbondata git commit: Clean up CarbonUtil
Repository: incubator-carbondata Updated Branches: refs/heads/master e7ee09c1a -> dc567615d Clean up CarbonUtil Clean up CarbonUtil code fix style Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/391feea3 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/391feea3 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/391feea3 Branch: refs/heads/master Commit: 391feea333e70efef08c3b3a7ea304d4f5012634 Parents: e7ee09c Author: qiuheng Authored: Tue Dec 6 23:16:57 2016 +0800 Committer: jackylk Committed: Wed Dec 7 00:47:52 2016 +0800 -- .../apache/carbondata/core/util/CarbonUtil.java | 253 --- 1 file changed, 253 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/391feea3/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java index 0fb69ce..6531352 100644 --- a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java @@ -37,13 +37,10 @@ import java.nio.charset.Charset; import java.security.PrivilegedExceptionAction; import java.util.ArrayList; import java.util.Collections; -import java.util.Comparator; import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.concurrent.Callable; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; import org.apache.carbondata.common.logging.LogService; import org.apache.carbondata.common.logging.LogServiceFactory; @@ -61,7 +58,6 @@ import org.apache.carbondata.core.carbon.path.CarbonStorePath; import org.apache.carbondata.core.carbon.path.CarbonTablePath; import org.apache.carbondata.core.constants.CarbonCommonConstants; import 
org.apache.carbondata.core.datastorage.store.columnar.ColumnGroupModel; -import org.apache.carbondata.core.datastorage.store.columnar.ColumnarKeyStoreDataHolder; import org.apache.carbondata.core.datastorage.store.columnar.UnBlockIndexer; import org.apache.carbondata.core.datastorage.store.compression.MeasureMetaDataModel; import org.apache.carbondata.core.datastorage.store.compression.WriterCompressModel; @@ -82,10 +78,8 @@ import org.apache.thrift.TException; import org.apache.thrift.protocol.TCompactProtocol; import org.apache.thrift.protocol.TProtocol; import org.apache.thrift.transport.TIOStreamTransport; - import org.pentaho.di.core.exception.KettleException; - public final class CarbonUtil { public static final String HDFS_PREFIX = "hdfs://"; @@ -149,56 +143,6 @@ public final class CarbonUtil { stream.close(); } } - /** - * @param baseStorePath - * @return - */ - private static int createBaseStoreFolders(String baseStorePath) { -FileFactory.FileType fileType = FileFactory.getFileType(baseStorePath); -try { - if (!FileFactory.isFileExist(baseStorePath, fileType, false)) { -if (!FileFactory.mkdirs(baseStorePath, fileType)) { - return -1; -} - } -} catch (Exception e) { - return -1; -} -return 1; - } - - /** - * @param filterType - * @param listFiles - * @param counter - * @return - */ - private static int findCounterValue(final String filterType, CarbonFile[] listFiles, - int counter) { -if ("Load_".equals(filterType)) { - for (CarbonFile files : listFiles) { -String folderName = getFolderName(files); -if (folderName.indexOf('.') > -1) { - folderName = folderName.substring(0, folderName.indexOf('.')); -} -String[] split = folderName.split("_"); - -if (split.length > 1 && counter < Integer.parseInt(split[1])) { - counter = Integer.parseInt(split[1]); -} - } -} else { - // Iterate list of Directories and find the counter value - for (CarbonFile eachFile : listFiles) { -String folderName = getFolderName(eachFile); -String[] split = folderName.split("_"); -if 
(counter < Integer.parseInt(split[1])) { - counter = Integer.parseInt(split[1]); -} - } -} -return counter; - } /** * @param eachFile @@ -431,54 +375,6 @@ public final class CarbonUtil { } /** - * This function will rename the table to be deleted - * - * @param partitionCount - * @param storePath - * @param databaseName - * @param tableName - */ - public static void renameTableForDeletion(int partitionCo
[2/2] incubator-carbondata git commit: Clean up CarbonUtil This closes #406
Clean up CarbonUtil This closes #406 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/dc567615 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/dc567615 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/dc567615 Branch: refs/heads/master Commit: dc567615d7f1c3175d5bc127aaeec229745640a8 Parents: e7ee09c 391feea Author: jackylk Authored: Wed Dec 7 00:48:30 2016 +0800 Committer: jackylk Committed: Wed Dec 7 00:48:30 2016 +0800 -- .../apache/carbondata/core/util/CarbonUtil.java | 253 --- 1 file changed, 253 deletions(-) --
[2/2] incubator-carbondata git commit: [CARBONDATA-497][SPARK2]fix datatype issue of CarbonLateDecoderRule This closes #403
[CARBONDATA-497][SPARK2]fix datatype issue of CarbonLateDecoderRule This closes #403 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/47658b17 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/47658b17 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/47658b17 Branch: refs/heads/master Commit: 47658b17d9dfd9bb150f8e3ea38bcb1c26dd89e4 Parents: ac45755 a9553e6 Author: jackylk Authored: Wed Dec 7 15:22:15 2016 +0800 Committer: jackylk Committed: Wed Dec 7 15:22:15 2016 +0800 -- examples/spark2/src/main/resources/data.csv | 20 +-- .../carbondata/examples/CarbonExample.scala | 10 +- .../spark/sql/CarbonDictionaryDecoder.scala | 1 - .../execution/CarbonLateDecodeStrategy.scala| 157 ++- .../sql/optimizer/CarbonLateDecodeRule.scala| 128 +-- .../carbondata/CarbonDataSourceSuite.scala | 28 +++- 6 files changed, 236 insertions(+), 108 deletions(-) --
[1/2] incubator-carbondata git commit: fixlatedecoder
Repository: incubator-carbondata Updated Branches: refs/heads/master ac4575536 -> 47658b17d fixlatedecoder fix comments Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/a9553e6b Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/a9553e6b Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/a9553e6b Branch: refs/heads/master Commit: a9553e6b8d086b98fab6df7b93a4e78150796fc9 Parents: ac45755 Author: QiangCai Authored: Tue Dec 6 17:40:21 2016 +0800 Committer: jackylk Committed: Wed Dec 7 15:21:51 2016 +0800 -- examples/spark2/src/main/resources/data.csv | 20 +-- .../carbondata/examples/CarbonExample.scala | 10 +- .../spark/sql/CarbonDictionaryDecoder.scala | 1 - .../execution/CarbonLateDecodeStrategy.scala| 157 ++- .../sql/optimizer/CarbonLateDecodeRule.scala| 128 +-- .../carbondata/CarbonDataSourceSuite.scala | 28 +++- 6 files changed, 236 insertions(+), 108 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/a9553e6b/examples/spark2/src/main/resources/data.csv -- diff --git a/examples/spark2/src/main/resources/data.csv b/examples/spark2/src/main/resources/data.csv index fcdf3c1..b44672f 100644 --- a/examples/spark2/src/main/resources/data.csv +++ b/examples/spark2/src/main/resources/data.csv @@ -1,10 +1,10 @@ -1,10,100,48.4,spark,2015/4/23 -5,17,140,43.4,spark,2015/7/27 -1,11,100,44.4,flink,2015/5/23 -1,10,150,43.4,spark,2015/7/24 -1,10,100,47.4,spark,2015/7/23 -3,14,160,43.4,hive,2015/7/26 -2,10,100,43.4,impala,2015/7/23 -1,10,100,43.4,spark,2015/5/23 -4,16,130,42.4,impala,2015/7/23 -1,10,100,43.4,spark,2015/7/23 \ No newline at end of file +1,10,100,48.4,spark,2015/4/23,1.23 +5,17,140,43.4,spark,2015/7/27,3.45 +1,11,100,44.4,flink,2015/5/23,23.23 +1,10,150,43.4,spark,2015/7/24,254.12 +1,10,100,47.4,spark,2015/7/23,876.14 +3,14,160,43.4,hive,2015/7/26,3454.32 +2,10,100,43.4,impala,2015/7/23,456.98 
+1,10,100,43.4,spark,2015/5/23,32.53 +4,16,130,42.4,impala,2015/7/23,67.23 +1,10,100,43.4,spark,2015/7/23,832.23 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/a9553e6b/examples/spark2/src/main/scala/org/apache/carbondata/examples/CarbonExample.scala -- diff --git a/examples/spark2/src/main/scala/org/apache/carbondata/examples/CarbonExample.scala b/examples/spark2/src/main/scala/org/apache/carbondata/examples/CarbonExample.scala index 59cc4e9..17674ef 100644 --- a/examples/spark2/src/main/scala/org/apache/carbondata/examples/CarbonExample.scala +++ b/examples/spark2/src/main/scala/org/apache/carbondata/examples/CarbonExample.scala @@ -68,8 +68,8 @@ object CarbonExample { |bigintField long, |doubleField double, |stringField string, - |timestampField timestamp - | ) + |timestampField timestamp, + |decimalField decimal(18,2)) | USING org.apache.spark.sql.CarbonSource """.stripMargin) @@ -86,7 +86,8 @@ object CarbonExample { |bigintField long, |doubleField double, |stringField string, - |timestampField string) + |timestampField string, + |decimalField decimal(18,2)) |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' """.stripMargin) @@ -105,13 +106,14 @@ object CarbonExample { s""" | INSERT INTO TABLE carbon_table | SELECT shortField, intField, bigintField, doubleField, stringField, - | from_unixtime(unix_timestamp(timestampField,'/M/dd')) timestampField + | from_unixtime(unix_timestamp(timestampField,'/M/dd')) timestampField, decimalField | FROM csv_table """.stripMargin) spark.sql(""" SELECT * FROM carbon_table + where stringfield = 'spark' and decimalField > 40 """).show spark.sql(""" http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/a9553e6b/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonDictionaryDecoder.scala -- diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonDictionaryDecoder.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonDictionaryDecoder.scala 
index c7ca61d..db864c7 100644 --- a/integration/spark2/src/main/scala/o
[2/2] incubator-carbondata git commit: [CARBONDATA-513] reduce new too many BigDecimal This closes #409
[CARBONDATA-513] reduce new too many BigDecimal This closes #409 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/8275640e Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/8275640e Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/8275640e Branch: refs/heads/master Commit: 8275640e0c25ecb751f2f9878a2cc9763b6b6f3c Parents: 5612a3a f853998 Author: jackylk Authored: Wed Dec 7 19:13:25 2016 +0800 Committer: jackylk Committed: Wed Dec 7 19:13:25 2016 +0800 -- .../org/apache/carbondata/core/util/DataTypeUtil.java | 11 ++- .../collector/impl/AbstractScannedResultCollector.java | 12 .../apache/carbondata/core/util/DataTypeUtilTest.java | 6 +- .../org/apache/spark/sql/SparkUnknownExpression.scala | 6 +- .../org/apache/spark/sql/SparkUnknownExpression.scala | 5 + 5 files changed, 13 insertions(+), 27 deletions(-) --
[1/2] incubator-carbondata git commit: new code for carbondata-513, fix conflicts
Repository: incubator-carbondata Updated Branches: refs/heads/master 5612a3a59 -> 8275640e0 new code for carbondata-513, fix conflicts fix checkstyle Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/f853998f Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/f853998f Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/f853998f Branch: refs/heads/master Commit: f853998fba87791889654b3a43a9577f9907200a Parents: 5612a3a Author: piaoyats Authored: Wed Dec 7 17:33:24 2016 +0800 Committer: jackylk Committed: Wed Dec 7 19:10:37 2016 +0800 -- .../org/apache/carbondata/core/util/DataTypeUtil.java | 11 ++- .../collector/impl/AbstractScannedResultCollector.java | 12 .../apache/carbondata/core/util/DataTypeUtilTest.java | 6 +- .../org/apache/spark/sql/SparkUnknownExpression.scala | 6 +- .../org/apache/spark/sql/SparkUnknownExpression.scala | 5 + 5 files changed, 13 insertions(+), 27 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/f853998f/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java b/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java index b97fd92..d179fca 100644 --- a/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/util/DataTypeUtil.java @@ -266,10 +266,7 @@ public final class DataTypeUtil { return null; } java.math.BigDecimal javaDecVal = new java.math.BigDecimal(data); - scala.math.BigDecimal scalaDecVal = new scala.math.BigDecimal(javaDecVal); - org.apache.spark.sql.types.Decimal decConverter = - new org.apache.spark.sql.types.Decimal(); - return decConverter.set(scalaDecVal); + return org.apache.spark.sql.types.Decimal.apply(javaDecVal); default: return UTF8String.fromString(data); } @@ -292,11 +289,7 @@ 
public final class DataTypeUtil { case LONG: return data; case DECIMAL: - java.math.BigDecimal javaDecVal = new java.math.BigDecimal(data.toString()); - scala.math.BigDecimal scalaDecVal = new scala.math.BigDecimal(javaDecVal); - org.apache.spark.sql.types.Decimal decConverter = - new org.apache.spark.sql.types.Decimal(); - return decConverter.set(scalaDecVal); + return org.apache.spark.sql.types.Decimal.apply((java.math.BigDecimal) data); default: return data; } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/f853998f/core/src/main/java/org/apache/carbondata/scan/collector/impl/AbstractScannedResultCollector.java -- diff --git a/core/src/main/java/org/apache/carbondata/scan/collector/impl/AbstractScannedResultCollector.java b/core/src/main/java/org/apache/carbondata/scan/collector/impl/AbstractScannedResultCollector.java index 90d4bd7..ad52b5e 100644 --- a/core/src/main/java/org/apache/carbondata/scan/collector/impl/AbstractScannedResultCollector.java +++ b/core/src/main/java/org/apache/carbondata/scan/collector/impl/AbstractScannedResultCollector.java @@ -25,7 +25,6 @@ import org.apache.carbondata.common.logging.LogServiceFactory; import org.apache.carbondata.core.carbon.datastore.chunk.MeasureColumnDataChunk; import org.apache.carbondata.core.carbon.metadata.datatype.DataType; import org.apache.carbondata.core.keygenerator.KeyGenException; -import org.apache.carbondata.core.util.DataTypeUtil; import org.apache.carbondata.scan.collector.ScannedResultCollector; import org.apache.carbondata.scan.executor.infos.BlockExecutionInfo; import org.apache.carbondata.scan.executor.infos.KeyStructureInfo; @@ -100,20 +99,17 @@ public abstract class AbstractScannedResultCollector implements ScannedResultCol private Object getMeasureData(MeasureColumnDataChunk dataChunk, int index, DataType dataType) { if (!dataChunk.getNullValueIndexHolder().getBitSet().get(index)) { - Object msrVal; switch (dataType) { case SHORT: case INT: case LONG: - msrVal = 
dataChunk.getMeasureDataHolder().getReadableLongValueByIndex(index); - break; + return dataChunk.getMeasureDataHolder().getReadableLongValueByIndex(index); case DECIMAL: - msrVal = dataChunk.getMeasureDataHolder().getReadableBigDecimalValueByIndex(index); - break; + return org.apache.spark.sql.types.Decimal.ap
[1/2] incubator-carbondata git commit: use carbon property to get the store path
Repository: incubator-carbondata Updated Branches: refs/heads/master e4c770d6b -> 98564f04d use carbon property to get the store path remove nouse imports fix style asle set kettle home in carbon property Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/f67ec012 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/f67ec012 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/f67ec012 Branch: refs/heads/master Commit: f67ec0125247e98d7925866fd3062ee3ddf16e28 Parents: e4c770d Author: wangfei Authored: Fri Dec 9 17:28:59 2016 +0800 Committer: jackylk Committed: Sat Dec 10 12:19:41 2016 +0800 -- .../org/apache/carbondata/core/util/CarbonProperties.java| 3 ++- .../scala/org/apache/carbondata/examples/CarbonExample.scala | 8 +--- .../org/apache/carbondata/spark/util/CarbonScalaUtil.scala | 5 + .../src/main/scala/org/apache/spark/sql/CarbonEnv.scala | 6 +++--- 4 files changed, 11 insertions(+), 11 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/f67ec012/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java b/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java index 3657215..b8077dc 100644 --- a/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java +++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java @@ -360,8 +360,9 @@ public final class CarbonProperties { * @param key * @return properties value */ - public void addProperty(String key, String value) { + public CarbonProperties addProperty(String key, String value) { carbonProperties.setProperty(key, value); +return this; } private ColumnarFormatVersion getDefaultFormatVersion() { 
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/f67ec012/examples/spark2/src/main/scala/org/apache/carbondata/examples/CarbonExample.scala -- diff --git a/examples/spark2/src/main/scala/org/apache/carbondata/examples/CarbonExample.scala b/examples/spark2/src/main/scala/org/apache/carbondata/examples/CarbonExample.scala index 17674ef..0aeb8be 100644 --- a/examples/spark2/src/main/scala/org/apache/carbondata/examples/CarbonExample.scala +++ b/examples/spark2/src/main/scala/org/apache/carbondata/examples/CarbonExample.scala @@ -47,13 +47,15 @@ object CarbonExample { .master("local") .appName("CarbonExample") .enableHiveSupport() -.config("carbon.kettle.home", - s"$rootPath/processing/carbonplugins") -.config("carbon.storelocation", storeLocation) .config("spark.sql.warehouse.dir", warehouse) .config("javax.jdo.option.ConnectionURL", s"jdbc:derby:;databaseName=$metastoredb;create=true") .getOrCreate() + +CarbonProperties.getInstance() + .addProperty("carbon.kettle.home", s"$rootPath/processing/carbonplugins") + .addProperty("carbon.storelocation", storeLocation) + spark.sparkContext.setLogLevel("WARN") CarbonProperties.getInstance() http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/f67ec012/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/CarbonScalaUtil.scala -- diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/CarbonScalaUtil.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/CarbonScalaUtil.scala index dc63186..1cdd497 100644 --- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/CarbonScalaUtil.scala +++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/CarbonScalaUtil.scala @@ -91,10 +91,7 @@ object CarbonScalaUtil { } def getKettleHome(sqlContext: SQLContext): String = { -var kettleHomePath = sqlContext.getConf("carbon.kettle.home", null) -if (null == kettleHomePath) { - kettleHomePath = 
CarbonProperties.getInstance.getProperty("carbon.kettle.home") -} +var kettleHomePath = CarbonProperties.getInstance.getProperty("carbon.kettle.home") if (null == kettleHomePath) { val carbonHome = System.getenv("CARBON_HOME") if (null != carbonHome) { http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/f67ec012/integration/spark2/src/main/sc
[2/2] incubator-carbondata git commit: [CARBONDATA-517] Use carbon property to get the store path/kettle home This closes #414
[CARBONDATA-517] Use carbon property to get the store path/kettle home This closes #414 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/98564f04 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/98564f04 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/98564f04 Branch: refs/heads/master Commit: 98564f04d7c47846c6ce49c54ebd2bb5189335bf Parents: e4c770d f67ec01 Author: jackylk Authored: Sat Dec 10 12:20:29 2016 +0800 Committer: jackylk Committed: Sat Dec 10 12:20:29 2016 +0800 -- .../org/apache/carbondata/core/util/CarbonProperties.java| 3 ++- .../scala/org/apache/carbondata/examples/CarbonExample.scala | 8 +--- .../org/apache/carbondata/spark/util/CarbonScalaUtil.scala | 5 + .../src/main/scala/org/apache/spark/sql/CarbonEnv.scala | 6 +++--- 4 files changed, 11 insertions(+), 11 deletions(-) --
[1/2] incubator-carbondata git commit: do not use inner interface of spark
Repository: incubator-carbondata Updated Branches: refs/heads/master 56aa1f8c0 -> 2a6d097d1 do not use inner interface of spark style add r style comment fix Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/06d44608 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/06d44608 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/06d44608 Branch: refs/heads/master Commit: 06d44608acaebe99a5a99e754a27e92262242004 Parents: 56aa1f8 Author: wangfei Authored: Sun Dec 11 07:04:37 2016 +0800 Committer: jackylk Committed: Sun Dec 11 14:34:15 2016 +0800 -- .../spark/sql/CarbonDictionaryDecoder.scala | 13 ++--- .../spark/sql/SparkUnknownExpression.scala | 4 +- .../execution/CarbonLateDecodeStrategy.scala| 56 +++- 3 files changed, 39 insertions(+), 34 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/06d44608/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonDictionaryDecoder.scala -- diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonDictionaryDecoder.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonDictionaryDecoder.scala index db864c7..940c6d7 100644 --- a/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonDictionaryDecoder.scala +++ b/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonDictionaryDecoder.scala @@ -181,7 +181,7 @@ case class CarbonDictionaryDecoder( getDictionaryColumnIds(index)._3) } } - val result = unsafeProjection(new GenericMutableRow(data)) + val result = unsafeProjection(new GenericInternalRow(data)) total += System.currentTimeMillis() - startTime result } @@ -223,11 +223,12 @@ case class CarbonDictionaryDecoder( -class CarbonDecoderRDD(relations: Seq[CarbonDecoderRelation], -profile: CarbonProfile, -aliasMap: CarbonAliasDecoderRelation, -prev: RDD[Row], - output: Seq[Attribute]) +class CarbonDecoderRDD( +relations: 
Seq[CarbonDecoderRelation], +profile: CarbonProfile, +aliasMap: CarbonAliasDecoderRelation, +prev: RDD[Row], +output: Seq[Attribute]) extends RDD[Row](prev) { def canBeDecoded(attr: Attribute): Boolean = { http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/06d44608/integration/spark2/src/main/scala/org/apache/spark/sql/SparkUnknownExpression.scala -- diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/SparkUnknownExpression.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/SparkUnknownExpression.scala index 1a310c7..b4b0f3c 100644 --- a/integration/spark2/src/main/scala/org/apache/spark/sql/SparkUnknownExpression.scala +++ b/integration/spark2/src/main/scala/org/apache/spark/sql/SparkUnknownExpression.scala @@ -22,7 +22,7 @@ import java.util.{ArrayList, List} import scala.collection.JavaConverters._ import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.expressions.{Expression => SparkExpression, GenericMutableRow} +import org.apache.spark.sql.catalyst.expressions.{Expression => SparkExpression, GenericInternalRow} import org.apache.carbondata.core.carbon.metadata.encoder.Encoding import org.apache.carbondata.scan.expression.{ColumnExpression, ExpressionResult, UnknownExpression} @@ -48,7 +48,7 @@ class SparkUnknownExpression(var sparkExp: SparkExpression) } try { val result = evaluateExpression( -new GenericMutableRow(values.map(a => a.asInstanceOf[Any]).toArray)) +new GenericInternalRow(values.map(a => a.asInstanceOf[Any]).toArray)) val sparkRes = if (isExecutor) { result.asInstanceOf[InternalRow].get(0, sparkExp.dataType) } else { http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/06d44608/integration/spark2/src/main/scala/org/apache/spark/sql/execution/CarbonLateDecodeStrategy.scala -- diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/CarbonLateDecodeStrategy.scala 
b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/CarbonLateDecodeStrategy.scala index 57b2139..7a8920f 100644 --- a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/CarbonLateDecodeStrategy.scala +++ b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/CarbonLateDecodeStrategy.scala @@ -26,6
[2/2] incubator-carbondata git commit: [CARBONDATA-521]Depends on more stable class of spark in spark2 This closes #415
[CARBONDATA-521]Depends on more stable class of spark in spark2 This closes #415 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/2a6d097d Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/2a6d097d Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/2a6d097d Branch: refs/heads/master Commit: 2a6d097d19bfd91a0f25de882125fa26ad4a9756 Parents: 56aa1f8 06d4460 Author: jackylk Authored: Sun Dec 11 14:34:42 2016 +0800 Committer: jackylk Committed: Sun Dec 11 14:34:42 2016 +0800 -- .../spark/sql/CarbonDictionaryDecoder.scala | 13 ++--- .../spark/sql/SparkUnknownExpression.scala | 4 +- .../execution/CarbonLateDecodeStrategy.scala| 56 +++- 3 files changed, 39 insertions(+), 34 deletions(-) --
[2/2] incubator-carbondata git commit: [CARBONDATA-520] Executor can not get the read support class This closes #417
[CARBONDATA-520] Executor can not get the read support class This closes #417 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/5c476ec4 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/5c476ec4 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/5c476ec4 Branch: refs/heads/master Commit: 5c476ec40c1c1e610a493e547cd67b3502a04202 Parents: 2a6d097 656f3ee Author: jackylk Authored: Sun Dec 11 14:41:05 2016 +0800 Committer: jackylk Committed: Sun Dec 11 14:41:05 2016 +0800 -- .../carbondata/spark/rdd/CarbonScanRDD.scala| 2 +- .../carbondata/spark/rdd/SparkCommonEnv.scala | 30 .../carbondata/spark/rdd/SparkReadSupport.scala | 28 ++ .../spark/sql/hive/DistributionUtil.scala | 4 --- .../scala/org/apache/spark/sql/CarbonEnv.scala | 14 +++-- .../sql/CarbonDatasourceHadoopRelation.scala| 4 --- .../scala/org/apache/spark/sql/CarbonEnv.scala | 15 +++--- 7 files changed, 37 insertions(+), 60 deletions(-) --
[1/2] incubator-carbondata git commit: fix conf issue for scanrdd
Repository: incubator-carbondata Updated Branches: refs/heads/master 2a6d097d1 -> 5c476ec40 fix conf issue for scanrdd new version use org.apache.carbondata.hadoop.readsupport.impl.RawDataReadSupport in spark1 compile issue Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/656f3ee2 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/656f3ee2 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/656f3ee2 Branch: refs/heads/master Commit: 656f3ee2f56c31abdb5c241fa56cf86438489771 Parents: 2a6d097 Author: wangfei Authored: Sun Dec 11 07:24:03 2016 +0800 Committer: jackylk Committed: Sun Dec 11 14:40:48 2016 +0800 -- .../carbondata/spark/rdd/CarbonScanRDD.scala| 2 +- .../carbondata/spark/rdd/SparkCommonEnv.scala | 30 .../carbondata/spark/rdd/SparkReadSupport.scala | 28 ++ .../spark/sql/hive/DistributionUtil.scala | 4 --- .../scala/org/apache/spark/sql/CarbonEnv.scala | 14 +++-- .../sql/CarbonDatasourceHadoopRelation.scala| 4 --- .../scala/org/apache/spark/sql/CarbonEnv.scala | 15 +++--- 7 files changed, 37 insertions(+), 60 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/656f3ee2/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonScanRDD.scala -- diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonScanRDD.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonScanRDD.scala index a750b10..d654067 100644 --- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonScanRDD.scala +++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonScanRDD.scala @@ -197,7 +197,7 @@ class CarbonScanRDD[V: ClassTag]( } private def prepareInputFormatForExecutor(conf: Configuration): CarbonInputFormat[V] = { -CarbonInputFormat.setCarbonReadSupport(conf, 
SparkCommonEnv.readSupportClass) +CarbonInputFormat.setCarbonReadSupport(conf, SparkReadSupport.readSupportClass) createInputFormat(conf) } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/656f3ee2/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/SparkCommonEnv.scala -- diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/SparkCommonEnv.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/SparkCommonEnv.scala deleted file mode 100644 index bf614b1..000 --- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/SparkCommonEnv.scala +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - *http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.carbondata.spark.rdd - -import org.apache.carbondata.hadoop.readsupport.CarbonReadSupport - -// Used to solve cyclic-dependency issue of carbon-spark-common and carbon-spark, carbon-spark2 -// modules, variables or functions that different in carbon-spark and carbon-spark2 are set here -object SparkCommonEnv { - - var readSupportClass: Class[_ <: CarbonReadSupport[_]] = _ - - var numExistingExecutors: Int = _ - -} http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/656f3ee2/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/SparkReadSupport.scala -- diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/SparkReadSupport.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/SparkReadSupport.scala new file mode 100644 index 000..3d78f0e --- /dev/null +++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/SparkReadSupport.scala @@ -0,0 +1,28 @@ +/* + * Licensed to the
[1/2] incubator-carbondata git commit: support octal escape delimited
Repository: incubator-carbondata Updated Branches: refs/heads/master 05f56afdc -> 00c479463 support octal escape delimited style style add testcase load csv with delimiter char \017 add testcase load csv with delimiter char \017 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/c1c9ac52 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/c1c9ac52 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/c1c9ac52 Branch: refs/heads/master Commit: c1c9ac529c026d10a2de168dedda98f7f302e44a Parents: 05f56af Author: boczhaow Authored: Tue Dec 13 11:06:54 2016 +0800 Committer: jackylk Committed: Tue Dec 13 15:27:44 2016 +0800 -- .../apache/carbondata/core/util/CarbonUtil.java| 17 + .../src/test/resources/sample_withDelimiter017.csv | 5 + .../testsuite/dataload/TestLoadDataGeneral.scala | 9 + 3 files changed, 15 insertions(+), 16 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/c1c9ac52/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java index 6531352..3980cb3 100644 --- a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java @@ -599,22 +599,7 @@ public final class CarbonUtil { * @return */ public static String unescapeChar(String parseStr) { -switch (parseStr) { - case "\\001": -return "\001"; - case "\\t": -return "\t"; - case "\\r": -return "\r"; - case "\\b": -return "\b"; - case "\\f": -return "\f"; - case "\\n": -return "\n"; - default: -return parseStr; -} +return scala.StringContext.treatEscapes(parseStr); } /** http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/c1c9ac52/integration/spark/src/test/resources/sample_withDelimiter017.csv -- diff 
--git a/integration/spark/src/test/resources/sample_withDelimiter017.csv b/integration/spark/src/test/resources/sample_withDelimiter017.csv new file mode 100644 index 000..c40b03a --- /dev/null +++ b/integration/spark/src/test/resources/sample_withDelimiter017.csv @@ -0,0 +1,5 @@ +idnamecityage +1davidshenzhen31 +2easonshenzhen27 +3jarrywuhan35 +3jarryBangalore35 http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/c1c9ac52/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala -- diff --git a/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala b/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala index 7e102df..9904c93 100644 --- a/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala +++ b/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala @@ -79,6 +79,15 @@ class TestLoadDataGeneral extends QueryTest with BeforeAndAfterAll { ) } + test("test data loading CSV file with delimiter char \\017") { +val testData = currentDirectory + "/src/test/resources/sample_withDelimiter017.csv" +sql(s"LOAD DATA LOCAL INPATH '$testData' into table loadtest options ('delimiter'='\\017')") +checkAnswer( + sql("SELECT COUNT(*) FROM loadtest"), + Seq(Row(20)) +) + } + test("test data loading with invalid values for mesasures") { val testData = currentDirectory + "/src/test/resources/invalidMeasures.csv" sql("drop table if exists invalidMeasures")
[2/2] incubator-carbondata git commit: [CARBONDATA-528]support octal escape delimited This closes #424
[CARBONDATA-528]support octal escape delimited This closes #424 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/00c47946 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/00c47946 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/00c47946 Branch: refs/heads/master Commit: 00c479463a53ec55582a083cbe97bfcfb44f5838 Parents: 05f56af c1c9ac5 Author: jackylk Authored: Tue Dec 13 15:28:04 2016 +0800 Committer: jackylk Committed: Tue Dec 13 15:28:04 2016 +0800 -- .../apache/carbondata/core/util/CarbonUtil.java| 17 + .../src/test/resources/sample_withDelimiter017.csv | 5 + .../testsuite/dataload/TestLoadDataGeneral.scala | 9 + 3 files changed, 15 insertions(+), 16 deletions(-) --
[2/2] incubator-carbondata git commit: [CARBONDATA-431]bigdecimal compression. This closes #388
[CARBONDATA-431]bigdecimal compression. This closes #388 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/05497d0d Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/05497d0d Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/05497d0d Branch: refs/heads/master Commit: 05497d0d1b9bfc61cb6ebdd488f07aa772b7d0b9 Parents: 00c4794 63d6626 Author: jackylk Authored: Tue Dec 13 15:35:21 2016 +0800 Committer: jackylk Committed: Tue Dec 13 15:35:21 2016 +0800 -- .../core/compression/BigDecimalCompressor.java | 76 + .../core/compression/BigIntCompressor.java | 26 ++ .../core/compression/ValueCompressor.java | 9 + .../store/compression/WriterCompressModel.java | 11 + .../compression/type/UnCompressBigDecimal.java | 112 +++ .../type/UnCompressBigDecimalByte.java | 134 + .../store/dataholder/CarbonReadDataHolder.java | 3 + .../store/dataholder/CarbonWriteDataHolder.java | 50 ...ractHeavyCompressedDoubleArrayDataStore.java | 10 +- .../core/util/BigDecimalCompressionFinder.java | 96 ++ .../carbondata/core/util/CompressionFinder.java | 145 + .../core/util/ValueCompressionUtil.java | 291 +++ ...mpressedMeasureChunkFileBasedReaderTest.java | 98 +++ .../core/util/ValueCompressionUtilTest.java | 2 +- .../store/CarbonFactDataHandlerColumnar.java| 59 +++- 15 files changed, 863 insertions(+), 259 deletions(-) --
[1/2] incubator-carbondata git commit: rebased code. fixed review comment.
Repository: incubator-carbondata Updated Branches: refs/heads/master 00c479463 -> 05497d0d1 rebased code. fixed review comment. BigDecimal compression Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/63d66264 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/63d66264 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/63d66264 Branch: refs/heads/master Commit: 63d66264cb60338845c87f7f627019bb843844aa Parents: 00c4794 Author: ashok.blend Authored: Sat Dec 3 13:05:38 2016 -0800 Committer: jackylk Committed: Tue Dec 13 15:34:38 2016 +0800 -- .../core/compression/BigDecimalCompressor.java | 76 + .../core/compression/BigIntCompressor.java | 26 ++ .../core/compression/ValueCompressor.java | 9 + .../store/compression/WriterCompressModel.java | 11 + .../compression/type/UnCompressBigDecimal.java | 112 +++ .../type/UnCompressBigDecimalByte.java | 134 + .../store/dataholder/CarbonReadDataHolder.java | 3 + .../store/dataholder/CarbonWriteDataHolder.java | 50 ...ractHeavyCompressedDoubleArrayDataStore.java | 10 +- .../core/util/BigDecimalCompressionFinder.java | 96 ++ .../carbondata/core/util/CompressionFinder.java | 145 + .../core/util/ValueCompressionUtil.java | 291 +++ ...mpressedMeasureChunkFileBasedReaderTest.java | 98 +++ .../core/util/ValueCompressionUtilTest.java | 2 +- .../store/CarbonFactDataHandlerColumnar.java| 59 +++- 15 files changed, 863 insertions(+), 259 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/63d66264/core/src/main/java/org/apache/carbondata/core/compression/BigDecimalCompressor.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/compression/BigDecimalCompressor.java b/core/src/main/java/org/apache/carbondata/core/compression/BigDecimalCompressor.java new file mode 100644 index 000..85c9927 --- /dev/null +++ 
b/core/src/main/java/org/apache/carbondata/core/compression/BigDecimalCompressor.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.carbondata.core.compression; + +import org.apache.carbondata.core.datastorage.store.dataholder.CarbonWriteDataHolder; +import org.apache.carbondata.core.util.BigDecimalCompressionFinder; +import org.apache.carbondata.core.util.CompressionFinder; +import org.apache.carbondata.core.util.ValueCompressionUtil.DataType; + +/** + * Bigdecimal data type compressor + * + */ +public class BigDecimalCompressor extends BigIntCompressor { + + private boolean readLeft = true; + + @Override + public Object getCompressedValues(CompressionFinder compressionFinder, + CarbonWriteDataHolder dataHolder, Object maxValue, int decimal) { +BigDecimalCompressionFinder bigdCompressionFinder = +(BigDecimalCompressionFinder) compressionFinder; +Long[] maxValues = (Long[]) maxValue; +Object leftCompressedValue = getCompressedValues( +bigdCompressionFinder.getLeftCompType(), dataHolder, +bigdCompressionFinder.getLeftChangedDataType(), maxValues[0], 0); +readLeft = false; +Object rightCompressedValue = getCompressedValues( +bigdCompressionFinder.getRightCompType(), 
dataHolder, +bigdCompressionFinder.getRightChangedDataType(), maxValues[1], 0); +return new Object[] { leftCompressedValue, rightCompressedValue }; + } + + @Override + protected Object compressMaxMin(DataType changedDataType, + CarbonWriteDataHolder dataHolder, Object max) { +long maxValue = (long) max; +long[][] writableBigDValues = dataHolder.getWritableBigDecimalValues(); +long[] value = null; +if (readLeft) { + value = writableBigDValues[0]; +} else { + value = writableBigDValues[1]; +} +return compressMaxMin(changedDataType, maxValue, value); + } + +
[2/2] incubator-carbondata git commit: [MINOR-FIX]change the declared package of these four java files This closes #426
[MINOR-FIX]change the declared package of these four java files This closes #426 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/89405143 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/89405143 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/89405143 Branch: refs/heads/master Commit: 89405143296271a67348e81738edca35b78c65ec Parents: 05497d0 99e79b5 Author: jackylk Authored: Tue Dec 13 16:40:53 2016 +0800 Committer: jackylk Committed: Tue Dec 13 16:40:53 2016 +0800 -- .../core/datastorage/store/filesystem/AlluxioCarbonFileTest.java | 2 +- .../core/datastorage/store/filesystem/HDFSCarbonFileTest.java | 2 +- .../core/datastorage/store/filesystem/LocalCarbonFileTest.java | 2 +- .../core/datastorage/store/filesystem/ViewFsCarbonFileTest.java| 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) --
[1/2] incubator-carbondata git commit: The declared package of these four java files must be modified to "org.apache.carbondata.core.datastorage.store.filesystem"
Repository: incubator-carbondata Updated Branches: refs/heads/master 05497d0d1 -> 894051432 The declared package of these four java files must be modidied to "org.apache.carbondata.core.datastorage.store.filesystem" minor fix Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/99e79b58 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/99e79b58 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/99e79b58 Branch: refs/heads/master Commit: 99e79b5895cf7a00260ca8050f815a7d434ff9ca Parents: 05497d0 Author: Zhang Zhichao <441586...@qq.com> Authored: Tue Dec 13 14:28:25 2016 +0800 Committer: jackylk Committed: Tue Dec 13 15:37:55 2016 +0800 -- .../core/datastorage/store/filesystem/AlluxioCarbonFileTest.java | 2 +- .../core/datastorage/store/filesystem/HDFSCarbonFileTest.java | 2 +- .../core/datastorage/store/filesystem/LocalCarbonFileTest.java | 2 +- .../core/datastorage/store/filesystem/ViewFsCarbonFileTest.java| 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/99e79b58/core/src/test/java/org/apache/carbondata/core/datastorage/store/filesystem/AlluxioCarbonFileTest.java -- diff --git a/core/src/test/java/org/apache/carbondata/core/datastorage/store/filesystem/AlluxioCarbonFileTest.java b/core/src/test/java/org/apache/carbondata/core/datastorage/store/filesystem/AlluxioCarbonFileTest.java index acd30c9..f78e77c 100644 --- a/core/src/test/java/org/apache/carbondata/core/datastorage/store/filesystem/AlluxioCarbonFileTest.java +++ b/core/src/test/java/org/apache/carbondata/core/datastorage/store/filesystem/AlluxioCarbonFileTest.java @@ -17,7 +17,7 @@ * under the License. 
*/ -package org.apache.carbondata.core.carbon.datastorage.filesystem; +package org.apache.carbondata.core.datastorage.store.filesystem; import mockit.Mock; import mockit.MockUp; http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/99e79b58/core/src/test/java/org/apache/carbondata/core/datastorage/store/filesystem/HDFSCarbonFileTest.java -- diff --git a/core/src/test/java/org/apache/carbondata/core/datastorage/store/filesystem/HDFSCarbonFileTest.java b/core/src/test/java/org/apache/carbondata/core/datastorage/store/filesystem/HDFSCarbonFileTest.java index c55c600..ffaace1 100644 --- a/core/src/test/java/org/apache/carbondata/core/datastorage/store/filesystem/HDFSCarbonFileTest.java +++ b/core/src/test/java/org/apache/carbondata/core/datastorage/store/filesystem/HDFSCarbonFileTest.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.carbondata.core.carbon.datastorage.filesystem; +package org.apache.carbondata.core.datastorage.store.filesystem; import mockit.Mock; import mockit.MockUp; http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/99e79b58/core/src/test/java/org/apache/carbondata/core/datastorage/store/filesystem/LocalCarbonFileTest.java -- diff --git a/core/src/test/java/org/apache/carbondata/core/datastorage/store/filesystem/LocalCarbonFileTest.java b/core/src/test/java/org/apache/carbondata/core/datastorage/store/filesystem/LocalCarbonFileTest.java index 6b644dd..7b787d4 100644 --- a/core/src/test/java/org/apache/carbondata/core/datastorage/store/filesystem/LocalCarbonFileTest.java +++ b/core/src/test/java/org/apache/carbondata/core/datastorage/store/filesystem/LocalCarbonFileTest.java @@ -17,7 +17,7 @@ * under the License. 
*/ -package org.apache.carbondata.core.carbon.datastorage.filesystem; +package org.apache.carbondata.core.datastorage.store.filesystem; import mockit.Mock; import mockit.MockUp; http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/99e79b58/core/src/test/java/org/apache/carbondata/core/datastorage/store/filesystem/ViewFsCarbonFileTest.java -- diff --git a/core/src/test/java/org/apache/carbondata/core/datastorage/store/filesystem/ViewFsCarbonFileTest.java b/core/src/test/java/org/apache/carbondata/core/datastorage/store/filesystem/ViewFsCarbonFileTest.java index 5637b3c..de37fc7 100644 --- a/core/src/test/java/org/apache/carbondata/core/datastorage/store/filesystem/ViewFsCarbonFileTest.java +++ b/core/src/test/java/org/apache/carbondata/core/datastorage/store/filesystem/ViewFsCarbonFileTest.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.carbondata.core.carbon.datastorage.filesystem; +package org.apache.carbondata.core.datastorage.store.fi
[1/4] incubator-carbondata git commit: Added unsafe on-heap/off-heap sort to improve loading performance
Repository: incubator-carbondata Updated Branches: refs/heads/master 894051432 -> 910155d42 http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/f1f9348d/processing/src/main/java/org/apache/carbondata/processing/newflow/sort/unsafe/sort/TimSort.java -- diff --git a/processing/src/main/java/org/apache/carbondata/processing/newflow/sort/unsafe/sort/TimSort.java b/processing/src/main/java/org/apache/carbondata/processing/newflow/sort/unsafe/sort/TimSort.java new file mode 100644 index 000..d9ff7e5 --- /dev/null +++ b/processing/src/main/java/org/apache/carbondata/processing/newflow/sort/unsafe/sort/TimSort.java @@ -0,0 +1,943 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.carbondata.processing.newflow.sort.unsafe.sort; + +import java.util.Comparator; + +import org.apache.spark.util.collection.SortDataFormat; + +/** + * A port of the Apache Spark's TimSort and they originally ported from Android TimSort class, + * which utilizes a "stable, adaptive, iterative mergesort." + * See the method comment on sort() for more details. 
+ * + * This has been kept in Java with the original style in order to match very closely with the + * Android source code, and thus be easy to verify correctness. The class is package private. We put + * a simple Scala wrapper {@link org.apache.spark.util.collection.Sorter}, which is available to + * package org.apache.spark. + * + * The purpose of the port is to generalize the interface to the sort to accept input data formats + * besides simple arrays where every element is sorted individually. For instance, the AppendOnlyMap + * uses this to sort an Array with alternating elements of the form [key, value, key, value]. + * This generalization comes with minimal overhead -- see SortDataFormat for more information. + * + * We allow key reuse to prevent creating many key objects -- see SortDataFormat. + * + * @see SortDataFormat + * @see org.apache.spark.util.collection.Sorter + */ +public class TimSort { + + /** + * This is the minimum sized sequence that will be merged. Shorter + * sequences will be lengthened by calling binarySort. If the entire + * array is less than this length, no merges will be performed. + * + * This constant should be a power of two. It was 64 in Tim Peter's C + * implementation, but 32 was empirically determined to work better in + * this implementation. In the unlikely event that you set this constant + * to be a number that's not a power of two, you'll need to change the + * minRunLength computation. + * + * If you decrease this constant, you must change the stackLen + * computation in the TimSort constructor, or you risk an + * ArrayOutOfBounds exception. See listsort.txt for a discussion + * of the minimum stack length required as a function of the length + * of the array being sorted and the minimum merge sequence length. 
+ */ + private static final int MIN_MERGE = 32; + + private final SortDataFormat s; + + public TimSort(SortDataFormat sortDataFormat) { +this.s = sortDataFormat; + } + + /** + * A stable, adaptive, iterative mergesort that requires far fewer than + * n lg(n) comparisons when running on partially sorted arrays, while + * offering performance comparable to a traditional mergesort when run + * on random arrays. Like all proper mergesorts, this sort is stable and + * runs O(n log n) time (worst case). In the worst case, this sort requires + * temporary storage space for n/2 object references; in the best case, + * it requires only a small constant amount of space. + * + * This implementation was adapted from Tim Peters's list sort for + * Python, which is described in detail here: + * + * http://svn.python.org/projects/python/trunk/Objects/listsort.txt + * + * Tim's C code may be found here: + * + * http://svn.python.org/projects/python/trunk/Objects/listobject.c + * + * The underlying techniques are described in this paper (and may have + * even earlier origins): + * + * "Optimistic Sorting and Information Theoretic Complexity" + * Peter McIlroy + * SODA (F
[2/4] incubator-carbondata git commit: Added unsafe on-heap/off-heap sort to improve loading performance
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/f1f9348d/processing/src/main/java/org/apache/carbondata/processing/newflow/sort/unsafe/holder/UnsafeInmemoryHolder.java -- diff --git a/processing/src/main/java/org/apache/carbondata/processing/newflow/sort/unsafe/holder/UnsafeInmemoryHolder.java b/processing/src/main/java/org/apache/carbondata/processing/newflow/sort/unsafe/holder/UnsafeInmemoryHolder.java new file mode 100644 index 000..d512349 --- /dev/null +++ b/processing/src/main/java/org/apache/carbondata/processing/newflow/sort/unsafe/holder/UnsafeInmemoryHolder.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.carbondata.processing.newflow.sort.unsafe.holder; + +import org.apache.carbondata.common.logging.LogService; +import org.apache.carbondata.common.logging.LogServiceFactory; +import org.apache.carbondata.processing.newflow.sort.unsafe.UnsafeCarbonRowPage; +import org.apache.carbondata.processing.sortandgroupby.sortdata.NewRowComparator; + +public class UnsafeInmemoryHolder implements SortTempChunkHolder { + + private static final LogService LOGGER = + LogServiceFactory.getLogService(UnsafeInmemoryHolder.class.getName()); + + private int counter; + + private int actualSize; + + private UnsafeCarbonRowPage rowPage; + + private Object[] currentRow; + + private long address; + + private NewRowComparator comparator; + + private int columnSize; + + public UnsafeInmemoryHolder(UnsafeCarbonRowPage rowPage, int columnSize) { +this.actualSize = rowPage.getBuffer().getActualSize(); +this.rowPage = rowPage; +LOGGER.audit("Processing unsafe inmemory rows page with size : " + actualSize); +this.comparator = new NewRowComparator(rowPage.getNoDictionaryDimensionMapping()); +this.columnSize = columnSize; + } + + public boolean hasNext() { +if (counter < actualSize) { + return true; +} +return false; + } + + public void readRow() { +currentRow = new Object[columnSize]; +address = rowPage.getBuffer().get(counter); +rowPage.getRow(address + rowPage.getDataBlock().getBaseOffset(), currentRow); +counter++; + } + + public Object[] getRow() { +return currentRow; + } + + @Override public int compareTo(SortTempChunkHolder o) { +return comparator.compare(currentRow, o.getRow()); + } + + public int numberOfRows() { +return actualSize; + } + + public void close() { +rowPage.freeMemory(); + } +} http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/f1f9348d/processing/src/main/java/org/apache/carbondata/processing/newflow/sort/unsafe/holder/UnsafeInmemoryMergeHolder.java -- diff --git 
a/processing/src/main/java/org/apache/carbondata/processing/newflow/sort/unsafe/holder/UnsafeInmemoryMergeHolder.java b/processing/src/main/java/org/apache/carbondata/processing/newflow/sort/unsafe/holder/UnsafeInmemoryMergeHolder.java new file mode 100644 index 000..9f157a0 --- /dev/null +++ b/processing/src/main/java/org/apache/carbondata/processing/newflow/sort/unsafe/holder/UnsafeInmemoryMergeHolder.java @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.carbondata.processing.newflow.sort.unsafe.holder; + +import org.apache.carbondata.common.logging.LogService; +import org.
[4/4] incubator-carbondata git commit: [CARBONDATA-470]Add unsafe offheap and on-heap sort in carbondata loading This closes #369
[CARBONDATA-470]Add unsafe offheap and on-heap sort in carbodata loading This closes #369 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/910155d4 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/910155d4 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/910155d4 Branch: refs/heads/master Commit: 910155d42196dafed6d684df9d29b72ceb702d52 Parents: 8940514 f1f9348 Author: jackylk Authored: Tue Dec 13 19:14:07 2016 +0800 Committer: jackylk Committed: Tue Dec 13 19:14:07 2016 +0800 -- .../core/constants/CarbonCommonConstants.java | 32 + .../core/memory/HeapMemoryAllocator.java| 84 ++ .../carbondata/core/memory/MemoryAllocator.java | 36 + .../carbondata/core/memory/MemoryBlock.java | 57 ++ .../carbondata/core/memory/MemoryLocation.java | 55 ++ .../core/memory/UnsafeMemoryAllocator.java | 40 + .../carbondata/core/unsafe/CarbonUnsafe.java| 48 + dev/javastyle-config.xml| 6 +- dev/javastyle-suppressions.xml | 35 + .../newflow/iterator/InputIterator.java | 40 + .../impl/UnsafeParallelReadMergeSorterImpl.java | 219 + .../newflow/sort/unsafe/IntPointerBuffer.java | 95 ++ .../sort/unsafe/UnsafeCarbonRowPage.java| 356 +++ .../sort/unsafe/UnsafeMemoryManager.java| 99 ++ .../newflow/sort/unsafe/UnsafeSortDataRows.java | 356 +++ .../unsafe/comparator/UnsafeRowComparator.java | 133 +++ .../UnsafeRowComparatorForNormalDIms.java | 61 ++ .../sort/unsafe/holder/SortTempChunkHolder.java | 35 + .../sort/unsafe/holder/UnsafeCarbonRow.java | 23 + .../unsafe/holder/UnsafeCarbonRowForMerge.java | 25 + .../holder/UnsafeFinalMergePageHolder.java | 90 ++ .../unsafe/holder/UnsafeInmemoryHolder.java | 80 ++ .../holder/UnsafeInmemoryMergeHolder.java | 90 ++ .../holder/UnsafeSortTempFileChunkHolder.java | 455 + .../UnsafeInMemoryIntermediateDataMerger.java | 217 + .../merger/UnsafeIntermediateFileMerger.java| 364 +++ 
.../unsafe/merger/UnsafeIntermediateMerger.java | 180 .../UnsafeSingleThreadFinalSortFilesMerger.java | 313 ++ .../newflow/sort/unsafe/sort/TimSort.java | 943 +++ .../unsafe/sort/UnsafeIntSortDataFormat.java| 74 ++ .../newflow/steps/SortProcessorStepImpl.java| 10 +- 31 files changed, 4647 insertions(+), 4 deletions(-) --
[3/4] incubator-carbondata git commit: Added unsafe on-heap/off-heap sort to improve loading performance
Added unsafe on-heap/off-heap sort to improve loading performance fixed testcase fixed row duplicated issue. rebased and changed the default value Added file header for code porting. Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/f1f9348d Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/f1f9348d Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/f1f9348d Branch: refs/heads/master Commit: f1f9348d0d7150c95500f8f10d3fd3adde47ecb2 Parents: 8940514 Author: ravipesala Authored: Wed Dec 7 00:29:04 2016 +0530 Committer: jackylk Committed: Tue Dec 13 19:13:25 2016 +0800 -- .../core/constants/CarbonCommonConstants.java | 32 + .../core/memory/HeapMemoryAllocator.java| 84 ++ .../carbondata/core/memory/MemoryAllocator.java | 36 + .../carbondata/core/memory/MemoryBlock.java | 57 ++ .../carbondata/core/memory/MemoryLocation.java | 55 ++ .../core/memory/UnsafeMemoryAllocator.java | 40 + .../carbondata/core/unsafe/CarbonUnsafe.java| 48 + dev/javastyle-config.xml| 6 +- dev/javastyle-suppressions.xml | 35 + .../newflow/iterator/InputIterator.java | 40 + .../impl/UnsafeParallelReadMergeSorterImpl.java | 219 + .../newflow/sort/unsafe/IntPointerBuffer.java | 95 ++ .../sort/unsafe/UnsafeCarbonRowPage.java| 356 +++ .../sort/unsafe/UnsafeMemoryManager.java| 99 ++ .../newflow/sort/unsafe/UnsafeSortDataRows.java | 356 +++ .../unsafe/comparator/UnsafeRowComparator.java | 133 +++ .../UnsafeRowComparatorForNormalDIms.java | 61 ++ .../sort/unsafe/holder/SortTempChunkHolder.java | 35 + .../sort/unsafe/holder/UnsafeCarbonRow.java | 23 + .../unsafe/holder/UnsafeCarbonRowForMerge.java | 25 + .../holder/UnsafeFinalMergePageHolder.java | 90 ++ .../unsafe/holder/UnsafeInmemoryHolder.java | 80 ++ .../holder/UnsafeInmemoryMergeHolder.java | 90 ++ .../holder/UnsafeSortTempFileChunkHolder.java | 455 + .../UnsafeInMemoryIntermediateDataMerger.java | 217 + 
.../merger/UnsafeIntermediateFileMerger.java| 364 +++ .../unsafe/merger/UnsafeIntermediateMerger.java | 180 .../UnsafeSingleThreadFinalSortFilesMerger.java | 313 ++ .../newflow/sort/unsafe/sort/TimSort.java | 943 +++ .../unsafe/sort/UnsafeIntSortDataFormat.java| 74 ++ .../newflow/steps/SortProcessorStepImpl.java| 10 +- 31 files changed, 4647 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/f1f9348d/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java index 8257756..033b48d 100644 --- a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java +++ b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java @@ -953,6 +953,38 @@ public final class CarbonCommonConstants { */ public static final int CARBON_EXECUTOR_STARTUP_THREAD_SLEEP_TIME = 250; + /** + * to enable offheap sort + */ + public static final String ENABLE_UNSAFE_SORT = "enable.unsafe.sort"; + + /** + * to enable offheap sort + */ + public static final String ENABLE_UNSAFE_SORT_DEFAULT = "false"; + + /** + * to enable offheap sort + */ + public static final String ENABLE_OFFHEAP_SORT = "enable.offheap.sort"; + + /** + * to enable offheap sort + */ + public static final String ENABLE_OFFHEAP_SORT_DEFAULT = "true"; + + public static final String ENABLE_INMEMORY_MERGE_SORT = "enable.inmemory.merge.sort"; + + public static final String ENABLE_INMEMORY_MERGE_SORT_DEFAULT = "true"; + + public static final String OFFHEAP_SORT_CHUNK_SIZE_IN_MB = "offheap.sort.chunk.size.inmb"; + + public static final String OFFHEAP_SORT_CHUNK_SIZE_IN_MB_DEFAULT = "64"; + + public static final String IN_MEMORY_FOR_SORT_DATA_IN_MB = "sort.inmemory.size.inmb"; + + public static final String 
IN_MEMORY_FOR_SORT_DATA_IN_MB_DEFAULT = "1024"; + private CarbonCommonConstants() { } } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/f1f9348d/core/src/main/java/org/apache/carbondata/core/memory/HeapMemoryAllocator.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/memory/HeapMemoryAllocator.java b/core/src/mai
[1/2] incubator-carbondata git commit: Correct DirectSQLExample name
Repository: incubator-carbondata Updated Branches: refs/heads/master 910155d42 -> bbb5919a6 Correct DirectSQLExample name Correct DirectSQLExample name Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/5656bb69 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/5656bb69 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/5656bb69 Branch: refs/heads/master Commit: 5656bb692f71f6d0f6dbf6ae3f77ba27c027f8c5 Parents: 910155d Author: amy-309 Authored: Tue Dec 13 23:15:18 2016 +0800 Committer: GitHub Committed: Tue Dec 13 23:15:18 2016 +0800 -- .../scala/org/apache/carbondata/examples/DirectSQLExample.scala| 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/5656bb69/examples/spark/src/main/scala/org/apache/carbondata/examples/DirectSQLExample.scala -- diff --git a/examples/spark/src/main/scala/org/apache/carbondata/examples/DirectSQLExample.scala b/examples/spark/src/main/scala/org/apache/carbondata/examples/DirectSQLExample.scala index 2553e8c..5e448fe 100644 --- a/examples/spark/src/main/scala/org/apache/carbondata/examples/DirectSQLExample.scala +++ b/examples/spark/src/main/scala/org/apache/carbondata/examples/DirectSQLExample.scala @@ -28,7 +28,7 @@ import org.apache.carbondata.examples.util.ExampleUtils object DirectSQLExample { def main(args: Array[String]) { -val cc = ExampleUtils.createCarbonContext("DatasourceExample") +val cc = ExampleUtils.createCarbonContext("DirectSQLExample") ExampleUtils.writeSampleCarbonFile(cc, "table1") // Use SQLContext to read CarbonData files without creating table
[2/2] incubator-carbondata git commit: Correct DirectSQLExample name This closes #431
Correct DirectSQLExample name This closes #431 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/bbb5919a Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/bbb5919a Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/bbb5919a Branch: refs/heads/master Commit: bbb5919a64bb2ea16f180ed49a6f29c554622ff8 Parents: 910155d 5656bb6 Author: jackylk Authored: Thu Dec 15 15:29:54 2016 +0800 Committer: jackylk Committed: Thu Dec 15 15:29:54 2016 +0800 -- .../scala/org/apache/carbondata/examples/DirectSQLExample.scala| 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) --
[1/3] incubator-carbondata git commit: support datatype: date and char
Repository: incubator-carbondata Updated Branches: refs/heads/master bbb5919a6 -> ecf29472e http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/d73f4bfe/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/TimestampDataTypeDirectDictionaryTestCase.scala -- diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/TimestampDataTypeDirectDictionaryTestCase.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/TimestampDataTypeDirectDictionaryTestCase.scala index 4f8bf1a..f166025 100644 --- a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/TimestampDataTypeDirectDictionaryTestCase.scala +++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/TimestampDataTypeDirectDictionaryTestCase.scala @@ -50,16 +50,16 @@ class TimestampDataTypeDirectDictionaryTest extends QueryTest with BeforeAndAfte TimeStampGranularityConstants.TIME_GRAN_SEC.toString ) CarbonProperties.getInstance().addProperty("carbon.direct.dictionary", "true") + sql("drop table if exists directDictionaryTable") + sql("drop table if exists directDictionaryTable_hive") sql( -"CREATE TABLE if not exists directDictionaryTable (empno int,doj Timestamp, " + - "salary int) " + +"CREATE TABLE if not exists directDictionaryTable (empno int,doj Timestamp, salary int) " + "STORED BY 'org.apache.carbondata.format'" ) sql( -"CREATE TABLE if not exists directDictionaryTable_hive (empno int,doj Timestamp, " + -"salary int) " + -"row format delimited fields terminated by ','" +"CREATE TABLE if not exists directDictionaryTable_hive (empno int,doj Timestamp, salary int) " + + "row format delimited fields terminated by ','" ) CarbonProperties.getInstance() @@ -68,8 +68,8 @@ class TimestampDataTypeDirectDictionaryTest extends QueryTest with BeforeAndAfte .getCanonicalPath val csvFilePath = currentDirectory + 
"/src/test/resources/datasample.csv" sql("LOAD DATA local inpath '" + csvFilePath + "' INTO TABLE directDictionaryTable OPTIONS" + -"('DELIMITER'= ',', 'QUOTECHAR'= '\"')"); - sql("LOAD DATA local inpath '" + csvFilePath + "' INTO TABLE directDictionaryTable_hive"); +"('DELIMITER'= ',', 'QUOTECHAR'= '\"')") + sql("LOAD DATA local inpath '" + csvFilePath + "' INTO TABLE directDictionaryTable_hive") } catch { case x: Throwable => CarbonProperties.getInstance() .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "dd-MM-") @@ -97,8 +97,8 @@ class TimestampDataTypeDirectDictionaryTest extends QueryTest with BeforeAndAfte test("test direct dictionary for not equals condition") { checkAnswer( - sql("select doj from directDictionaryTable where doj != '2016-04-14 15:00:09.0'"), - Seq(Row(Timestamp.valueOf("2016-03-14 15:00:09.0")) + sql("select doj from directDictionaryTable where doj != '2016-04-14 15:00:09'"), + Seq(Row(Timestamp.valueOf("2016-03-14 15:00:09")) ) ) } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/d73f4bfe/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/TimestampDataTypeDirectDictionaryWithNoDictTestCase.scala -- diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/TimestampDataTypeDirectDictionaryWithNoDictTestCase.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/TimestampDataTypeDirectDictionaryWithNoDictTestCase.scala index 22678af..991b1bf 100644 --- a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/TimestampDataTypeDirectDictionaryWithNoDictTestCase.scala +++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/TimestampDataTypeDirectDictionaryWithNoDictTestCase.scala @@ -88,8 +88,8 @@ class TimestampDataTypeDirectDictionaryWithNoDictTestCase extends QueryTest with ) } - -test("select doj from directDictionaryTable 
with greater than filter") { + + test("select doj from directDictionaryTable with greater than filter") { checkAnswer( sql("select doj from directDictionaryTable where doj>'2016-03-14 15:00:09'"), Seq(Row(Timestamp.valueOf("2016-04-14 15:00:09"))) @@ -99,7 +99,7 @@ class TimestampDataTypeDirectDictionaryWithNoDictTestCase extends QueryTest with override def afterAll { - sql("drop table directDictionaryTable") +sql("drop table directDictionaryTable") CarbonProperties.getInstance() .addProperty(CarbonCommonCon
[3/3] incubator-carbondata git commit: [CARBONDATA-535]Support data type: date and char This closes #411
[CARBONDATA-535]Support data type: date and char This closes #411 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/ecf29472 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/ecf29472 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/ecf29472 Branch: refs/heads/master Commit: ecf29472ee73745a1c89a37f7d3dd5603c4ac9bf Parents: bbb5919 d73f4bf Author: jackylk Authored: Thu Dec 15 17:04:53 2016 +0800 Committer: jackylk Committed: Thu Dec 15 17:04:53 2016 +0800 -- .../cache/dictionary/ColumnDictionaryInfo.java | 1 + .../ThriftWrapperSchemaConverterImpl.java | 4 + .../DirectDictionaryGenerator.java | 2 +- .../DirectDictionaryKeyGeneratorFactory.java| 4 + .../DateDirectDictionaryGenerator.java | 162 +++ .../util/AbstractDataFileFooterConverter.java | 2 + .../carbondata/core/util/DataTypeUtil.java | 5 + .../sortindex/CarbonDictionarySortModel.java| 1 + .../scan/complextypes/PrimitiveQueryType.java | 2 + .../scan/expression/ExpressionResult.java | 99 ++-- .../scan/expression/LiteralExpression.java | 4 +- .../conditional/EqualToExpression.java | 1 + .../GreaterThanEqualToExpression.java | 1 + .../conditional/GreaterThanExpression.java | 1 + .../expression/conditional/InExpression.java| 1 + .../conditional/LessThanEqualToExpression.java | 1 + .../conditional/LessThanExpression.java | 1 + .../conditional/NotEqualsExpression.java| 1 + .../expression/conditional/NotInExpression.java | 1 + .../carbondata/scan/filter/FilterUtil.java | 13 +- .../resolver/ConditionalFilterResolverImpl.java | 3 +- .../resolver/RestructureFilterResolverImpl.java | 6 +- .../visitor/CustomTypeDictionaryVisitor.java| 15 +- .../visitor/ResolvedFilterInfoVisitorIntf.java | 2 +- .../carbondata/core/util/DataTypeUtilTest.java | 1 + .../DictionaryBasedResultCollectorTest.java | 9 +- .../scan/expression/ExpressionResultTest.java | 4 +- 
.../carbondata/examples/CarbonExample.scala | 2 +- examples/spark2/src/main/resources/data.csv | 20 +-- .../carbondata/examples/CarbonExample.scala | 23 ++- format/src/main/thrift/schema.thrift| 1 + .../carbondata/spark/util/CarbonScalaUtil.scala | 1 + .../spark/util/DataTypeConverterUtil.scala | 5 +- .../execution/command/carbonTableSchema.scala | 2 +- .../readsupport/SparkRowReadSupportImpl.java| 16 +- .../spark/CarbonDataFrameWriter.scala | 1 + .../spark/sql/CarbonDictionaryDecoder.scala | 1 + .../org/apache/spark/sql/CarbonSqlParser.scala | 21 ++- .../apache/spark/sql/hive/CarbonMetastore.scala | 5 +- .../src/test/resources/datasamplefordate.csv| 4 + .../spark/src/test/resources/datasamplenull.csv | 2 +- .../DateDataTypeDirectDictionaryTest.scala | 154 ++ ...TypeDirectDictionaryWithNoDictTestCase.scala | 101 .../DateDataTypeNullDataTest.scala | 88 ++ ...estampDataTypeDirectDictionaryTestCase.scala | 18 +-- ...TypeDirectDictionaryWithNoDictTestCase.scala | 6 +- .../TimestampDataTypeNullDataTest.scala | 16 +- .../readsupport/SparkRowReadSupportImpl.java| 6 +- .../spark/sql/CarbonDataFrameWriter.scala | 1 + .../spark/sql/CarbonDictionaryDecoder.scala | 2 + .../org/apache/spark/sql/CarbonSource.scala | 13 +- .../org/apache/spark/sql/TableCreator.scala | 7 +- .../apache/spark/sql/hive/CarbonMetastore.scala | 2 + 53 files changed, 768 insertions(+), 97 deletions(-) --
[2/3] incubator-carbondata git commit: support datatype: date and char
support datatype: date and char Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/d73f4bfe Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/d73f4bfe Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/d73f4bfe Branch: refs/heads/master Commit: d73f4bfe82e8be8970f41ce04707859e5b9bcce9 Parents: bbb5919 Author: QiangCai Authored: Wed Dec 7 14:29:29 2016 +0800 Committer: jackylk Committed: Thu Dec 15 17:04:16 2016 +0800 -- .../cache/dictionary/ColumnDictionaryInfo.java | 1 + .../ThriftWrapperSchemaConverterImpl.java | 4 + .../DirectDictionaryGenerator.java | 2 +- .../DirectDictionaryKeyGeneratorFactory.java| 4 + .../DateDirectDictionaryGenerator.java | 162 +++ .../util/AbstractDataFileFooterConverter.java | 2 + .../carbondata/core/util/DataTypeUtil.java | 5 + .../sortindex/CarbonDictionarySortModel.java| 1 + .../scan/complextypes/PrimitiveQueryType.java | 2 + .../scan/expression/ExpressionResult.java | 99 ++-- .../scan/expression/LiteralExpression.java | 4 +- .../conditional/EqualToExpression.java | 1 + .../GreaterThanEqualToExpression.java | 1 + .../conditional/GreaterThanExpression.java | 1 + .../expression/conditional/InExpression.java| 1 + .../conditional/LessThanEqualToExpression.java | 1 + .../conditional/LessThanExpression.java | 1 + .../conditional/NotEqualsExpression.java| 1 + .../expression/conditional/NotInExpression.java | 1 + .../carbondata/scan/filter/FilterUtil.java | 13 +- .../resolver/ConditionalFilterResolverImpl.java | 3 +- .../resolver/RestructureFilterResolverImpl.java | 6 +- .../visitor/CustomTypeDictionaryVisitor.java| 15 +- .../visitor/ResolvedFilterInfoVisitorIntf.java | 2 +- .../carbondata/core/util/DataTypeUtilTest.java | 1 + .../DictionaryBasedResultCollectorTest.java | 9 +- .../scan/expression/ExpressionResultTest.java | 4 +- .../carbondata/examples/CarbonExample.scala | 2 +- 
examples/spark2/src/main/resources/data.csv | 20 +-- .../carbondata/examples/CarbonExample.scala | 23 ++- format/src/main/thrift/schema.thrift| 1 + .../carbondata/spark/util/CarbonScalaUtil.scala | 1 + .../spark/util/DataTypeConverterUtil.scala | 5 +- .../execution/command/carbonTableSchema.scala | 2 +- .../readsupport/SparkRowReadSupportImpl.java| 16 +- .../spark/CarbonDataFrameWriter.scala | 1 + .../spark/sql/CarbonDictionaryDecoder.scala | 1 + .../org/apache/spark/sql/CarbonSqlParser.scala | 21 ++- .../apache/spark/sql/hive/CarbonMetastore.scala | 5 +- .../src/test/resources/datasamplefordate.csv| 4 + .../spark/src/test/resources/datasamplenull.csv | 2 +- .../DateDataTypeDirectDictionaryTest.scala | 154 ++ ...TypeDirectDictionaryWithNoDictTestCase.scala | 101 .../DateDataTypeNullDataTest.scala | 88 ++ ...estampDataTypeDirectDictionaryTestCase.scala | 18 +-- ...TypeDirectDictionaryWithNoDictTestCase.scala | 6 +- .../TimestampDataTypeNullDataTest.scala | 16 +- .../readsupport/SparkRowReadSupportImpl.java| 6 +- .../spark/sql/CarbonDataFrameWriter.scala | 1 + .../spark/sql/CarbonDictionaryDecoder.scala | 2 + .../org/apache/spark/sql/CarbonSource.scala | 13 +- .../org/apache/spark/sql/TableCreator.scala | 7 +- .../apache/spark/sql/hive/CarbonMetastore.scala | 2 + 53 files changed, 768 insertions(+), 97 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/d73f4bfe/core/src/main/java/org/apache/carbondata/core/cache/dictionary/ColumnDictionaryInfo.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/cache/dictionary/ColumnDictionaryInfo.java b/core/src/main/java/org/apache/carbondata/core/cache/dictionary/ColumnDictionaryInfo.java index c19f244..1d2eb8b 100644 --- a/core/src/main/java/org/apache/carbondata/core/cache/dictionary/ColumnDictionaryInfo.java +++ b/core/src/main/java/org/apache/carbondata/core/cache/dictionary/ColumnDictionaryInfo.java @@ -285,6 +285,7 @@ public class ColumnDictionaryInfo extends 
AbstractColumnDictionaryInfo { case BOOLEAN: return Boolean .compare((Boolean.parseBoolean(dictionaryVal)), (Boolean.parseBoolean(memberVal))); +case DATE: case TIMESTAMP: SimpleDateFormat parser = new SimpleDateFormat(CarbonProperties.getInstance() .getProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, http://git
[1/2] incubator-carbondata git commit: fixLoadTableForSpark2
Repository: incubator-carbondata Updated Branches: refs/heads/master ecf29472e -> 526243b09 fixLoadTableForSpark2 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/1b5e7fb4 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/1b5e7fb4 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/1b5e7fb4 Branch: refs/heads/master Commit: 1b5e7fb442dd99b859b819fad5dea8cbc754e4c2 Parents: ecf2947 Author: QiangCai Authored: Fri Dec 16 00:26:09 2016 +0800 Committer: QiangCai Committed: Fri Dec 16 00:46:54 2016 +0800 -- .../execution/command/carbonTableSchema.scala | 46 +++- 1 file changed, 44 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/1b5e7fb4/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala -- diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala index 7f74d92..10fffd9 100644 --- a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala +++ b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Cast, Literal} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan -import org.apache.spark.sql.hive.CarbonRelation +import org.apache.spark.sql.hive.{CarbonMetastore, CarbonRelation} import org.apache.spark.sql.types.TimestampType import org.apache.spark.util.FileUtils @@ -34,6 +34,8 @@ import org.apache.carbondata.common.logging.LogServiceFactory import org.apache.carbondata.core.carbon.CarbonDataLoadSchema import 
org.apache.carbondata.core.carbon.metadata.CarbonMetadata import org.apache.carbondata.core.carbon.metadata.schema.table.{CarbonTable, TableInfo} +import org.apache.carbondata.core.carbon.metadata.schema.table.column.CarbonDimension +import org.apache.carbondata.core.carbon.path.CarbonStorePath import org.apache.carbondata.core.constants.CarbonCommonConstants import org.apache.carbondata.core.datastorage.store.impl.FileFactory import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil} @@ -43,7 +45,7 @@ import org.apache.carbondata.processing.constants.TableOptionConstant import org.apache.carbondata.processing.etl.DataLoadingException import org.apache.carbondata.processing.model.CarbonLoadModel import org.apache.carbondata.spark.exception.MalformedCarbonCommandException -import org.apache.carbondata.spark.rdd.{CarbonDataRDDFactory, DataManagementFunc} +import org.apache.carbondata.spark.rdd.{CarbonDataRDDFactory, DataManagementFunc, DictionaryLoadModel} import org.apache.carbondata.spark.util.{CarbonScalaUtil, CarbonSparkUtil, GlobalDictionaryUtil} /** @@ -259,6 +261,45 @@ case class DeleteLoadsByLoadDate( } +object LoadTable { + + def updateTableMetadata(carbonLoadModel: CarbonLoadModel, + sqlContext: SQLContext, + model: DictionaryLoadModel, + noDictDimension: Array[CarbonDimension]): Unit = { + +val carbonTablePath = CarbonStorePath.getCarbonTablePath(model.hdfsLocation, + model.table) +val schemaFilePath = carbonTablePath.getSchemaFilePath + +// read TableInfo +val tableInfo = CarbonMetastore.readSchemaFileToThriftTable(schemaFilePath) + +// modify TableInfo +val columns = tableInfo.getFact_table.getTable_columns +for (i <- 0 until columns.size) { + if (noDictDimension.exists(x => columns.get(i).getColumn_id.equals(x.getColumnId))) { + columns.get(i).encoders.remove(org.apache.carbondata.format.Encoding.DICTIONARY) + } +} + +// write TableInfo +CarbonMetastore.writeThriftTableToSchemaFile(schemaFilePath, tableInfo) + +// update Metadata +val catalog 
= CarbonEnv.get.carbonMetastore +catalog.updateMetadataByThriftTable(schemaFilePath, tableInfo, + model.table.getDatabaseName, model.table.getTableName, carbonLoadModel.getStorePath) + +// update CarbonDataLoadSchema +val carbonTable = catalog.lookupRelation(Option(model.table.getDatabaseName), + model.table.getTableName)(sqlContext.sparkSession).asInstanceOf[CarbonRelation].tableMeta + .carbonTable +carbonLoadModel.setCarbonDataLoadSchema(new CarbonDataLoadSchema(carbonTable)) + } + +} + case class LoadTableByInsert(relation: CarbonDatasourceHadoopRelation, child: LogicalPlan) { val LOGGER = LogServi
[2/2] incubator-carbondata git commit: [CARBONDATA-536]initialize updateTableMetadata method in LoadTable for Spark2 This closes #439
[CARBONDATA-536]initialize updateTableMetadata method in LoadTable for Spark2 This closes #439 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/526243b0 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/526243b0 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/526243b0 Branch: refs/heads/master Commit: 526243b09c0218fa05238ac16c3be71f06dc0af8 Parents: ecf2947 1b5e7fb Author: jackylk Authored: Fri Dec 16 09:49:15 2016 +0800 Committer: jackylk Committed: Fri Dec 16 09:49:15 2016 +0800 -- .../execution/command/carbonTableSchema.scala | 46 +++- 1 file changed, 44 insertions(+), 2 deletions(-) --
[4/4] incubator-carbondata git commit: [CARBONDATA-516][SPARK2]fix union issue in CarbonLateDecoderRule This closes #413
[CARBONDATA-516][SPARK2]fix union issue in CarbonLateDecoderRule This closes #413 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/f5ecfbf5 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/f5ecfbf5 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/f5ecfbf5 Branch: refs/heads/master Commit: f5ecfbf5cfb65e883e69606e7911381fd915683a Parents: 526243b 462f642 Author: jackylk Authored: Fri Dec 16 10:37:47 2016 +0800 Committer: jackylk Committed: Fri Dec 16 10:37:47 2016 +0800 -- .../impl/DictionaryBasedResultCollector.java| 29 +- .../DictionaryBasedResultCollectorTest.java |9 +- .../carbondata/examples/CarbonExample.scala | 12 + .../apache/carbondata/spark/CarbonFilters.scala |7 + .../CarbonDecoderOptimizerHelper.scala | 24 +- .../readsupport/SparkRowReadSupportImpl.java|5 +- .../apache/carbondata/spark/CarbonFilters.scala |6 + .../spark/sql/CarbonDataFrameWriter.scala |3 +- .../sql/CarbonDatasourceHadoopRelation.scala|4 +- .../spark/sql/CarbonDictionaryDecoder.scala | 60 +- .../scala/org/apache/spark/sql/CarbonScan.scala | 44 +- .../org/apache/spark/sql/CarbonSource.scala |3 +- .../sql/optimizer/CarbonLateDecodeRule.scala| 124 +- integration/spark2/src/test/resources/data.csv | 11 + .../AllDataTypesTestCaseAggregate.scala | 1161 ++ .../spark/sql/common/util/CarbonFunSuite.scala | 49 + .../sql/common/util/CarbonSessionTest.scala | 74 ++ .../apache/spark/sql/common/util/PlanTest.scala | 59 + .../spark/sql/common/util/QueryTest.scala | 149 +++ 19 files changed, 1654 insertions(+), 179 deletions(-) --
[1/4] incubator-carbondata git commit: fixUnionIssue and add test case
Repository: incubator-carbondata Updated Branches: refs/heads/master 526243b09 -> f5ecfbf5c http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/462f6422/integration/spark2/src/test/scala/org/apache/spark/sql/common/util/CarbonFunSuite.scala -- diff --git a/integration/spark2/src/test/scala/org/apache/spark/sql/common/util/CarbonFunSuite.scala b/integration/spark2/src/test/scala/org/apache/spark/sql/common/util/CarbonFunSuite.scala new file mode 100644 index 000..4647e78 --- /dev/null +++ b/integration/spark2/src/test/scala/org/apache/spark/sql/common/util/CarbonFunSuite.scala @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.spark.sql.common.util + +import org.apache.carbondata.common.logging.LogServiceFactory +import org.scalatest.{FunSuite, Outcome} + + +private[spark] abstract class CarbonFunSuite extends FunSuite { + + private val LOGGER = LogServiceFactory.getLogService(this.getClass.getCanonicalName) + + /** + * Log the suite name and the test name before and after each test. + * + * Subclasses should never override this method. 
If they wish to run + * custom code before and after each test, they should should mix in + * the {{org.scalatest.BeforeAndAfter}} trait instead. + */ + final protected override def withFixture(test: NoArgTest): Outcome = { +val testName = test.text +val suiteName = this.getClass.getName +val shortSuiteName = suiteName.replaceAll("org.apache.spark", "o.a.s") +try { + LOGGER.info(s"\n\n= TEST OUTPUT FOR $shortSuiteName: '$testName' =\n") + test() +} finally { + LOGGER.info(s"\n\n= FINISHED $shortSuiteName: '$testName' =\n") +} + } + +} http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/462f6422/integration/spark2/src/test/scala/org/apache/spark/sql/common/util/CarbonSessionTest.scala -- diff --git a/integration/spark2/src/test/scala/org/apache/spark/sql/common/util/CarbonSessionTest.scala b/integration/spark2/src/test/scala/org/apache/spark/sql/common/util/CarbonSessionTest.scala new file mode 100644 index 000..d29196e --- /dev/null +++ b/integration/spark2/src/test/scala/org/apache/spark/sql/common/util/CarbonSessionTest.scala @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.spark.sql.common.util + +import java.io.File + +import org.apache.spark.sql.{DataFrame, SparkSession} +import org.apache.carbondata.core.constants.CarbonCommonConstants +import org.apache.carbondata.core.util.CarbonProperties +import org.apache.commons.io.FileUtils + +object CarbonSessionTest extends{ + +val rootPath = new File(this.getClass.getResource("/").getPath + + "../../../..").getCanonicalPath +val storeLocation = s"$rootPath/examples/spark2/target/store" +val warehouse = s"$rootPath/examples/spark2/target/warehouse" +val metastoredb = s"$rootPath/examples/spark2/target/metastore_db" + +val spark = { + +// clean data folder +if (true) { +val clean = (path: String) => FileUtils.deleteDirectory(new File(path)) +clean(storeLocation) +clean(warehouse) +clean(metastoredb) +} + +val spark = SparkSession + .builder() + .master("l
[3/4] incubator-carbondata git commit: fixUnionIssue and add test case
fixUnionIssue and add test case Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/462f6422 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/462f6422 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/462f6422 Branch: refs/heads/master Commit: 462f64226428fc255938d8752226cda262ad0ae4 Parents: 526243b Author: QiangCai Authored: Thu Dec 8 19:06:33 2016 +0800 Committer: jackylk Committed: Fri Dec 16 10:13:39 2016 +0800 -- .../impl/DictionaryBasedResultCollector.java| 29 +- .../DictionaryBasedResultCollectorTest.java |9 +- .../carbondata/examples/CarbonExample.scala | 12 + .../apache/carbondata/spark/CarbonFilters.scala |7 + .../CarbonDecoderOptimizerHelper.scala | 24 +- .../readsupport/SparkRowReadSupportImpl.java|5 +- .../apache/carbondata/spark/CarbonFilters.scala |6 + .../spark/sql/CarbonDataFrameWriter.scala |3 +- .../sql/CarbonDatasourceHadoopRelation.scala|4 +- .../spark/sql/CarbonDictionaryDecoder.scala | 60 +- .../scala/org/apache/spark/sql/CarbonScan.scala | 44 +- .../org/apache/spark/sql/CarbonSource.scala |3 +- .../sql/optimizer/CarbonLateDecodeRule.scala| 124 +- integration/spark2/src/test/resources/data.csv | 11 + .../AllDataTypesTestCaseAggregate.scala | 1161 ++ .../spark/sql/common/util/CarbonFunSuite.scala | 49 + .../sql/common/util/CarbonSessionTest.scala | 74 ++ .../apache/spark/sql/common/util/PlanTest.scala | 59 + .../spark/sql/common/util/QueryTest.scala | 149 +++ 19 files changed, 1654 insertions(+), 179 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/462f6422/core/src/main/java/org/apache/carbondata/scan/collector/impl/DictionaryBasedResultCollector.java -- diff --git a/core/src/main/java/org/apache/carbondata/scan/collector/impl/DictionaryBasedResultCollector.java b/core/src/main/java/org/apache/carbondata/scan/collector/impl/DictionaryBasedResultCollector.java index 
108677f..2462caa 100644 --- a/core/src/main/java/org/apache/carbondata/scan/collector/impl/DictionaryBasedResultCollector.java +++ b/core/src/main/java/org/apache/carbondata/scan/collector/impl/DictionaryBasedResultCollector.java @@ -20,11 +20,13 @@ package org.apache.carbondata.scan.collector.impl; import java.nio.ByteBuffer; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.Map; import org.apache.carbondata.common.logging.LogService; import org.apache.carbondata.common.logging.LogServiceFactory; +import org.apache.carbondata.core.carbon.metadata.encoder.Encoding; import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator; import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryKeyGeneratorFactory; import org.apache.carbondata.core.util.CarbonUtil; @@ -35,6 +37,7 @@ import org.apache.carbondata.scan.model.QueryDimension; import org.apache.carbondata.scan.model.QueryMeasure; import org.apache.carbondata.scan.result.AbstractScannedResult; +import org.apache.commons.lang3.ArrayUtils; /** * It is not a collector it is just a scanned result holder. 
*/ @@ -52,9 +55,31 @@ public class DictionaryBasedResultCollector extends AbstractScannedResultCollect * it will keep track of how many record is processed, to handle limit scenario */ @Override public List collectData(AbstractScannedResult scannedResult, int batchSize) { + List listBasedResult = new ArrayList<>(batchSize); boolean isMsrsPresent = measureDatatypes.length > 0; + QueryDimension[] queryDimensions = tableBlockExecutionInfos.getQueryDimensions(); +List dictionaryIndexes = new ArrayList(); +for (int i = 0; i < queryDimensions.length; i++) { + if(queryDimensions[i].getDimension().hasEncoding(Encoding.DICTIONARY) || + queryDimensions[i].getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY) ) { +dictionaryIndexes.add(queryDimensions[i].getDimension().getOrdinal()); + } +} +int[] primitive = ArrayUtils.toPrimitive(dictionaryIndexes.toArray( +new Integer[dictionaryIndexes.size()])); +Arrays.sort(primitive); +int[] actualIndexInSurrogateKey = new int[dictionaryIndexes.size()]; +int index = 0; +for (int i = 0; i < queryDimensions.length; i++) { + if(queryDimensions[i].getDimension().hasEncoding(Encoding.DICTIONARY) || + queryDimensions[i].getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY) ) { +actualIndexInSurrogateKey[index++] = Arrays.binarySearch(primitive, +qu
[2/4] incubator-carbondata git commit: fixUnionIssue and add test case
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/462f6422/integration/spark2/src/test/scala/org/apache/carbondata/spark/testsuite/allqueries/AllDataTypesTestCaseAggregate.scala -- diff --git a/integration/spark2/src/test/scala/org/apache/carbondata/spark/testsuite/allqueries/AllDataTypesTestCaseAggregate.scala b/integration/spark2/src/test/scala/org/apache/carbondata/spark/testsuite/allqueries/AllDataTypesTestCaseAggregate.scala new file mode 100644 index 000..c8d5221 --- /dev/null +++ b/integration/spark2/src/test/scala/org/apache/carbondata/spark/testsuite/allqueries/AllDataTypesTestCaseAggregate.scala @@ -0,0 +1,1161 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.carbondata.spark.testsuite.allqueries + +import java.io.File + +import org.apache.spark.sql.{Row, SaveMode} +import org.apache.spark.sql.common.util.CarbonSessionTest._ +import org.apache.spark.sql.common.util.QueryTest +import org.scalatest.BeforeAndAfterAll +import org.apache.carbondata.core.constants.CarbonCommonConstants +import org.apache.carbondata.core.util.CarbonProperties + +/** + * Test Class for all query on multiple datatypes + * + */ +class AllDataTypesTestCaseAggregate extends QueryTest with BeforeAndAfterAll { + + override def beforeAll { +clean +val currentDirectory = new File(this.getClass.getResource("/").getPath + "/../../../spark") + .getCanonicalPath + +sql("drop table if exists Carbon_automation_test") +sql("drop table if exists Carbon_automation_hive") +sql("drop table if exists Carbon_automation_test_hive") + +sql("create table if not exists Carbon_automation_test (imei string,deviceInformationId int,MAC string,deviceColor string,device_backColor string,modelId string,marketName string,AMSize string,ROMSize string,CUPAudit string,CPIClocked string,series string,productionDate timestamp,bomCode string,internalModels string, deliveryTime string, channelsId string, channelsName string , deliveryAreaId string, deliveryCountry string, deliveryProvince string, deliveryCity string,deliveryDistrict string, deliveryStreet string, oxSingleNumber string,contractNumber int, ActiveCheckTime string, ActiveAreaId string, ActiveCountry string, ActiveProvince string, Activecity string, ActiveDistrict string, ActiveStreet string, ActiveOperatorId string, Active_releaseId string, Active_EMUIVersion string, Active_operaSysVersion string, Active_BacVerNumber string, Active_BacFlashVer string, Active_webUIVersion string, Active_webUITypeCarrVer string,Active_webTypeDataVerNumber string, Active_operatorsVer sion string, Active_phonePADPartitionedVersions string, Latest_YEAR int, Latest_MONTH int, Latest_DAY int, Latest_HOUR string, 
Latest_areaId string, Latest_country string, Latest_province string, Latest_city string, Latest_district string, Latest_street string, Latest_releaseId string, Latest_EMUIVersion string, Latest_operaSysVersion string, Latest_BacVerNumber string, Latest_BacFlashVer string, Latest_webUIVersion string, Latest_webUITypeCarrVer string, Latest_webTypeDataVerNumber string, Latest_operatorsVersion string, Latest_phonePADPartitionedVersions string, Latest_operatorId string, gamePointId int,gamePointDescription string) USING org.apache.spark.sql.CarbonSource OPTIONS('dbName'='default', 'tableName'='Carbon_automation_test','DICTIONARY_INCLUDE'='Latest_MONTH,Latest_DAY,deviceInformationId')"); +CarbonProperties.getInstance() + .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT) + +sql("create table if not exists Carbon_automation_hive (imei string,deviceInformationId int,MAC string,deviceColor string,device_backColor string,modelId string,marketName string,AMSize string,ROMSize string,CUPAudit string,CPIClocked string,series string,productionDate timestamp,bomCode string,internalModels string, deliveryTime string, channelsId string, channelsName string , deliveryAreaId string, deliveryCountry string, deliveryProvince string, deliveryCity string,deliveryDistrict string, deliveryStreet string, oxSingleNumber st
[2/2] incubator-carbondata git commit: [CARBONDATA-544] Delete core/.TestFileFactory.carbondata.crc, core/Testdb.carbon This closes #446
[CARBONDATA-544] Delete core/.TestFileFactory.carbondata.crc,core/Testdb.carbon This closes #446 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/5e50d043 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/5e50d043 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/5e50d043 Branch: refs/heads/master Commit: 5e50d0433232e900a863a62501fda06259164d69 Parents: 51463ff 345fb65 Author: jackylk Authored: Mon Dec 19 21:29:12 2016 +0800 Committer: jackylk Committed: Mon Dec 19 21:29:12 2016 +0800 -- .../filesystem/store/impl/FileFactoryImplUnitTest.java | 8 +++- .../datastorage/store/filesystem/LocalCarbonFileTest.java| 4 2 files changed, 11 insertions(+), 1 deletion(-) --
[1/2] incubator-carbondata git commit: Delete core/.TestFileFactory.carbondata.crc, core/Testdb.carbon
Repository: incubator-carbondata Updated Branches: refs/heads/master 51463ff2e -> 5e50d0433 Delete core/.TestFileFactory.carbondata.crc,core/Testdb.carbon Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/345fb652 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/345fb652 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/345fb652 Branch: refs/heads/master Commit: 345fb65256730e53f6d080cf18d135a08ed6b7bd Parents: 51463ff Author: chenliang613 Authored: Mon Dec 19 20:13:01 2016 +0800 Committer: jackylk Committed: Mon Dec 19 21:28:45 2016 +0800 -- .../filesystem/store/impl/FileFactoryImplUnitTest.java | 8 +++- .../datastorage/store/filesystem/LocalCarbonFileTest.java| 4 2 files changed, 11 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/345fb652/core/src/test/java/org/apache/carbondata/core/carbon/datastorage/filesystem/store/impl/FileFactoryImplUnitTest.java -- diff --git a/core/src/test/java/org/apache/carbondata/core/carbon/datastorage/filesystem/store/impl/FileFactoryImplUnitTest.java b/core/src/test/java/org/apache/carbondata/core/carbon/datastorage/filesystem/store/impl/FileFactoryImplUnitTest.java index 62de46b..8277f91 100644 --- a/core/src/test/java/org/apache/carbondata/core/carbon/datastorage/filesystem/store/impl/FileFactoryImplUnitTest.java +++ b/core/src/test/java/org/apache/carbondata/core/carbon/datastorage/filesystem/store/impl/FileFactoryImplUnitTest.java @@ -41,11 +41,17 @@ public class FileFactoryImplUnitTest { private static String filePath; - @AfterClass public static void tearDown() { + @AfterClass + public static void tearDown() { File file = new File(filePath); if (file.exists()) { file.delete(); } + +File file1 = new File(".TestFileFactory.carbondata.crc"); +if (file1.exists()) { + file1.delete(); +} } @BeforeClass public static void setUp() { 
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/345fb652/core/src/test/java/org/apache/carbondata/core/datastorage/store/filesystem/LocalCarbonFileTest.java -- diff --git a/core/src/test/java/org/apache/carbondata/core/datastorage/store/filesystem/LocalCarbonFileTest.java b/core/src/test/java/org/apache/carbondata/core/datastorage/store/filesystem/LocalCarbonFileTest.java index 7b787d4..6ea4a9b 100644 --- a/core/src/test/java/org/apache/carbondata/core/datastorage/store/filesystem/LocalCarbonFileTest.java +++ b/core/src/test/java/org/apache/carbondata/core/datastorage/store/filesystem/LocalCarbonFileTest.java @@ -125,6 +125,10 @@ public class LocalCarbonFileTest { public void testRenameForce() { localCarbonFile = new LocalCarbonFile(file); assertTrue(localCarbonFile.renameForce("Testdb.carbon")); +File file1 = new File("Testdb.carbon"); +if (file1.exists()) { +file1.delete(); +} } @Test
[2/4] incubator-carbondata git commit: add initial check in for vector reader
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/376d69ff/integration/spark-common/src/main/java/org/apache/carbondata/spark/merger/CarbonCompactionExecutor.java -- diff --git a/integration/spark-common/src/main/java/org/apache/carbondata/spark/merger/CarbonCompactionExecutor.java b/integration/spark-common/src/main/java/org/apache/carbondata/spark/merger/CarbonCompactionExecutor.java index 9fa63d6..dc5fb17 100644 --- a/integration/spark-common/src/main/java/org/apache/carbondata/spark/merger/CarbonCompactionExecutor.java +++ b/integration/spark-common/src/main/java/org/apache/carbondata/spark/merger/CarbonCompactionExecutor.java @@ -145,7 +145,7 @@ public class CarbonCompactionExecutor { throws QueryExecutionException { queryModel.setTableBlockInfos(blockList); -this.queryExecutor = QueryExecutorFactory.getQueryExecutor(); +this.queryExecutor = QueryExecutorFactory.getQueryExecutor(queryModel); CarbonIterator iter = null; try { iter = queryExecutor.execute(queryModel); http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/376d69ff/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonScanRDD.scala -- diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonScanRDD.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonScanRDD.scala index d654067..f20d12d 100644 --- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonScanRDD.scala +++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonScanRDD.scala @@ -18,17 +18,18 @@ package org.apache.carbondata.spark.rdd import java.text.SimpleDateFormat -import java.util +import java.util.ArrayList import java.util.Date +import java.util.List import scala.collection.JavaConverters._ -import scala.reflect.ClassTag import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.mapreduce.{InputSplit, Job, JobID, TaskAttemptID, TaskType} +import 
org.apache.hadoop.mapreduce._ import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl -import org.apache.spark.{Partition, SparkContext, TaskContext, TaskKilledException} +import org.apache.spark._ import org.apache.spark.rdd.RDD +import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.hive.DistributionUtil import org.apache.carbondata.common.logging.LogServiceFactory @@ -37,8 +38,9 @@ import org.apache.carbondata.core.carbon.datastore.block.Distributable import org.apache.carbondata.core.carbon.metadata.schema.table.CarbonTable import org.apache.carbondata.core.carbon.querystatistics.{QueryStatistic, QueryStatisticsConstants} import org.apache.carbondata.core.util.CarbonTimeStatisticsFactory -import org.apache.carbondata.hadoop.{CarbonInputFormat, CarbonInputSplit, CarbonMultiBlockSplit, CarbonProjection} +import org.apache.carbondata.hadoop._ import org.apache.carbondata.scan.expression.Expression +import org.apache.carbondata.scan.model.QueryModel import org.apache.carbondata.spark.load.CarbonLoaderUtil /** @@ -46,19 +48,20 @@ import org.apache.carbondata.spark.load.CarbonLoaderUtil * CarbonData file, this RDD will leverage CarbonData's index information to do CarbonData file * level filtering in driver side. 
*/ -class CarbonScanRDD[V: ClassTag]( +class CarbonScanRDD( @transient sc: SparkContext, columnProjection: CarbonProjection, filterExpression: Expression, identifier: AbsoluteTableIdentifier, @transient carbonTable: CarbonTable) - extends RDD[V](sc, Nil) { + extends RDD[InternalRow](sc, Nil) { private val queryId = sparkContext.getConf.get("queryId", System.nanoTime() + "") private val jobTrackerId: String = { val formatter = new SimpleDateFormat("MMddHHmm") formatter.format(new Date()) } + private var vectorReader = false @transient private val jobId = new JobID(jobTrackerId, id) @transient val LOGGER = LogServiceFactory.getLogService(this.getClass.getName) @@ -76,7 +79,7 @@ class CarbonScanRDD[V: ClassTag]( result } - private def distributeSplits(splits: util.List[InputSplit]): Array[Partition] = { + private def distributeSplits(splits: List[InputSplit]): Array[Partition] = { // this function distributes the split based on following logic: // 1. based on data locality, to make split balanced on all available nodes // 2. if the number of split for one @@ -84,7 +87,7 @@ class CarbonScanRDD[V: ClassTag]( var statistic = new QueryStatistic() val statisticRecorder = CarbonTimeStatisticsFactory.createDriverRecorder() val parallelism = sparkContext.defaultParallelism -val result = new util.ArrayList[Partition](parallelism) +val result = new ArrayList[Partition](parallelism) var noOfBlocks
[1/4] incubator-carbondata git commit: add initial check in for vector reader
Repository: incubator-carbondata Updated Branches: refs/heads/master 5e50d0433 -> a561d869c http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/376d69ff/integration/spark2/src/test/resources/dataDiff.csv -- diff --git a/integration/spark2/src/test/resources/dataDiff.csv b/integration/spark2/src/test/resources/dataDiff.csv new file mode 100644 index 000..6407b10 --- /dev/null +++ b/integration/spark2/src/test/resources/dataDiff.csv @@ -0,0 +1,1001 @@ +ID,date,country,name,phonetype,serialname,salary +1,2015/7/23,china,aaa1,phone197,ASD69643,15000 +2,2015/7/24,china,aaa2,phone756,ASD42892,15001 +3,2015/7/25,china,aaa3,phone1904,ASD37014,15002 +4,2015/7/26,china,aaa4,phone2435,ASD66902,15003 +5,2015/7/27,china,aaa5,phone2441,ASD90633,15004 +6,2015/7/28,china,aaa6,phone294,ASD59961,15005 +7,2015/7/29,china,aaa7,phone610,ASD14875,15006 +8,2015/7/30,china,aaa8,phone1848,ASD57308,15007 +9,2015/7/18,china,aaa9,phone706,ASD86717,15008 +10,2015/7/19,usa,aaa10,phone685,ASD30505,15009 +11,2015/7/18,china,aaa11,phone1554,ASD26101,15010 +12,2015/7/19,china,aaa12,phone1781,ASD85711,15011 +13,2015/7/20,china,aaa13,phone943,ASD39200,15012 +14,2015/7/21,china,aaa14,phone1954,ASD80468,15013 +15,2015/7/22,china,aaa15,phone451,ASD1954,15014 +16,2015/7/23,china,aaa16,phone390,ASD38513,15015 +17,2015/7/24,china,aaa17,phone1929,ASD86213,15016 +18,2015/7/25,usa,aaa18,phone910,ASD88812,15017 +19,2015/7/26,china,aaa19,phone2151,ASD9316,15018 +20,2015/7/27,china,aaa20,phone2625,ASD62597,15019 +21,2015/7/28,china,aaa21,phone1371,ASD27896,15020 +22,2015/7/29,china,aaa22,phone945,ASD79760,15021 +23,2015/7/30,china,aaa23,phone2177,ASD45410,15022 +24,2015/7/31,china,aaa24,phone1586,ASD80645,15023 +25,2015/8/1,china,aaa25,phone1310,ASD36408,15024 +26,2015/8/2,china,aaa26,phone1579,ASD14571,15025 +27,2015/8/3,china,aaa27,phone2123,ASD36243,15026 +28,2015/8/4,china,aaa28,phone2334,ASD57825,15027 +29,2015/8/5,china,aaa29,phone1166,ASD26161,15028 
+30,2015/8/6,china,aaa30,phone2248,ASD47899,15029 +31,2015/8/7,china,aaa31,phone475,ASD89811,15030 +32,2015/8/8,china,aaa32,phone2499,ASD87974,15031 +33,2015/8/9,china,aaa33,phone2333,ASD62408,15032 +34,2015/8/10,china,aaa34,phone1128,ASD73138,15033 +35,2015/8/11,china,aaa35,phone1063,ASD29573,15034 +36,2015/8/12,china,aaa36,phone1633,ASD82574,15035 +37,2015/8/13,china,aaa37,phone775,ASD47938,15036 +38,2015/8/14,china,aaa38,phone817,ASD40947,15037 +39,2015/8/15,china,aaa39,phone2221,ASD6379,15038 +40,2015/8/16,china,aaa40,phone2289,ASD48374,15039 +41,2015/8/17,china,aaa41,phone599,ASD44560,15040 +42,2015/8/18,china,aaa42,phone384,ASD613,15041 +43,2015/8/19,china,aaa43,phone731,ASD66050,15042 +44,2015/8/20,china,aaa44,phone2128,ASD39759,15043 +45,2015/8/21,china,aaa45,phone1503,ASD31200,15044 +46,2015/8/22,china,aaa46,phone1833,ASD22945,15045 +47,2015/8/23,china,aaa47,phone2346,ASD80162,15046 +48,2015/8/24,china,aaa48,phone2714,ASD27822,15047 +49,2015/8/25,china,aaa49,phone1582,ASD21279,15048 +50,2015/8/26,china,aaa50,phone83,ASD17242,15049 +51,2015/8/27,china,aaa51,phone54,ASD29131,15050 +52,2015/8/28,china,aaa52,phone526,ASD73647,15051 +53,2015/8/29,china,aaa53,phone1308,ASD80493,15052 +54,2015/8/30,china,aaa54,phone2785,ASD30573,15053 +55,2015/8/31,china,aaa55,phone2133,ASD49757,15054 +56,2015/9/1,china,aaa56,phone871,ASD54753,15055 +57,2015/9/2,china,aaa57,phone1570,ASD25758,15056 +58,2015/9/3,china,aaa58,phone434,ASD30291,15057 +59,2015/9/4,china,aaa59,phone2023,ASD60739,15058 +60,2015/9/5,china,aaa60,phone1755,ASD4955,15059 +61,2015/9/6,china,aaa61,phone1120,ASD41678,15060 +62,2015/9/7,china,aaa62,phone526,ASD73647,15061 +63,2015/9/8,china,aaa63,phone111,ASD20917,15062 +64,2015/9/9,china,aaa64,phone2477,ASD78171,15063 +65,2015/9/10,china,aaa65,phone1458,ASD3023,15064 +66,2015/9/11,china,aaa66,phone33,ASD54379,15065 +67,2015/9/12,china,aaa67,phone1710,ASD65296,15066 +68,2015/9/13,china,aaa68,phone118,ASD4568,15067 
+69,2015/9/14,china,aaa69,phone2772,ASD42161,15068 +70,2015/9/15,china,aaa70,phone1013,ASD88261,15069 +71,2015/9/16,china,aaa71,phone1606,ASD33903,15070 +72,2015/9/17,china,aaa72,phone2800,ASD60308,15071 +73,2015/9/18,china,aaa73,phone2461,ASD14645,15072 +74,2015/9/19,china,aaa74,phone1038,ASD66620,15073 +75,2015/9/20,china,aaa75,phone2882,ASD23220,15074 +76,2015/9/21,china,aaa76,phone1665,ASD31618,15075 +77,2015/9/22,china,aaa77,phone2991,ASD37964,15076 +78,2015/9/23,china,aaa78,phone620,ASD7257,15077 +79,2015/9/24,china,aaa79,phone1097,ASD12510,15078 +80,2015/9/25,usa,aaa80,phone1668,ASD41149,15079 +81,2015/9/26,china,aaa81,phone2869,ASD95862,15080 +82,2015/9/27,china,aaa82,phone2506,ASD77011,15081 +83,2015/9/28,china,aaa83,phone2897,ASD6674,15082 +84,2015/9/29,china,aaa84,phone954,ASD72595,15083 +85,2015/9/30,china,aaa85,phone1382,ASD86617,15084 +86,2015/10/1,china,aaa86,phone284,ASD31454,15085 +87,2015/10/2,china,aaa87,phone1000,ASD1404,15086 +88,2015/10/3,china,aaa88,phone1813,ASD6955,15087 +89
[4/4] incubator-carbondata git commit: [CARBONDATA-519]Added vector reader in Carbon Spark integration layer and in Carbon scan. This closes #412
[CARBONDATA-519]Added vector reader in Carbon Spark integration layer and in Carbon scan. This closes #412 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/a561d869 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/a561d869 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/a561d869 Branch: refs/heads/master Commit: a561d869c69e9debfcc183b1e9a5202883aa8ceb Parents: 5e50d04 376d69f Author: jackylk Authored: Tue Dec 20 11:10:38 2016 +0800 Committer: jackylk Committed: Tue Dec 20 11:10:38 2016 +0800 -- .../chunk/DimensionColumnDataChunk.java | 13 + .../impl/ColumnGroupDimensionDataChunk.java | 68 +- .../impl/FixedLengthDimensionDataChunk.java | 93 +- .../impl/VariableLengthDimensionDataChunk.java | 48 + .../core/constants/CarbonCommonConstants.java |9 + .../DirectDictionaryGenerator.java |8 + .../DateDirectDictionaryGenerator.java |4 + .../TimeStampDirectDictionaryGenerator.java |5 + .../scan/collector/ScannedResultCollector.java |7 + .../impl/AbstractScannedResultCollector.java|5 + .../DictionaryBasedVectorResultCollector.java | 142 +++ .../scan/executor/QueryExecutorFactory.java | 10 +- .../executor/impl/AbstractQueryExecutor.java|1 + .../impl/VectorDetailQueryExecutor.java | 41 + .../scan/executor/infos/BlockExecutionInfo.java | 13 + .../carbondata/scan/model/QueryModel.java | 10 + .../processor/AbstractDataBlockIterator.java|7 +- .../processor/impl/DataBlockIteratorImpl.java | 10 + .../scan/result/AbstractScannedResult.java | 85 +- .../result/impl/FilterQueryScannedResult.java | 35 + .../AbstractDetailQueryResultIterator.java |5 + .../result/iterator/VectorChunkRowIterator.java | 93 ++ .../VectorDetailQueryResultIterator.java| 52 + .../scan/result/vector/CarbonColumnVector.java | 47 + .../scan/result/vector/CarbonColumnarBatch.java | 62 ++ .../scan/result/vector/ColumnVectorInfo.java| 41 + 
.../vector/MeasureDataVectorProcessor.java | 268 + .../vector/impl/CarbonColumnVectorImpl.java | 154 +++ .../carbondata/hadoop/CarbonInputFormat.java| 14 +- .../carbondata/hadoop/CarbonRecordReader.java |2 +- .../readsupport/impl/RawDataReadSupport.java|9 +- .../spark/merger/CarbonCompactionExecutor.java |2 +- .../carbondata/spark/rdd/CarbonScanRDD.scala| 71 +- .../scala/org/apache/spark/sql/CarbonScan.scala |6 +- .../ColumnGroupDataTypesTestCase.scala |4 +- .../readsupport/SparkRowReadSupportImpl.java| 22 +- .../vectorreader/ColumnarVectorWrapper.java | 80 ++ .../VectorizedCarbonRecordReader.java | 256 + .../sql/CarbonDatasourceHadoopRelation.scala|5 +- .../spark/sql/CarbonDictionaryDecoder.scala | 20 +- .../execution/CarbonLateDecodeStrategy.scala| 89 +- .../spark2/src/test/resources/dataDiff.csv | 1001 ++ .../carbondata/CarbonDataSourceSuite.scala | 27 +- .../spark/carbondata/util/QueryTest.scala | 66 ++ .../vectorreader/VectorReaderTestCase.scala | 79 ++ 45 files changed, 2972 insertions(+), 117 deletions(-) --
[3/4] incubator-carbondata git commit: add initial check in for vector reader
add initial check in for vector reader Added vector reader in carbon Fixed check style Added batch reader support in spark layer rebased Fixed issues Added testcases fixed testcase fixed comments fixed style fixed testcase Fixed review comments Fixed testcase and comments Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/376d69ff Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/376d69ff Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/376d69ff Branch: refs/heads/master Commit: 376d69ff72c88d541022d3cea98ef1d6a7542ee6 Parents: 5e50d04 Author: ravipesala Authored: Tue Dec 6 23:24:05 2016 +0530 Committer: jackylk Committed: Tue Dec 20 11:01:27 2016 +0800 -- .../chunk/DimensionColumnDataChunk.java | 13 + .../impl/ColumnGroupDimensionDataChunk.java | 68 +- .../impl/FixedLengthDimensionDataChunk.java | 93 +- .../impl/VariableLengthDimensionDataChunk.java | 48 + .../core/constants/CarbonCommonConstants.java |9 + .../DirectDictionaryGenerator.java |8 + .../DateDirectDictionaryGenerator.java |4 + .../TimeStampDirectDictionaryGenerator.java |5 + .../scan/collector/ScannedResultCollector.java |7 + .../impl/AbstractScannedResultCollector.java|5 + .../DictionaryBasedVectorResultCollector.java | 142 +++ .../scan/executor/QueryExecutorFactory.java | 10 +- .../executor/impl/AbstractQueryExecutor.java|1 + .../impl/VectorDetailQueryExecutor.java | 41 + .../scan/executor/infos/BlockExecutionInfo.java | 13 + .../carbondata/scan/model/QueryModel.java | 10 + .../processor/AbstractDataBlockIterator.java|7 +- .../processor/impl/DataBlockIteratorImpl.java | 10 + .../scan/result/AbstractScannedResult.java | 85 +- .../result/impl/FilterQueryScannedResult.java | 35 + .../AbstractDetailQueryResultIterator.java |5 + .../result/iterator/VectorChunkRowIterator.java | 93 ++ .../VectorDetailQueryResultIterator.java| 52 + 
.../scan/result/vector/CarbonColumnVector.java | 47 + .../scan/result/vector/CarbonColumnarBatch.java | 62 ++ .../scan/result/vector/ColumnVectorInfo.java| 41 + .../vector/MeasureDataVectorProcessor.java | 268 + .../vector/impl/CarbonColumnVectorImpl.java | 154 +++ .../carbondata/hadoop/CarbonInputFormat.java| 14 +- .../carbondata/hadoop/CarbonRecordReader.java |2 +- .../readsupport/impl/RawDataReadSupport.java|9 +- .../spark/merger/CarbonCompactionExecutor.java |2 +- .../carbondata/spark/rdd/CarbonScanRDD.scala| 71 +- .../scala/org/apache/spark/sql/CarbonScan.scala |6 +- .../ColumnGroupDataTypesTestCase.scala |4 +- .../readsupport/SparkRowReadSupportImpl.java| 22 +- .../vectorreader/ColumnarVectorWrapper.java | 80 ++ .../VectorizedCarbonRecordReader.java | 256 + .../sql/CarbonDatasourceHadoopRelation.scala|5 +- .../spark/sql/CarbonDictionaryDecoder.scala | 20 +- .../execution/CarbonLateDecodeStrategy.scala| 89 +- .../spark2/src/test/resources/dataDiff.csv | 1001 ++ .../carbondata/CarbonDataSourceSuite.scala | 27 +- .../spark/carbondata/util/QueryTest.scala | 66 ++ .../vectorreader/VectorReaderTestCase.scala | 79 ++ 45 files changed, 2972 insertions(+), 117 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/376d69ff/core/src/main/java/org/apache/carbondata/core/carbon/datastore/chunk/DimensionColumnDataChunk.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/carbon/datastore/chunk/DimensionColumnDataChunk.java b/core/src/main/java/org/apache/carbondata/core/carbon/datastore/chunk/DimensionColumnDataChunk.java index ddc76c0..efa67e6 100644 --- a/core/src/main/java/org/apache/carbondata/core/carbon/datastore/chunk/DimensionColumnDataChunk.java +++ b/core/src/main/java/org/apache/carbondata/core/carbon/datastore/chunk/DimensionColumnDataChunk.java @@ -19,6 +19,7 @@ package org.apache.carbondata.core.carbon.datastore.chunk; import org.apache.carbondata.scan.executor.infos.KeyStructureInfo; +import 
org.apache.carbondata.scan.result.vector.ColumnVectorInfo; /** * Interface for dimension column chunk. @@ -46,6 +47,18 @@ public interface DimensionColumnDataChunk { KeyStructureInfo restructuringInfo); /** + * Fill the data to vector + */ + int fillConvertedChunkData(ColumnVectorInfo[] vectorInfo, int column, + KeyStructureInfo restructuringInfo); + + /** + * Fill the data to vector + */ + int fillConvertedChunkData(int
[1/2] incubator-carbondata git commit: fix load bug when table name has '_'
Repository: incubator-carbondata Updated Branches: refs/heads/master a561d869c -> ddeb00425 fix load bug when table name has '_' Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/fc1d620e Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/fc1d620e Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/fc1d620e Branch: refs/heads/master Commit: fc1d620e729746271c70159636e0cc05ac46d99e Parents: a561d86 Author: Jay357089 Authored: Wed Nov 16 10:49:35 2016 +0800 Committer: jackylk Committed: Wed Dec 21 16:43:20 2016 +0800 -- .../core/carbon/path/CarbonTablePath.java | 8 ++-- .../core/constants/CarbonCommonConstants.java | 4 ++ .../dataload/TestLoadDataGeneral.scala | 42 +++- 3 files changed, 50 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/fc1d620e/core/src/main/java/org/apache/carbondata/core/carbon/path/CarbonTablePath.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/carbon/path/CarbonTablePath.java b/core/src/main/java/org/apache/carbondata/core/carbon/path/CarbonTablePath.java index 99531e4..f90073e 100644 --- a/core/src/main/java/org/apache/carbondata/core/carbon/path/CarbonTablePath.java +++ b/core/src/main/java/org/apache/carbondata/core/carbon/path/CarbonTablePath.java @@ -397,10 +397,12 @@ public class CarbonTablePath extends Path { */ public static String getSegmentId(String dataFileAbsolutePath) { // find segment id from last of data file path - int endIndex = dataFileAbsolutePath.lastIndexOf(CarbonCommonConstants.FILE_SEPARATOR); + String tempdataFileAbsolutePath = dataFileAbsolutePath.replace( + CarbonCommonConstants.WINDOWS_FILE_SEPARATOR, CarbonCommonConstants.FILE_SEPARATOR); + int endIndex = tempdataFileAbsolutePath.lastIndexOf(CarbonCommonConstants.FILE_SEPARATOR); // + 1 for size of "/" - int startIndex = - 
dataFileAbsolutePath.lastIndexOf(CarbonCommonConstants.FILE_SEPARATOR, endIndex - 1) + 1; + int startIndex = tempdataFileAbsolutePath.lastIndexOf( + CarbonCommonConstants.FILE_SEPARATOR, endIndex - 1) + 1; String segmentDirStr = dataFileAbsolutePath.substring(startIndex, endIndex); //identify id in segment_ String[] segmentDirSplits = segmentDirStr.split("_"); http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/fc1d620e/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java index c32956c..2d1ba9f 100644 --- a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java +++ b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java @@ -461,6 +461,10 @@ public final class CarbonCommonConstants { */ public static final String POINT = "."; /** + * Windows File separator + */ + public static final String WINDOWS_FILE_SEPARATOR = "\\"; + /** * File separator */ public static final String FILE_SEPARATOR = "/"; http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/fc1d620e/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala -- diff --git a/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala b/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala index 9904c93..6a274b6 100644 --- a/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala +++ b/integration/spark/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala @@ -22,11 +22,16 @@ package 
org.apache.carbondata.integration.spark.testsuite.dataload import java.io.File import java.math.BigDecimal +import org.apache.carbondata.core.carbon.path.{CarbonStorePath, CarbonTablePath} +import org.apache.carbondata.core.datastorage.store.filesystem.CarbonFile +import org.apache.carbondata.core.datastorage.store.impl.FileFactory import org.apache.spark.sql.Row import org.apache.spark.sql.common.util.CarbonHiveContext._ -import org.apache.spark.sql.common.util.QueryT
[2/2] incubator-carbondata git commit: [CARBONDATA-412] Fix load bug when table name has '_' This closes #320
[CARBONDATA-412] Fix load bug when table name has '_' This closes #320 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/ddeb0042 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/ddeb0042 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/ddeb0042 Branch: refs/heads/master Commit: ddeb00425537ff59bdfba76779c5d96287e07d2e Parents: a561d86 fc1d620 Author: jackylk Authored: Wed Dec 21 16:45:29 2016 +0800 Committer: jackylk Committed: Wed Dec 21 16:45:29 2016 +0800 -- .../core/carbon/path/CarbonTablePath.java | 8 ++-- .../core/constants/CarbonCommonConstants.java | 4 ++ .../dataload/TestLoadDataGeneral.scala | 42 +++- 3 files changed, 50 insertions(+), 4 deletions(-) --
[2/2] incubator-carbondata git commit: [CARBONDATA-563] Fixed query failing in cluster This closes #463
[CARBONDATA-563] Fixed query failing in cluster This closes #463 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/a0e66461 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/a0e66461 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/a0e66461 Branch: refs/heads/master Commit: a0e66461ba6b27bfdf96fe9bf77667d2ffd26f9d Parents: 4a8e15d bfb41c6 Author: jackylk Authored: Mon Dec 26 10:03:52 2016 +0800 Committer: jackylk Committed: Mon Dec 26 10:03:52 2016 +0800 -- .../scala/org/apache/carbondata/spark/rdd/CarbonScanRDD.scala| 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) --
[1/2] incubator-carbondata git commit: fixed cluster issue
Repository: incubator-carbondata Updated Branches: refs/heads/master 4a8e15d34 -> a0e66461b fixed cluster issue Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/bfb41c6c Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/bfb41c6c Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/bfb41c6c Branch: refs/heads/master Commit: bfb41c6cdc52b37baa718b49f7637156a1d2562f Parents: 4a8e15d Author: ravipesala Authored: Sun Dec 25 21:00:08 2016 +0530 Committer: jackylk Committed: Mon Dec 26 10:03:29 2016 +0800 -- .../scala/org/apache/carbondata/spark/rdd/CarbonScanRDD.scala| 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/bfb41c6c/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonScanRDD.scala -- diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonScanRDD.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonScanRDD.scala index f20d12d..2705f94 100644 --- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonScanRDD.scala +++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonScanRDD.scala @@ -63,6 +63,8 @@ class CarbonScanRDD( } private var vectorReader = false + private val readSupport = SparkReadSupport.readSupportClass + @transient private val jobId = new JobID(jobTrackerId, id) @transient val LOGGER = LogServiceFactory.getLogService(this.getClass.getName) @@ -214,7 +216,7 @@ class CarbonScanRDD( } private def prepareInputFormatForExecutor(conf: Configuration): CarbonInputFormat[Object] = { -CarbonInputFormat.setCarbonReadSupport(conf, SparkReadSupport.readSupportClass) +CarbonInputFormat.setCarbonReadSupport(conf, readSupport) createInputFormat(conf) }
[1/2] incubator-carbondata git commit: add example:directly write dataframe to carbonfile without temp CSV
Repository: incubator-carbondata Updated Branches: refs/heads/master a0e66461b -> 2ee23acf4 add example:directly write dataframe to carbonfile without temp CSV Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/9bbc937a Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/9bbc937a Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/9bbc937a Branch: refs/heads/master Commit: 9bbc937adaadb88c4395b295916036724491bb8f Parents: a0e6646 Author: chenliang613 Authored: Sun Dec 25 12:04:26 2016 +0800 Committer: jackylk Committed: Mon Dec 26 10:46:19 2016 +0800 -- .../carbondata/examples/AllDictionaryExample.scala | 2 +- .../apache/carbondata/examples/HadoopFileExample.scala | 2 +- .../org/apache/carbondata/examples/PerfTest.scala | 6 ++ .../apache/carbondata/examples/util/ExampleUtils.scala | 13 - 4 files changed, 16 insertions(+), 7 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/9bbc937a/examples/spark/src/main/scala/org/apache/carbondata/examples/AllDictionaryExample.scala -- diff --git a/examples/spark/src/main/scala/org/apache/carbondata/examples/AllDictionaryExample.scala b/examples/spark/src/main/scala/org/apache/carbondata/examples/AllDictionaryExample.scala index 9fecadb..4dcc868 100644 --- a/examples/spark/src/main/scala/org/apache/carbondata/examples/AllDictionaryExample.scala +++ b/examples/spark/src/main/scala/org/apache/carbondata/examples/AllDictionaryExample.scala @@ -23,7 +23,7 @@ import org.apache.carbondata.examples.util.{AllDictionaryUtil, ExampleUtils} object AllDictionaryExample { def main(args: Array[String]) { -val cc = ExampleUtils.createCarbonContext("CarbonExample") +val cc = ExampleUtils.createCarbonContext("AllDictionaryExample") val testData = ExampleUtils.currentPath + "/src/main/resources/data.csv" val csvHeader = "ID,date,country,name,phonetype,serialname,salary" val dictCol = 
"|date|country|name|phonetype|serialname|" http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/9bbc937a/examples/spark/src/main/scala/org/apache/carbondata/examples/HadoopFileExample.scala -- diff --git a/examples/spark/src/main/scala/org/apache/carbondata/examples/HadoopFileExample.scala b/examples/spark/src/main/scala/org/apache/carbondata/examples/HadoopFileExample.scala index 329b3c9..292a3b5 100644 --- a/examples/spark/src/main/scala/org/apache/carbondata/examples/HadoopFileExample.scala +++ b/examples/spark/src/main/scala/org/apache/carbondata/examples/HadoopFileExample.scala @@ -24,7 +24,7 @@ import org.apache.carbondata.hadoop.CarbonInputFormat object HadoopFileExample { def main(args: Array[String]): Unit = { -val cc = ExampleUtils.createCarbonContext("DataFrameAPIExample") +val cc = ExampleUtils.createCarbonContext("HadoopFileExample") ExampleUtils.writeSampleCarbonFile(cc, "carbon1") val sc = cc.sparkContext http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/9bbc937a/examples/spark/src/main/scala/org/apache/carbondata/examples/PerfTest.scala -- diff --git a/examples/spark/src/main/scala/org/apache/carbondata/examples/PerfTest.scala b/examples/spark/src/main/scala/org/apache/carbondata/examples/PerfTest.scala index b1f6b24..824730f 100644 --- a/examples/spark/src/main/scala/org/apache/carbondata/examples/PerfTest.scala +++ b/examples/spark/src/main/scala/org/apache/carbondata/examples/PerfTest.scala @@ -145,15 +145,13 @@ class QueryRunner(sqlContext: SQLContext, dataFrame: DataFrame, datasources: Seq def shutDown(): Unit = { // drop all tables and temp files -datasources.foreach { datasource => - datasource match { -case "parquet" | "orc" => +datasources.foreach { +case datasource @ ("parquet" | "orc") => val f = new File(PerfTest.savePath(datasource)) if (f.exists()) f.delete() case "carbon" => sqlContext.sql(s"DROP TABLE IF EXISTS ${PerfTest.makeTableName("carbon")}") case _ => sys.error("unsupported data source") - } } } } 
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/9bbc937a/examples/spark/src/main/scala/org/apache/carbondata/examples/util/ExampleUtils.scala -- diff --git a/examples/
[2/2] incubator-carbondata git commit: Add example:directly write dataframe to carbon file without temp CSV This closes #462
Add example:directly write dataframe to carbon file without temp CSV This closes #462 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/2ee23acf Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/2ee23acf Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/2ee23acf Branch: refs/heads/master Commit: 2ee23acf4194b855a5264d17b57b48d5c869cf34 Parents: a0e6646 9bbc937 Author: jackylk Authored: Mon Dec 26 10:46:53 2016 +0800 Committer: jackylk Committed: Mon Dec 26 10:46:53 2016 +0800 -- .../carbondata/examples/AllDictionaryExample.scala | 2 +- .../apache/carbondata/examples/HadoopFileExample.scala | 2 +- .../org/apache/carbondata/examples/PerfTest.scala | 6 ++ .../apache/carbondata/examples/util/ExampleUtils.scala | 13 - 4 files changed, 16 insertions(+), 7 deletions(-) --
[1/2] incubator-carbondata git commit: [CARBONDATA-560] In QueryExecutionException, can not use executorService.shutdownNow() to shut down immediately.
Repository: incubator-carbondata Updated Branches: refs/heads/master 2ee23acf4 -> 37dc0fb6d [CARBONDATA-560] In QueryExecutionException, can not use executorService.shutdownNow() to shut down immediately. In QueryExecutionException, can not use executorService.shutdownNow() to shut down immediately. fix compilation error fix QueryExecutionException processing Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/d9fc651f Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/d9fc651f Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/d9fc651f Branch: refs/heads/master Commit: d9fc651fdbe377b47f26efa98a8ba3e89762719d Parents: 2ee23ac Author: Liang Chen Authored: Sat Dec 24 17:10:56 2016 +0800 Committer: jackylk Committed: Mon Dec 26 16:34:04 2016 +0800 -- .../carbondata/scan/executor/impl/AbstractQueryExecutor.java | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/d9fc651f/core/src/main/java/org/apache/carbondata/scan/executor/impl/AbstractQueryExecutor.java -- diff --git a/core/src/main/java/org/apache/carbondata/scan/executor/impl/AbstractQueryExecutor.java b/core/src/main/java/org/apache/carbondata/scan/executor/impl/AbstractQueryExecutor.java index df04dae..f6df175 100644 --- a/core/src/main/java/org/apache/carbondata/scan/executor/impl/AbstractQueryExecutor.java +++ b/core/src/main/java/org/apache/carbondata/scan/executor/impl/AbstractQueryExecutor.java @@ -25,6 +25,7 @@ import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; import org.apache.carbondata.common.logging.LogService; import org.apache.carbondata.common.logging.LogServiceFactory; @@ -470,7 +471,12 @@ public abstract class AbstractQueryExecutor implements QueryExecutor { */ @Override public void finish() 
throws QueryExecutionException { if (null != queryProperties.executorService) { - queryProperties.executorService.shutdownNow(); + queryProperties.executorService.shutdown(); + try { +queryProperties.executorService.awaitTermination(1, TimeUnit.HOURS); + } catch (InterruptedException e) { +throw new QueryExecutionException(e); + } } }
[2/2] incubator-carbondata git commit: [CARBONDATA-560] fix QueryExecutionException processing. This closes #460
[CARBONDATA-560] fix QueryExecutionException processing. This closes #460 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/37dc0fb6 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/37dc0fb6 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/37dc0fb6 Branch: refs/heads/master Commit: 37dc0fb6d78f4ae9ba92b73c64bd8d1e7e3e70ec Parents: 2ee23ac d9fc651 Author: jackylk Authored: Mon Dec 26 16:34:30 2016 +0800 Committer: jackylk Committed: Mon Dec 26 16:34:30 2016 +0800 -- .../carbondata/scan/executor/impl/AbstractQueryExecutor.java | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) --
[1/4] incubator-carbondata git commit: Initial commit
Repository: incubator-carbondata Updated Branches: refs/heads/master 28190eb71 -> e8dcd4296 http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/bedc96d0/integration/spark2/src/main/scala/org/apache/spark/sql/parser/CarbonSparkSqlParser.scala -- diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/parser/CarbonSparkSqlParser.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/parser/CarbonSparkSqlParser.scala new file mode 100644 index 000..9a3f828 --- /dev/null +++ b/integration/spark2/src/main/scala/org/apache/spark/sql/parser/CarbonSparkSqlParser.scala @@ -0,0 +1,178 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.sql.parser + +import scala.collection.JavaConverters._ + +import org.apache.spark.sql.catalyst.catalog.CatalogColumn +import org.apache.spark.sql.catalyst.parser.{AbstractSqlParser, ParseException, SqlBaseParser} +import org.apache.spark.sql.catalyst.parser.ParserUtils._ +import org.apache.spark.sql.catalyst.parser.SqlBaseParser.{ColTypeListContext, CreateTableContext, TablePropertyListContext} +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.execution.SparkSqlAstBuilder +import org.apache.spark.sql.execution.command.{CreateTable, Field, TableModel} +import org.apache.spark.sql.internal.{SQLConf, VariableSubstitution} +import org.apache.spark.sql.types.DataType + +import org.apache.carbondata.spark.exception.MalformedCarbonCommandException +import org.apache.carbondata.spark.util.CommonUtil + +/** + * Concrete parser for Spark SQL statements and carbon specific statements + */ +class CarbonSparkSqlParser(conf: SQLConf) extends AbstractSqlParser { + + val astBuilder = new CarbonSqlAstBuilder(conf) + + private val substitutor = new VariableSubstitution(conf) + + protected override def parse[T](command: String)(toResult: SqlBaseParser => T): T = { +super.parse(substitutor.substitute(command))(toResult) + } + + override def parsePlan(sqlText: String): LogicalPlan = { +try { + super.parsePlan(sqlText) +} catch { + case e: Throwable => +astBuilder.parser.parse(sqlText) +} + } +} + +class CarbonSqlAstBuilder(conf: SQLConf) extends SparkSqlAstBuilder(conf) { + + val parser = new CarbonSpark2SqlParser + + override def visitCreateTable(ctx: CreateTableContext): LogicalPlan = { +val fileStorage = Option(ctx.createFileFormat) match { + case Some(value) => value.storageHandler().STRING().getSymbol.getText + case _ => "" +} +if (fileStorage.equalsIgnoreCase("'carbondata'") || +fileStorage.equalsIgnoreCase("'org.apache.carbondata.format'")) { + val (name, temp, ifNotExists, external) = 
visitCreateTableHeader(ctx.createTableHeader) + // TODO: implement temporary tables + if (temp) { +throw new ParseException( + "CREATE TEMPORARY TABLE is not supported yet. " + + "Please use CREATE TEMPORARY VIEW as an alternative.", ctx) + } + if (ctx.skewSpec != null) { +operationNotAllowed("CREATE TABLE ... SKEWED BY", ctx) + } + if (ctx.bucketSpec != null) { +operationNotAllowed("CREATE TABLE ... CLUSTERED BY", ctx) + } + val comment = Option(ctx.STRING).map(string) + val partitionCols = Option(ctx.partitionColumns).toSeq.flatMap(visitCatalogColumns) + val cols = Option(ctx.columns).toSeq.flatMap(visitCatalogColumns) + val properties = Option(ctx.tablePropertyList).map(visitPropertyKeyValues) +.getOrElse(Map.empty) + + // Ensuring whether no duplicate name is used in table definition + val colNames = cols.map(_.name) + if (colNames.length != colNames.distinct.length) { +val duplicateColumns = colNames.groupBy(identity).collect { + case (x, ys) if ys.length > 1 => "\"" + x + "\"" +} +operationNotAllowed(s"Duplicated column names found in table definition of $name: " + +duplicateColumns.mkString("[", ",", "]"), ctx) + } + + // For Hive tables, partition columns must not be part of the schema + val badPartCols
[2/4] incubator-carbondata git commit: Initial commit
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/bedc96d0/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala -- diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala index 21864d1..16e35f4 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala @@ -17,13 +17,9 @@ package org.apache.spark.sql -import java.util.regex.{Matcher, Pattern} - import scala.collection.JavaConverters._ -import scala.collection.mutable.LinkedHashSet import scala.collection.mutable.Map import scala.language.implicitConversions -import scala.util.matching.Regex import org.apache.hadoop.hive.ql.lib.Node import org.apache.hadoop.hive.ql.parse._ @@ -31,154 +27,18 @@ import org.apache.spark.sql.catalyst._ import org.apache.spark.sql.catalyst.CarbonTableIdentifierImplicit._ import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.plans.logical._ -import org.apache.spark.sql.catalyst.trees.CurrentOrigin import org.apache.spark.sql.execution.ExplainCommand import org.apache.spark.sql.execution.command._ import org.apache.spark.sql.execution.datasources.DescribeCommand import org.apache.spark.sql.hive.HiveQlWrapper -import org.apache.carbondata.common.logging.LogServiceFactory -import org.apache.carbondata.core.carbon.metadata.datatype.DataType -import org.apache.carbondata.core.constants.CarbonCommonConstants -import org.apache.carbondata.core.util.{CarbonProperties, DataTypeUtil} import org.apache.carbondata.spark.exception.MalformedCarbonCommandException import org.apache.carbondata.spark.util.CommonUtil /** * Parser for All Carbon DDL, DML cases in Unified context */ -class CarbonSqlParser() extends AbstractSparkSQLParser { - - val LOGGER = LogServiceFactory.getLogService(this.getClass.getCanonicalName) - 
protected val AGGREGATE = carbonKeyWord("AGGREGATE") - protected val AS = carbonKeyWord("AS") - protected val AGGREGATION = carbonKeyWord("AGGREGATION") - protected val ALL = carbonKeyWord("ALL") - protected val HIGH_CARDINALITY_DIMS = carbonKeyWord("NO_DICTIONARY") - protected val BEFORE = carbonKeyWord("BEFORE") - protected val BY = carbonKeyWord("BY") - protected val CARDINALITY = carbonKeyWord("CARDINALITY") - protected val CASCADE = carbonKeyWord("CASCADE") - protected val CLASS = carbonKeyWord("CLASS") - protected val CLEAN = carbonKeyWord("CLEAN") - protected val COLS = carbonKeyWord("COLS") - protected val COLUMNS = carbonKeyWord("COLUMNS") - protected val CREATE = carbonKeyWord("CREATE") - protected val CUBE = carbonKeyWord("CUBE") - protected val CUBES = carbonKeyWord("CUBES") - protected val DATA = carbonKeyWord("DATA") - protected val DATABASE = carbonKeyWord("DATABASE") - protected val DATABASES = carbonKeyWord("DATABASES") - protected val DELETE = carbonKeyWord("DELETE") - protected val DELIMITER = carbonKeyWord("DELIMITER") - protected val DESCRIBE = carbonKeyWord("DESCRIBE") - protected val DESC = carbonKeyWord("DESC") - protected val DETAIL = carbonKeyWord("DETAIL") - protected val DIMENSIONS = carbonKeyWord("DIMENSIONS") - protected val DIMFOLDERPATH = carbonKeyWord("DIMFOLDERPATH") - protected val DROP = carbonKeyWord("DROP") - protected val ESCAPECHAR = carbonKeyWord("ESCAPECHAR") - protected val EXCLUDE = carbonKeyWord("EXCLUDE") - protected val EXPLAIN = carbonKeyWord("EXPLAIN") - protected val EXTENDED = carbonKeyWord("EXTENDED") - protected val FORMATTED = carbonKeyWord("FORMATTED") - protected val FACT = carbonKeyWord("FACT") - protected val FIELDS = carbonKeyWord("FIELDS") - protected val FILEHEADER = carbonKeyWord("FILEHEADER") - protected val SERIALIZATION_NULL_FORMAT = carbonKeyWord("SERIALIZATION_NULL_FORMAT") - protected val BAD_RECORDS_LOGGER_ENABLE = carbonKeyWord("BAD_RECORDS_LOGGER_ENABLE") - protected val BAD_RECORDS_ACTION = 
carbonKeyWord("BAD_RECORDS_ACTION") - protected val FILES = carbonKeyWord("FILES") - protected val FROM = carbonKeyWord("FROM") - protected val HIERARCHIES = carbonKeyWord("HIERARCHIES") - protected val IN = carbonKeyWord("IN") - protected val INCLUDE = carbonKeyWord("INCLUDE") - protected val INPATH = carbonKeyWord("INPATH") - protected val INTO = carbonKeyWord("INTO") - protected val LEVELS = carbonKeyWord("LEVELS") - protected val LIKE = carbonKeyWord("LIKE") - protected val LOAD = carbonKeyWord("LOAD") - protected val LOCAL = carbonKeyWord("LOCAL") - protected val MAPPED = carbonKeyWord("MAPPED") - protected val MEASURES = carbonKeyWord("MEASURES") - protected val MULTILINE = carbonKeyWord("MULTILINE") - protected val COMPLEX_DELIMITER_LEVEL_1 = carbonKeyWord("COMPLEX_DELIMITER_LEVEL_1") - protected val COMPLEX_DELIMITER_LEVEL_2
[3/4] incubator-carbondata git commit: Initial commit
Initial commit Added comments Fixed style and testcases Refactored code Fixed issue Rebased Rebased fixed comments fixed style Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/bedc96d0 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/bedc96d0 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/bedc96d0 Branch: refs/heads/master Commit: bedc96d059fe25ebec298220b92243e87c496a0c Parents: 28190eb Author: ravipesala Authored: Mon Dec 19 18:57:11 2016 +0530 Committer: jackylk Committed: Tue Dec 27 08:48:59 2016 +0800 -- .../carbondata/examples/CarbonExample.scala | 173 .../examples/CarbonSessionExample.scala | 144 +++ .../examples/SparkSessionExample.scala | 173 .../catalyst/AbstractCarbonSparkSQLParser.scala | 137 +++ .../spark/sql/catalyst/CarbonDDLSqlParser.scala | 968 +++ .../execution/command/carbonTableSchema.scala | 8 +- .../org/apache/spark/sql/CarbonSqlParser.scala | 938 +- .../execution/command/carbonTableSchema.scala | 2 +- .../spark/sql/CarbonCatalystOperators.scala | 36 +- .../scala/org/apache/spark/sql/CarbonEnv.scala | 4 +- .../org/apache/spark/sql/CarbonSession.scala| 133 +++ .../org/apache/spark/sql/CarbonSource.scala | 12 +- .../execution/CarbonLateDecodeStrategy.scala| 6 +- .../sql/execution/command/DDLStrategy.scala | 83 ++ .../execution/command/carbonTableSchema.scala | 319 +- .../apache/spark/sql/hive/CarbonMetastore.scala | 9 + .../spark/sql/hive/CarbonSessionState.scala | 38 + .../sql/parser/CarbonSpark2SqlParser.scala | 125 +++ .../spark/sql/parser/CarbonSparkSqlParser.scala | 178 .../org/apache/spark/util/CleanFiles.scala | 2 +- .../org/apache/spark/util/Compaction.scala | 2 +- .../apache/spark/util/DeleteSegmentByDate.scala | 2 +- .../apache/spark/util/DeleteSegmentById.scala | 2 +- .../org/apache/spark/util/ShowSegments.scala| 2 +- .../org/apache/spark/util/TableLoader.scala | 2 +- 
.../spark/carbondata/util/QueryTest.scala | 0 .../sql/common/util/CarbonSessionTest.scala | 0 .../spark/sql/common/util/QueryTest.scala | 13 +- 28 files changed, 2366 insertions(+), 1145 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/bedc96d0/examples/spark2/src/main/scala/org/apache/carbondata/examples/CarbonExample.scala -- diff --git a/examples/spark2/src/main/scala/org/apache/carbondata/examples/CarbonExample.scala b/examples/spark2/src/main/scala/org/apache/carbondata/examples/CarbonExample.scala deleted file mode 100644 index 273de95..000 --- a/examples/spark2/src/main/scala/org/apache/carbondata/examples/CarbonExample.scala +++ /dev/null @@ -1,173 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - *http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.sql.examples - -import java.io.File - -import org.apache.commons.io.FileUtils -import org.apache.spark.sql.SparkSession -import org.apache.spark.util.{CleanFiles, ShowSegments} - -import org.apache.carbondata.core.constants.CarbonCommonConstants -import org.apache.carbondata.core.util.CarbonProperties - -object CarbonExample { - - def main(args: Array[String]): Unit = { -val rootPath = new File(this.getClass.getResource("/").getPath -+ "../../../..").getCanonicalPath -val storeLocation = s"$rootPath/examples/spark2/target/store" -val warehouse = s"$rootPath/examples/spark2/target/warehouse" -val metastoredb = s"$rootPath/examples/spark2/target/metastore_db" - -// clean data folder -if (true) { - val clean = (path: String) => FileUtils.deleteDirectory(new File(path)) - clean(storeLocation) - clean(warehouse) - clean(metastoredb) -} - -val spark = SparkSession -
[4/4] incubator-carbondata git commit: [CARBONDATA-547] Added CarbonSession and enabled parser to use carbon custom commands This closes #448
[CARBONDATA-547] Added CarbonSession and enabled parser to use carbon custom commands This closes #448 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/e8dcd429 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/e8dcd429 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/e8dcd429 Branch: refs/heads/master Commit: e8dcd4296b3eec453f100290afe39489edbd151f Parents: 28190eb bedc96d Author: jackylk Authored: Tue Dec 27 09:17:42 2016 +0800 Committer: jackylk Committed: Tue Dec 27 09:17:42 2016 +0800 -- .../carbondata/examples/CarbonExample.scala | 173 .../examples/CarbonSessionExample.scala | 144 +++ .../examples/SparkSessionExample.scala | 173 .../catalyst/AbstractCarbonSparkSQLParser.scala | 137 +++ .../spark/sql/catalyst/CarbonDDLSqlParser.scala | 968 +++ .../execution/command/carbonTableSchema.scala | 8 +- .../org/apache/spark/sql/CarbonSqlParser.scala | 938 +- .../execution/command/carbonTableSchema.scala | 2 +- .../spark/sql/CarbonCatalystOperators.scala | 36 +- .../scala/org/apache/spark/sql/CarbonEnv.scala | 4 +- .../org/apache/spark/sql/CarbonSession.scala| 133 +++ .../org/apache/spark/sql/CarbonSource.scala | 12 +- .../execution/CarbonLateDecodeStrategy.scala| 6 +- .../sql/execution/command/DDLStrategy.scala | 83 ++ .../execution/command/carbonTableSchema.scala | 319 +- .../apache/spark/sql/hive/CarbonMetastore.scala | 9 + .../spark/sql/hive/CarbonSessionState.scala | 38 + .../sql/parser/CarbonSpark2SqlParser.scala | 125 +++ .../spark/sql/parser/CarbonSparkSqlParser.scala | 178 .../org/apache/spark/util/CleanFiles.scala | 2 +- .../org/apache/spark/util/Compaction.scala | 2 +- .../apache/spark/util/DeleteSegmentByDate.scala | 2 +- .../apache/spark/util/DeleteSegmentById.scala | 2 +- .../org/apache/spark/util/ShowSegments.scala| 2 +- .../org/apache/spark/util/TableLoader.scala | 2 +- .../spark/carbondata/util/QueryTest.scala 
| 0 .../sql/common/util/CarbonSessionTest.scala | 0 .../spark/sql/common/util/QueryTest.scala | 13 +- 28 files changed, 2366 insertions(+), 1145 deletions(-) --
[2/2] incubator-carbondata git commit: [CARBONDATA-540]Support insertInto without kettle for spark2 This closes #449
[CARBONDATA-540]Support insertInto without kettle for spark2 This closes #449 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/7788f468 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/7788f468 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/7788f468 Branch: refs/heads/master Commit: 7788f468c1fce106350d9368acdc4018d8928956 Parents: a011aaf 498cf98 Author: jackylk Authored: Wed Dec 28 15:18:25 2016 +0800 Committer: jackylk Committed: Wed Dec 28 15:18:25 2016 +0800 -- .../spark/rdd/NewCarbonDataLoadRDD.scala| 132 +++ .../readsupport/SparkRowReadSupportImpl.java| 2 +- .../spark/rdd/CarbonDataRDDFactory.scala| 22 +-- .../emptyrow/TestCSVHavingOnlySpaceChar.scala | 1 - .../testsuite/emptyrow/TestEmptyRows.scala | 2 +- .../spark/rdd/CarbonDataRDDFactory.scala| 36 +++-- .../apache/spark/sql/hive/CarbonMetastore.scala | 1 - .../InsertIntoCarbonTableTestCase.scala | 162 +++ .../carbondata/CarbonDataSourceSuite.scala | 1 + .../sql/common/util/CarbonSessionTest.scala | 0 .../store/CarbonFactDataHandlerColumnar.java| 29 +++- .../store/SingleThreadFinalSortFilesMerger.java | 2 +- .../store/writer/AbstractFactDataWriter.java| 19 ++- 13 files changed, 287 insertions(+), 122 deletions(-) --
[1/2] incubator-carbondata git commit: insertInto without kettle for spark2
Repository: incubator-carbondata Updated Branches: refs/heads/master a011aafb0 -> 7788f468c inserInto without kettle for spark2 fix comments Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/498cf982 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/498cf982 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/498cf982 Branch: refs/heads/master Commit: 498cf982995feba7012ce3993b3fd5172d2e5a15 Parents: a011aaf Author: QiangCai Authored: Mon Dec 19 11:01:40 2016 +0800 Committer: jackylk Committed: Wed Dec 28 14:45:18 2016 +0800 -- .../spark/rdd/NewCarbonDataLoadRDD.scala| 132 +++ .../readsupport/SparkRowReadSupportImpl.java| 2 +- .../spark/rdd/CarbonDataRDDFactory.scala| 22 +-- .../emptyrow/TestCSVHavingOnlySpaceChar.scala | 1 - .../testsuite/emptyrow/TestEmptyRows.scala | 2 +- .../spark/rdd/CarbonDataRDDFactory.scala| 36 +++-- .../apache/spark/sql/hive/CarbonMetastore.scala | 1 - .../InsertIntoCarbonTableTestCase.scala | 162 +++ .../carbondata/CarbonDataSourceSuite.scala | 1 + .../sql/common/util/CarbonSessionTest.scala | 0 .../store/CarbonFactDataHandlerColumnar.java| 29 +++- .../store/SingleThreadFinalSortFilesMerger.java | 2 +- .../store/writer/AbstractFactDataWriter.java| 19 ++- 13 files changed, 287 insertions(+), 122 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/498cf982/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/NewCarbonDataLoadRDD.scala -- diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/NewCarbonDataLoadRDD.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/NewCarbonDataLoadRDD.scala index 96bb5ed..64b8b61 100644 --- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/NewCarbonDataLoadRDD.scala +++ 
b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/NewCarbonDataLoadRDD.scala @@ -18,20 +18,23 @@ package org.apache.carbondata.spark.rdd import java.io.{IOException, ObjectInputStream, ObjectOutputStream} +import java.nio.ByteBuffer import java.text.SimpleDateFormat import java.util import java.util.{Date, UUID} import scala.collection.JavaConverters._ +import scala.collection.mutable import scala.util.control.NonFatal import org.apache.hadoop.conf.Configuration import org.apache.hadoop.mapreduce.{TaskAttemptID, TaskType} import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl -import org.apache.spark.{Partition, SparkContext, TaskContext} -import org.apache.spark.rdd.RDD +import org.apache.spark.{Partition, SparkContext, SparkEnv, TaskContext} +import org.apache.spark.rdd.{DataLoadCoalescedRDD, DataLoadPartitionWrap, RDD} import org.apache.spark.sql.Row import org.apache.spark.sql.execution.command.Partitioner +import org.apache.spark.util.SparkUtil import org.apache.carbondata.common.CarbonIterator import org.apache.carbondata.common.logging.LogServiceFactory @@ -41,12 +44,13 @@ import org.apache.carbondata.core.load.{BlockDetails, LoadMetadataDetails} import org.apache.carbondata.core.util.{CarbonProperties, CarbonTimeStatisticsFactory} import org.apache.carbondata.hadoop.csv.CSVInputFormat import org.apache.carbondata.hadoop.csv.recorditerator.RecordReaderIterator +import org.apache.carbondata.processing.csvreaderstep.JavaRddIterator import org.apache.carbondata.processing.model.CarbonLoadModel import org.apache.carbondata.processing.newflow.DataLoadExecutor import org.apache.carbondata.processing.newflow.exception.BadRecordFoundException import org.apache.carbondata.spark.DataLoadResult import org.apache.carbondata.spark.splits.TableSplit -import org.apache.carbondata.spark.util.CarbonQueryUtil +import org.apache.carbondata.spark.util.{CarbonQueryUtil, CarbonScalaUtil} class SerializableConfiguration(@transient var value: 
Configuration) extends Serializable { @@ -323,7 +327,7 @@ class NewDataFrameLoaderRDD[K, V]( loadCount: Integer, tableCreationTime: Long, schemaLastUpdatedTime: Long, - prev: RDD[Row]) extends RDD[(K, V)](prev) { + prev: DataLoadCoalescedRDD[Row]) extends RDD[(K, V)](prev) { override def compute(theSplit: Partition, context: TaskContext): Iterator[(K, V)] = { @@ -342,29 +346,25 @@ class NewDataFrameLoaderRDD[K, V]( carbonLoadModel.setSegmentId(String.valueOf(loadCo
[1/2] incubator-carbondata git commit: Added thrift server support to Spark 2.0 integration
Repository: incubator-carbondata Updated Branches: refs/heads/master 997920a0f -> 65b922126 Added thrift server support to Spark 2.0 integration Fixed comments Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/81eca096 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/81eca096 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/81eca096 Branch: refs/heads/master Commit: 81eca0967cc2a836f81278ff552057a1fc59a531 Parents: 997920a Author: ravipesala Authored: Wed Dec 28 09:56:48 2016 +0530 Committer: jackylk Committed: Wed Dec 28 21:12:03 2016 +0800 -- .../spark/thriftserver/CarbonThriftServer.scala | 64 .../org/apache/spark/sql/CarbonSession.scala| 46 +- .../execution/command/carbonTableSchema.scala | 4 +- 3 files changed, 110 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/81eca096/integration/spark2/src/main/scala/org/apache/carbondata/spark/thriftserver/CarbonThriftServer.scala -- diff --git a/integration/spark2/src/main/scala/org/apache/carbondata/spark/thriftserver/CarbonThriftServer.scala b/integration/spark2/src/main/scala/org/apache/carbondata/spark/thriftserver/CarbonThriftServer.scala new file mode 100644 index 000..6a6ee00 --- /dev/null +++ b/integration/spark2/src/main/scala/org/apache/carbondata/spark/thriftserver/CarbonThriftServer.scala @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.spark.thriftserver + +import org.apache.spark.SparkConf +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.hive.thriftserver.HiveThriftServer2 + +import org.apache.carbondata.common.logging.LogServiceFactory +import org.apache.carbondata.core.util.CarbonProperties + +object CarbonThriftServer { + + def main(args: Array[String]): Unit = { + +import org.apache.spark.sql.CarbonSession._ + +val sparkConf = new SparkConf(loadDefaults = true) +val builder = SparkSession + .builder() + .config(sparkConf) + .appName("Carbon Thrift Server(uses CarbonSession)") + .enableHiveSupport() + +val sparkHome = System.getenv.get("SPARK_HOME") +if (null != sparkHome) { + builder.config("carbon.properties.filepath", +sparkHome + '/' + "conf" + '/' + "carbon.properties") + System.setProperty("carbon.properties.filepath", +sparkHome + '/' + "conf" + '/' + "carbon.properties") +} +CarbonProperties.getInstance().addProperty("carbon.storelocation", args.head) + +val spark = builder.getOrCreateCarbonSession() +val warmUpTime = CarbonProperties.getInstance().getProperty("carbon.spark.warmUpTime", "5000") +try { + Thread.sleep(Integer.parseInt(warmUpTime)) +} catch { + case e: Exception => +val LOG = LogServiceFactory.getLogService(this.getClass.getCanonicalName) +LOG.error(s"Wrong value for carbon.spark.warmUpTime $warmUpTime " + + "Using default Value and proceeding") +Thread.sleep(5000) +} + +HiveThriftServer2.startWithContext(spark.sqlContext) + } + +} 
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/81eca096/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonSession.scala -- diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonSession.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonSession.scala index 67ee478..e654c0e 100644 --- a/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonSession.scala +++ b/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonSe
[2/2] incubator-carbondata git commit: [CARBONDATA-574] Added thrift server support to Spark 2.0 integration This closes #474
[CARBONDATA-574] Added thrift server support to Spark 2.0 integration This closes #474 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/65b92212 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/65b92212 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/65b92212 Branch: refs/heads/master Commit: 65b922126713db00ab28b2006d7e72b498364cf9 Parents: 997920a 81eca09 Author: jackylk Authored: Wed Dec 28 21:21:48 2016 +0800 Committer: jackylk Committed: Wed Dec 28 21:21:48 2016 +0800 -- .../spark/thriftserver/CarbonThriftServer.scala | 64 .../org/apache/spark/sql/CarbonSession.scala| 46 +- .../execution/command/carbonTableSchema.scala | 4 +- 3 files changed, 110 insertions(+), 4 deletions(-) --
[1/3] incubator-carbondata git commit: Added partitioner
Repository: incubator-carbondata Updated Branches: refs/heads/master 65b922126 -> dc7c86ef3 http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/cbf87977/integration/spark2/src/test/scala/org/apache/spark/carbondata/bucketing/TableBucketingTestCase.scala -- diff --git a/integration/spark2/src/test/scala/org/apache/spark/carbondata/bucketing/TableBucketingTestCase.scala b/integration/spark2/src/test/scala/org/apache/spark/carbondata/bucketing/TableBucketingTestCase.scala new file mode 100644 index 000..c480d30 --- /dev/null +++ b/integration/spark2/src/test/scala/org/apache/spark/carbondata/bucketing/TableBucketingTestCase.scala @@ -0,0 +1,193 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.spark.carbondata.bucketing + +import java.io.File + +import org.apache.commons.io.FileUtils +import org.apache.spark.sql.common.util.QueryTest +import org.apache.spark.sql.execution.command.LoadTable +import org.apache.spark.sql.execution.exchange.ShuffleExchange +import org.scalatest.BeforeAndAfterAll + +import org.apache.carbondata.core.carbon.metadata.CarbonMetadata +import org.apache.carbondata.core.carbon.metadata.schema.table.CarbonTable +import org.apache.carbondata.core.constants.CarbonCommonConstants +import org.apache.carbondata.core.util.CarbonProperties + +class TableBucketingTestCase extends QueryTest with BeforeAndAfterAll { + + override def beforeAll { + +// clean data folder +clean + +CarbonProperties.getInstance() + .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "/MM/dd") +spark.conf.set("spark.sql.autoBroadcastJoinThreshold", "-1") +spark.sql("DROP TABLE IF EXISTS t3") +spark.sql("DROP TABLE IF EXISTS t4") +spark.sql("DROP TABLE IF EXISTS t5") +spark.sql("DROP TABLE IF EXISTS t6") +spark.sql("DROP TABLE IF EXISTS t7") +spark.sql("DROP TABLE IF EXISTS t8") + } + + test("test create table with buckets") { +spark.sql( + """ + CREATE TABLE t4 + (ID Int, date Timestamp, country String, + name String, phonetype String, serialname String, salary Int) + USING org.apache.spark.sql.CarbonSource + OPTIONS("bucketnumber"="4", "bucketcolumns"="name", "tableName"="t4") + """) +LoadTable(Some("default"), "t4", "./src/test/resources/dataDiff.csv", Nil, + Map(("use_kettle", "false"))).run(spark) +val table: CarbonTable = CarbonMetadata.getInstance().getCarbonTable("default_t4") +if (table != null && table.getBucketingInfo("t4") != null) { + assert(true) +} else { + assert(false, "Bucketing info does not exist") +} + } + + test("test create table with no bucket join of carbon tables") { +spark.sql( + """ + CREATE TABLE t5 + (ID Int, date Timestamp, country String, + name String, phonetype String, serialname String, salary 
Int) + USING org.apache.spark.sql.CarbonSource + OPTIONS("tableName"="t5") + """) +LoadTable(Some("default"), "t5", "./src/test/resources/dataDiff.csv", Nil, + Map(("use_kettle", "false"))).run(spark) + +val plan = spark.sql( + """ +|select t1.*, t2.* +|from t5 t1, t5 t2 +|where t1.name = t2.name + """.stripMargin).queryExecution.executedPlan +var shuffleExists = false +plan.collect { + case s: ShuffleExchange => shuffleExists = true +} +assert(shuffleExists, "shuffle should exist on non bucket tables") + } + + test("test create table with bucket join of carbon tables") { +spark.sql( + """ + CREATE TABLE t6 + (ID Int, date Timestamp, country String, + name String, phonetype String, serialname String, salary Int) + USING org.apache.spark.sql.CarbonSource + OPTIONS("bucketnumber"="4", "bucketcolumns"="name", "tableName"="t6") + """) +LoadTable(Some("default"), "t6", "./src/test/resources/dataDiff.csv", Nil, + Map(("use_kettle", "false"))).run(spark) + +val plan = spark.sql( + """ +|select t1.*, t2.* +|from t6 t1, t6 t2 +|wher
[3/3] incubator-carbondata git commit: [CARBONDATA-467] Adding bucketing to carbon table loading This closes #358
[CARBONDATA-467] Adding bucketing to carbon table loading This closes #358 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/dc7c86ef Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/dc7c86ef Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/dc7c86ef Branch: refs/heads/master Commit: dc7c86ef388af86392667289eec68e46bc257128 Parents: 65b9221 cbf8797 Author: jackylk Authored: Wed Dec 28 22:31:55 2016 +0800 Committer: jackylk Committed: Wed Dec 28 22:31:55 2016 +0800 -- .../carbon/datastore/SegmentTaskIndexStore.java | 89 +-- .../ThriftWrapperSchemaConverterImpl.java | 30 +++ .../carbon/metadata/schema/BucketingInfo.java | 49 .../metadata/schema/table/CarbonTable.java | 14 + .../metadata/schema/table/TableSchema.java | 14 + .../core/carbon/path/CarbonTablePath.java | 37 ++- .../carbondata/core/partition/Partitioner.java | 26 ++ .../partition/impl/HashPartitionerImpl.java | 105 .../core/util/CarbonMetadataUtil.java | 4 +- .../apache/carbondata/core/util/CarbonUtil.java | 5 +- .../datastore/SegmentTaskIndexStoreTest.java| 8 +- .../CarbonFormatDirectoryStructureTest.java | 4 +- .../core/util/CarbonMetadataUtilTest.java | 3 +- format/src/main/thrift/carbondata_index.thrift | 1 + format/src/main/thrift/schema.thrift| 9 + .../carbondata/hadoop/CarbonInputFormat.java| 22 +- .../carbondata/hadoop/CarbonInputSplit.java | 18 ++ .../hadoop/CarbonMultiBlockSplit.java | 23 +- .../internal/index/impl/InMemoryBTreeIndex.java | 9 +- .../apache/carbondata/spark/CarbonOption.scala | 7 + .../carbondata/spark/rdd/CarbonMergerRDD.scala | 2 +- .../carbondata/spark/rdd/CarbonScanRDD.scala| 146 ++ .../spark/sql/catalyst/CarbonDDLSqlParser.scala | 7 +- .../execution/command/carbonTableSchema.scala | 29 +- .../org/apache/spark/sql/CarbonSqlParser.scala | 11 +- .../apache/carbondata/spark/CarbonOption.scala | 7 + 
.../org/apache/spark/sql/CarbonSource.scala | 14 +- .../org/apache/spark/sql/TableCreator.scala | 6 +- .../execution/CarbonLateDecodeStrategy.scala| 42 ++- .../spark/sql/parser/CarbonSparkSqlParser.scala | 14 +- .../bucketing/TableBucketingTestCase.scala | 193 ++ .../newflow/CarbonDataLoadConfiguration.java| 11 + .../newflow/DataLoadProcessBuilder.java | 29 ++ .../processing/newflow/row/CarbonRow.java | 2 + ...arallelReadMergeSorterWithBucketingImpl.java | 265 +++ ...ConverterProcessorWithBucketingStepImpl.java | 189 + .../steps/DataWriterProcessorStepImpl.java | 79 +++--- .../newflow/steps/SortProcessorStepImpl.java| 11 +- .../sortandgroupby/sortdata/SortParameters.java | 28 ++ .../store/CarbonFactDataHandlerColumnar.java| 4 + .../store/CarbonFactDataHandlerModel.java | 9 +- .../store/SingleThreadFinalSortFilesMerger.java | 4 + .../store/writer/AbstractFactDataWriter.java| 6 +- .../store/writer/CarbonDataWriterVo.java| 9 + 44 files changed, 1417 insertions(+), 177 deletions(-) --
[2/3] incubator-carbondata git commit: Added partitioner
Added partitioner Added bucketing in load Added headers Bucketing is handled in load and query flow Fixed test case Rebased with master rebased Added bucketing in spark layer Rebased and fixed scala style Added test cases for bucketing in all scenerios. And fixed review comments rebased and fixed issues Rebased and fixed comments Rebased and fixed testcases Rebased and fixed testcases Fixed comments Rebased Fixed compilation issue Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/cbf87977 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/cbf87977 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/cbf87977 Branch: refs/heads/master Commit: cbf8797776c2f3be48efe029d858b37a37d29848 Parents: 65b9221 Author: ravipesala Authored: Sun Nov 27 16:58:55 2016 +0530 Committer: jackylk Committed: Wed Dec 28 22:14:11 2016 +0800 -- .../carbon/datastore/SegmentTaskIndexStore.java | 89 +-- .../ThriftWrapperSchemaConverterImpl.java | 30 +++ .../carbon/metadata/schema/BucketingInfo.java | 49 .../metadata/schema/table/CarbonTable.java | 14 + .../metadata/schema/table/TableSchema.java | 14 + .../core/carbon/path/CarbonTablePath.java | 37 ++- .../carbondata/core/partition/Partitioner.java | 26 ++ .../partition/impl/HashPartitionerImpl.java | 105 .../core/util/CarbonMetadataUtil.java | 4 +- .../apache/carbondata/core/util/CarbonUtil.java | 5 +- .../datastore/SegmentTaskIndexStoreTest.java| 8 +- .../CarbonFormatDirectoryStructureTest.java | 4 +- .../core/util/CarbonMetadataUtilTest.java | 3 +- format/src/main/thrift/carbondata_index.thrift | 1 + format/src/main/thrift/schema.thrift| 9 + .../carbondata/hadoop/CarbonInputFormat.java| 22 +- .../carbondata/hadoop/CarbonInputSplit.java | 18 ++ .../hadoop/CarbonMultiBlockSplit.java | 23 +- .../internal/index/impl/InMemoryBTreeIndex.java | 9 +- .../apache/carbondata/spark/CarbonOption.scala | 7 + 
.../carbondata/spark/rdd/CarbonMergerRDD.scala | 2 +- .../carbondata/spark/rdd/CarbonScanRDD.scala| 146 ++ .../spark/sql/catalyst/CarbonDDLSqlParser.scala | 7 +- .../execution/command/carbonTableSchema.scala | 29 +- .../org/apache/spark/sql/CarbonSqlParser.scala | 11 +- .../apache/carbondata/spark/CarbonOption.scala | 7 + .../org/apache/spark/sql/CarbonSource.scala | 14 +- .../org/apache/spark/sql/TableCreator.scala | 6 +- .../execution/CarbonLateDecodeStrategy.scala| 42 ++- .../spark/sql/parser/CarbonSparkSqlParser.scala | 14 +- .../bucketing/TableBucketingTestCase.scala | 193 ++ .../newflow/CarbonDataLoadConfiguration.java| 11 + .../newflow/DataLoadProcessBuilder.java | 29 ++ .../processing/newflow/row/CarbonRow.java | 2 + ...arallelReadMergeSorterWithBucketingImpl.java | 265 +++ ...ConverterProcessorWithBucketingStepImpl.java | 189 + .../steps/DataWriterProcessorStepImpl.java | 79 +++--- .../newflow/steps/SortProcessorStepImpl.java| 11 +- .../sortandgroupby/sortdata/SortParameters.java | 28 ++ .../store/CarbonFactDataHandlerColumnar.java| 4 + .../store/CarbonFactDataHandlerModel.java | 9 +- .../store/SingleThreadFinalSortFilesMerger.java | 4 + .../store/writer/AbstractFactDataWriter.java| 6 +- .../store/writer/CarbonDataWriterVo.java| 9 + 44 files changed, 1417 insertions(+), 177 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/cbf87977/core/src/main/java/org/apache/carbondata/core/carbon/datastore/SegmentTaskIndexStore.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/carbon/datastore/SegmentTaskIndexStore.java b/core/src/main/java/org/apache/carbondata/core/carbon/datastore/SegmentTaskIndexStore.java index e2218a8..6ab18bb 100644 --- a/core/src/main/java/org/apache/carbondata/core/carbon/datastore/SegmentTaskIndexStore.java +++ b/core/src/main/java/org/apache/carbondata/core/carbon/datastore/SegmentTaskIndexStore.java @@ -18,6 +18,7 @@ */ package org.apache.carbondata.core.carbon.datastore; +import 
java.io.Serializable; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; @@ -57,7 +58,8 @@ public class SegmentTaskIndexStore { * reason of so many map as each segment can have multiple data file and * each file will have its own btree */ - private Map>> tableSegmentMap; + private Map>> tableSegmentMap; /** * map of block info to lock object map, while loading the btree this wil
[2/2] incubator-carbondata git commit: [CARBONDATA-576] Add build guide to github This closes #477
[CARBONDATA-576] Add build guide to github This closes #477 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/fd9d1026 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/fd9d1026 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/fd9d1026 Branch: refs/heads/master Commit: fd9d1026c7e01e6a39ebc4b35d502574f9eb3232 Parents: 9d7afb5 0f783e4 Author: jackylk Authored: Thu Dec 29 15:19:18 2016 +0800 Committer: jackylk Committed: Thu Dec 29 15:19:18 2016 +0800 -- build/README.md | 58 1 file changed, 58 insertions(+) --
[1/2] incubator-carbondata git commit: add mvn build guide
Repository: incubator-carbondata Updated Branches: refs/heads/master 9d7afb589 -> fd9d1026c add mvn build guide add spark2.0 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/0f783e45 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/0f783e45 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/0f783e45 Branch: refs/heads/master Commit: 0f783e45522269dbfe0450d0da2b830fa6170898 Parents: 9d7afb5 Author: chenliang613 Authored: Wed Dec 28 17:49:56 2016 +0800 Committer: jackylk Committed: Thu Dec 29 15:18:46 2016 +0800 -- build/README.md | 58 1 file changed, 58 insertions(+) -- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/0f783e45/build/README.md -- diff --git a/build/README.md b/build/README.md new file mode 100644 index 000..600524c --- /dev/null +++ b/build/README.md @@ -0,0 +1,58 @@ + + +# Building CarbonData + +## Prerequisites +* Unix-like environment (Linux, Mac OS X) +* Git +* [Apache Maven (Recommend version 3.3 or later)](https://maven.apache.org/download.cgi) +* [Oracle Java 7 or 8](http://www.oracle.com/technetwork/java/javase/downloads/index.html) +* [Apache Thrift 0.9.3](https://thrift.apache.org/download) + +## Build release version +Note:Need install Apache Thrift +``` +mvn clean -DskipTests -Pbuild-with-format install +``` + +## Build dev version(snapshot version,clone from github) +Note:Already uploaded format.jar to snapshot repo for facilitating dev users, +so the compilation command works without "-Pbuild-with-format" + +Build without test,by default carbondata takes Spark 1.5.2 to build the project +``` +mvn -DskipTests clean package +``` + +Build with different supported versions of Spark. 
+``` +mvn -DskipTests -Pspark-1.5 -Dspark.version=1.5.1 clean package +mvn -DskipTests -Pspark-1.5 -Dspark.version=1.5.2 clean package + +mvn -DskipTests -Pspark-1.6 -Dspark.version=1.6.1 clean package +mvn -DskipTests -Pspark-1.6 -Dspark.version=1.6.2 clean package + +mvn -DskipTests -Pspark-2.0 -Dspark.version=2.0.2 clean package +``` + +Build with test +``` +mvn clean package +```
[2/2] incubator-carbondata git commit: [CARBONDATA-564]Remove unused dimension table csv file to make dictionary This closes #467
[CARBONDATA-564]Remove unused dimension table csv file to make dictionary This closes #467 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/aaf98edc Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/aaf98edc Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/aaf98edc Branch: refs/heads/master Commit: aaf98edcaf290a6b1d6f2b5618ea9caf3fa61f4d Parents: fd9d102 ff1bde1 Author: jackylk Authored: Thu Dec 29 15:41:26 2016 +0800 Committer: jackylk Committed: Thu Dec 29 15:41:26 2016 +0800 -- .../carbondata/spark/load/CarbonLoaderUtil.java | 2 - .../spark/util/GlobalDictionaryUtil.scala | 25 - .../execution/command/carbonTableSchema.scala | 6 -- .../spark/util/AllDictionaryTestCase.scala | 6 +- .../AutoHighCardinalityIdentifyTestCase.scala | 2 - ...GlobalDictionaryUtilConcurrentTestCase.scala | 4 +- .../util/GlobalDictionaryUtilTestCase.scala | 8 +-- .../execution/command/carbonTableSchema.scala | 6 -- .../processing/csvload/DataGraphExecuter.java | 59 .../dataprocessor/DataProcessTaskStatus.java| 13 - .../dataprocessor/IDataProcessStatus.java | 4 -- .../processing/model/CarbonLoadModel.java | 17 -- 12 files changed, 5 insertions(+), 147 deletions(-) --
[1/2] incubator-carbondata git commit: remove unused dimension table dict
Repository: incubator-carbondata Updated Branches: refs/heads/master fd9d1026c -> aaf98edca remove unused dimension table dict Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/ff1bde14 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/ff1bde14 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/ff1bde14 Branch: refs/heads/master Commit: ff1bde146f40551409708285e2b69b75837f20ba Parents: fd9d102 Author: Jay357089 Authored: Mon Dec 26 20:43:23 2016 +0800 Committer: jackylk Committed: Thu Dec 29 15:28:34 2016 +0800 -- .../carbondata/spark/load/CarbonLoaderUtil.java | 2 - .../spark/util/GlobalDictionaryUtil.scala | 25 - .../execution/command/carbonTableSchema.scala | 6 -- .../spark/util/AllDictionaryTestCase.scala | 6 +- .../AutoHighCardinalityIdentifyTestCase.scala | 2 - ...GlobalDictionaryUtilConcurrentTestCase.scala | 4 +- .../util/GlobalDictionaryUtilTestCase.scala | 8 +-- .../execution/command/carbonTableSchema.scala | 6 -- .../processing/csvload/DataGraphExecuter.java | 59 .../dataprocessor/DataProcessTaskStatus.java| 13 - .../dataprocessor/IDataProcessStatus.java | 4 -- .../processing/model/CarbonLoadModel.java | 17 -- 12 files changed, 5 insertions(+), 147 deletions(-) -- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/ff1bde14/integration/spark-common/src/main/java/org/apache/carbondata/spark/load/CarbonLoaderUtil.java -- diff --git a/integration/spark-common/src/main/java/org/apache/carbondata/spark/load/CarbonLoaderUtil.java b/integration/spark-common/src/main/java/org/apache/carbondata/spark/load/CarbonLoaderUtil.java index 0d2ab6f..56ddce2 100644 --- a/integration/spark-common/src/main/java/org/apache/carbondata/spark/load/CarbonLoaderUtil.java +++ b/integration/spark-common/src/main/java/org/apache/carbondata/spark/load/CarbonLoaderUtil.java @@ -179,7 +179,6 @@ public final class CarbonLoaderUtil { 
DataProcessTaskStatus dataProcessTaskStatus = new DataProcessTaskStatus(databaseName, tableName); dataProcessTaskStatus.setCsvFilePath(loadModel.getFactFilePath()); -dataProcessTaskStatus.setDimCSVDirLoc(loadModel.getDimFolderPath()); if (loadModel.isDirectLoad()) { dataProcessTaskStatus.setFilesToProcess(loadModel.getFactFilesToProcess()); dataProcessTaskStatus.setDirectLoad(true); @@ -194,7 +193,6 @@ public final class CarbonLoaderUtil { dataProcessTaskStatus.setRddIteratorKey(loadModel.getRddIteratorKey()); dataProcessTaskStatus.setDateFormat(loadModel.getDateFormat()); SchemaInfo info = new SchemaInfo(); - info.setDatabaseName(databaseName); info.setTableName(tableName); info.setAutoAggregateRequest(loadModel.isAggLoadRequest()); http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/ff1bde14/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtil.scala -- diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtil.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtil.scala index e578488..3274e58 100644 --- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtil.scala +++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/util/GlobalDictionaryUtil.scala @@ -772,31 +772,6 @@ object GlobalDictionaryUtil { } else { LOGGER.info("No column found for generating global dictionary in source data files") } -// generate global dict from dimension file -if (carbonLoadModel.getDimFolderPath != null) { - val fileMapArray = carbonLoadModel.getDimFolderPath.split(",") - for (fileMap <- fileMapArray) { -val dimTableName = fileMap.split(":")(0) -var dimDataframe = loadDataFrame(sqlContext, carbonLoadModel) -val (requireDimensionForDim, requireColumnNamesForDim) = - pruneDimensions(dimensions, dimDataframe.columns, dimDataframe.columns) -if (requireDimensionForDim.length >= 1) { - 
dimDataframe = dimDataframe.select(requireColumnNamesForDim.head, -requireColumnNamesForDim.tail: _*) - val modelforDim = createDictionaryLoadModel(carbonLoadModel, carbonTableIdentifier, -requireDimensionForDim, storePath, dictfolderPath, false) - val inputRDDforDim =
[1/2] incubator-carbondata git commit: fix load performance
Repository: incubator-carbondata Updated Branches: refs/heads/master aaf98edca -> 93f23ccb8 fix load performace Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/31582395 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/31582395 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/31582395 Branch: refs/heads/master Commit: 315823956a1418eff2977919413b344c6f5e42c7 Parents: aaf98ed Author: foryou2030 Authored: Fri Dec 23 16:46:21 2016 +0800 Committer: jackylk Committed: Thu Dec 29 15:46:27 2016 +0800 -- .../newflow/converter/impl/MeasureFieldConverterImpl.java| 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/31582395/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/MeasureFieldConverterImpl.java -- diff --git a/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/MeasureFieldConverterImpl.java b/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/MeasureFieldConverterImpl.java index c419d46..fa2d037 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/MeasureFieldConverterImpl.java +++ b/processing/src/main/java/org/apache/carbondata/processing/newflow/converter/impl/MeasureFieldConverterImpl.java @@ -22,6 +22,7 @@ import org.apache.carbondata.common.logging.LogService; import org.apache.carbondata.common.logging.LogServiceFactory; import org.apache.carbondata.core.carbon.metadata.datatype.DataType; import org.apache.carbondata.core.carbon.metadata.schema.table.column.CarbonMeasure; +import org.apache.carbondata.core.constants.CarbonCommonConstants; import org.apache.carbondata.core.util.DataTypeUtil; import org.apache.carbondata.processing.newflow.DataField; import 
org.apache.carbondata.processing.newflow.converter.BadRecordLogHolder; @@ -57,7 +58,8 @@ public class MeasureFieldConverterImpl implements FieldConverter { throws CarbonDataLoadingException { String value = row.getString(index); Object output; -if (value == null) { +boolean isNull = CarbonCommonConstants.MEMBER_DEFAULT_VAL.equals(value); +if (value == null || value.length() == 0 || isNull) { logHolder.setReason( "The value " + " \"" + value + "\"" + " with column name " + measure.getColName() + " and column data type " + dataType + " is not a valid " + dataType + " type.");
[2/2] incubator-carbondata git commit: [CARBONDATA-558] Fix load performance issue when use_kettle=false This closes #459
[CARBONDATA-558] Fix load performance issue when use_kettle=false This closes #459 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/93f23ccb Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/93f23ccb Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/93f23ccb Branch: refs/heads/master Commit: 93f23ccb8689aa48a6cfcba12ef39ec863a09590 Parents: aaf98ed 3158239 Author: jackylk Authored: Thu Dec 29 16:04:27 2016 +0800 Committer: jackylk Committed: Thu Dec 29 16:04:27 2016 +0800 -- .../newflow/converter/impl/MeasureFieldConverterImpl.java| 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) --
[1/3] incubator-carbondata git commit: WIP provide dictionary server/client framework
Repository: incubator-carbondata Updated Branches: refs/heads/master 241f45f8a -> 20a0b9ec5 http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/05b26549/processing/src/main/java/org/apache/carbondata/processing/datatypes/PrimitiveDataType.java -- diff --git a/processing/src/main/java/org/apache/carbondata/processing/datatypes/PrimitiveDataType.java b/processing/src/main/java/org/apache/carbondata/processing/datatypes/PrimitiveDataType.java index 38f8c79..6d9be67 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/datatypes/PrimitiveDataType.java +++ b/processing/src/main/java/org/apache/carbondata/processing/datatypes/PrimitiveDataType.java @@ -23,7 +23,9 @@ import java.io.DataOutputStream; import java.io.IOException; import java.nio.ByteBuffer; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; +import java.util.Map; import org.apache.carbondata.core.cache.Cache; import org.apache.carbondata.core.cache.dictionary.Dictionary; @@ -34,11 +36,15 @@ import org.apache.carbondata.core.carbon.metadata.schema.table.column.CarbonDime import org.apache.carbondata.core.constants.CarbonCommonConstants; import org.apache.carbondata.core.devapi.BiDictionary; import org.apache.carbondata.core.devapi.DictionaryGenerationException; +import org.apache.carbondata.core.dictionary.client.DictionaryClient; +import org.apache.carbondata.core.dictionary.generator.key.DictionaryKey; import org.apache.carbondata.core.keygenerator.KeyGenException; import org.apache.carbondata.core.keygenerator.KeyGenerator; import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryKeyGeneratorFactory; +import org.apache.carbondata.core.util.CarbonUtil; import org.apache.carbondata.core.util.CarbonUtilException; import org.apache.carbondata.core.util.DataTypeUtil; +import org.apache.carbondata.processing.newflow.dictionary.DictionaryServerClientDictionary; import 
org.apache.carbondata.processing.newflow.dictionary.DirectDictionary; import org.apache.carbondata.processing.newflow.dictionary.PreCreatedDictionary; import org.apache.carbondata.processing.surrogatekeysgenerator.csvbased.CarbonCSVBasedDimSurrogateKeyGen; @@ -116,8 +122,10 @@ public class PrimitiveDataType implements GenericDataType { * @param columnId */ public PrimitiveDataType(String name, String parentname, String columnId, - CarbonDimension carbonDimension, Cache cache, - CarbonTableIdentifier carbonTableIdentifier) { + CarbonDimension carbonDimension, + Cache cache, + CarbonTableIdentifier carbonTableIdentifier, + DictionaryClient client, Boolean useOnePass, String storePath) { this.name = name; this.parentname = parentname; this.columnId = columnId; @@ -130,8 +138,33 @@ public class PrimitiveDataType implements GenericDataType { dictionaryGenerator = new DirectDictionary(DirectDictionaryKeyGeneratorFactory .getDirectDictionaryGenerator(carbonDimension.getDataType())); } else { -Dictionary dictionary = cache.get(identifier); -dictionaryGenerator = new PreCreatedDictionary(dictionary); +Dictionary dictionary = null; +if (useOnePass) { + if (CarbonUtil.isFileExistsForGivenColumn(storePath, identifier)) { +try { + dictionary = cache.get(identifier); +} catch (CarbonUtilException e) { + throw new RuntimeException(e); +} + } + String threadNo = "initial"; + DictionaryKey dictionaryKey = new DictionaryKey(); + dictionaryKey.setColumnName(carbonDimension.getColName()); + dictionaryKey.setTableUniqueName(carbonTableIdentifier.getTableUniqueName()); + dictionaryKey.setThreadNo(threadNo); + // for table initialization + dictionaryKey.setType("TABLE_INTIALIZATION"); + dictionaryKey.setData("0"); + client.getDictionary(dictionaryKey); + Map localCache = new HashMap<>(); + // for generate dictionary + dictionaryKey.setType("DICTIONARY_GENERATION"); + dictionaryGenerator = new DictionaryServerClientDictionary(dictionary, client, + dictionaryKey, localCache); +} else { + 
dictionary = cache.get(identifier); + dictionaryGenerator = new PreCreatedDictionary(dictionary); +} } } catch (CarbonUtilException e) { throw new RuntimeException(e); http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/05b26549/processing/src/main/java/org/apache/carbondata/processing/model/CarbonLoadModel.java -- diff --git a/processing/src/main/java/org/apache/carbondata/processing/model/CarbonLoadModel.
[3/3] incubator-carbondata git commit: [CARBONDATA-401] One Pass Load This closes #310
[CARBONDATA-401] One Pass Load This closes #310 Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/20a0b9ec Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/20a0b9ec Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/20a0b9ec Branch: refs/heads/master Commit: 20a0b9ec55749aff6b63ab9d1d1721af0806e483 Parents: 241f45f 05b2654 Author: jackylk Authored: Thu Dec 29 22:35:01 2016 +0800 Committer: jackylk Committed: Thu Dec 29 22:35:01 2016 +0800 -- .../AbstractColumnDictionaryInfo.java | 2 +- .../dictionary/AbstractDictionaryCache.java | 25 -- .../cache/dictionary/ColumnDictionaryInfo.java | 2 +- .../dictionary/ColumnReverseDictionaryInfo.java | 2 +- .../cache/dictionary/ForwardDictionary.java | 4 +- .../cache/dictionary/ReverseDictionary.java | 4 +- .../metadata/schema/table/CarbonTable.java | 62 +++-- .../core/constants/CarbonCommonConstants.java | 10 + .../dictionary/client/DictionaryClient.java | 92 +++ .../client/DictionaryClientHandler.java | 109 + .../dictionary/generator/DictionaryWriter.java | 29 +++ .../IncrementalColumnDictionaryGenerator.java | 241 +++ .../generator/ServerDictionaryGenerator.java| 74 ++ .../generator/TableDictionaryGenerator.java | 116 + .../dictionary/generator/key/DictionaryKey.java | 92 +++ .../dictionary/generator/key/KryoRegister.java | 68 ++ .../dictionary/server/DictionaryServer.java | 93 +++ .../server/DictionaryServerHandler.java | 108 + .../apache/carbondata/core/util/CarbonUtil.java | 27 +++ .../spark/util/GlobalDictionaryUtil.scala | 4 +- .../spark/sql/catalyst/CarbonDDLSqlParser.scala | 3 +- .../spark/rdd/CarbonDataRDDFactory.scala| 15 ++ .../execution/command/carbonTableSchema.scala | 79 +- .../test/resources/columndictionary/country.csv | 5 + .../test/resources/columndictionary/name.csv| 10 + .../spark/src/test/resources/dataIncrement.csv | 21 ++ 
.../complexType/TestCreateTableWithDouble.scala | 2 + .../dataload/TestLoadDataWithSinglePass.scala | 114 + .../filterexpr/FilterProcessorTestCase.scala| 2 +- .../processing/datatypes/PrimitiveDataType.java | 41 +++- .../processing/model/CarbonLoadModel.java | 45 .../newflow/CarbonDataLoadConfiguration.java| 42 .../newflow/DataLoadProcessBuilder.java | 11 + .../impl/DictionaryFieldConverterImpl.java | 52 +++- .../converter/impl/FieldEncoderFactory.java | 23 +- .../converter/impl/RowConverterImpl.java| 45 +++- .../DictionaryServerClientDictionary.java | 95 37 files changed, 1692 insertions(+), 77 deletions(-) --