carbondata git commit: [CARBONDATA-3047] Added fallback mechanism when offheap memory is not available in UnsafeMemoryManager
Repository: carbondata Updated Branches: refs/heads/master 917f34421 -> bbbe47905 [CARBONDATA-3047] Added fallback mechanism when offheap memory is not available in UnsafeMemoryManager Changes proposed in this PR: Currently, when unsafe working memory is not available, UnsafeMemoryManager throws a MemoryException and kills the running task. To make the system more resilient for the user, a fallback to heap memory has now been added for when offheap memory is not available. This closes #2841 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/bbbe4790 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/bbbe4790 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/bbbe4790 Branch: refs/heads/master Commit: bbbe47905f90b518a6ca670848bd4370f3504e8e Parents: 917f344 Author: kumarvishal09 Authored: Sat Oct 27 02:01:11 2018 +0530 Committer: ravipesala Committed: Sat Oct 27 07:37:36 2018 +0530 -- .../core/memory/UnsafeMemoryManager.java| 94 +++- 1 file changed, 34 insertions(+), 60 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/bbbe4790/core/src/main/java/org/apache/carbondata/core/memory/UnsafeMemoryManager.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/memory/UnsafeMemoryManager.java b/core/src/main/java/org/apache/carbondata/core/memory/UnsafeMemoryManager.java index db0258f..7ccbc3f 100644 --- a/core/src/main/java/org/apache/carbondata/core/memory/UnsafeMemoryManager.java +++ b/core/src/main/java/org/apache/carbondata/core/memory/UnsafeMemoryManager.java @@ -41,7 +41,7 @@ public class UnsafeMemoryManager { private static boolean offHeap = Boolean.parseBoolean(CarbonProperties.getInstance() .getProperty(CarbonCommonConstants.ENABLE_OFFHEAP_SORT, CarbonCommonConstants.ENABLE_OFFHEAP_SORT_DEFAULT)); - private static Map> taskIdToMemoryBlockMap; + private static Map> taskIdToOffheapMemoryBlockMap; static { long size = 0L; String configuredWorkingMemorySize = null; @@ -67,7 +67,7 @@ public class UnsafeMemoryManager { } } } catch (Exception e) { - LOGGER.info("Invalid working memory size value: " + configuredWorkingMemorySize); + LOGGER.info("Invalid offheap working memory size value: " + configuredWorkingMemorySize); } long takenSize = size; MemoryType memoryType; @@ -77,25 +77,18 @@ public class UnsafeMemoryManager { if (takenSize < defaultSize) { takenSize = defaultSize; LOGGER.warn(String.format( -"It is not recommended to set unsafe working memory size less than %sMB," +"It is not recommended to set offheap working memory size less than %sMB," + " so setting default value to %d", CarbonCommonConstants.UNSAFE_WORKING_MEMORY_IN_MB_DEFAULT, defaultSize)); } takenSize = takenSize * 1024 * 1024; } else { - long maxMemory = Runtime.getRuntime().maxMemory() * 60 / 100; - if (takenSize == 0L) { -takenSize = maxMemory; - } else { -takenSize = takenSize * 1024 * 1024; -if (takenSize > maxMemory) { - takenSize = maxMemory; -} - } + // For ON-HEAP case not considering any size as it will based on max memory(Xmx) given to + // JVM and JVM will take care of freeing the memory memoryType = MemoryType.ONHEAP; } INSTANCE = new UnsafeMemoryManager(takenSize, memoryType); -taskIdToMemoryBlockMap = new HashMap<>(); +taskIdToOffheapMemoryBlockMap = new HashMap<>(); } public static final UnsafeMemoryManager INSTANCE; @@ -109,50 +102,57 @@ public class UnsafeMemoryManager { private UnsafeMemoryManager(long totalMemory, MemoryType memoryType) { this.totalMemory = totalMemory; this.memoryType = memoryType;
-LOGGER.info( -"Working Memory manager is created with size " + totalMemory + " with " + memoryType); +LOGGER.info("offheap Working Memory manager is created with size " + totalMemory + " with " ++ memoryType); } private synchronized MemoryBlock allocateMemory(MemoryType memoryType, String taskId, long memoryRequested) { -if (memoryUsed + memoryRequested <= totalMemory) { - MemoryBlock memoryBlock = getMemoryAllocator(memoryType).allocate(memoryRequested); +MemoryBlock memoryBlock; +if (memoryUsed + memoryRequested <= totalMemory && memoryType == MemoryType.OFFHEAP) { + memoryBlock = MemoryAllocator.UNSAFE.allocate(memoryRequested); memoryUsed += memoryBlock.size(); - S
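A minimal Java sketch of the fallback pattern this commit describes may help: requests are served from the off-heap budget while it lasts, and fall back to heap allocation instead of throwing a MemoryException and killing the task. All names and types below are illustrative assumptions, not CarbonData's actual API beyond what the diff shows.

```java
// Hypothetical sketch of "fall back to heap when offheap memory is not
// available". Off-heap usage is tracked against a configured budget;
// on-heap allocations are left to the JVM (Xmx) and the garbage collector.
import java.nio.ByteBuffer;

final class FallbackAllocatorSketch {
  private final long totalOffHeap; // configured off-heap working memory
  private long offHeapUsed;

  FallbackAllocatorSketch(long totalOffHeap) {
    this.totalOffHeap = totalOffHeap;
  }

  synchronized ByteBuffer allocate(int bytes) {
    if (offHeapUsed + bytes <= totalOffHeap) {
      offHeapUsed += bytes;                    // off-heap path: count it
      return ByteBuffer.allocateDirect(bytes); // native (off-heap) buffer
    }
    // Budget exhausted: fall back to heap instead of failing the task.
    return ByteBuffer.allocate(bytes);
  }
}
```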
carbondata git commit: [CARBONDATA-3039] Fix Custom Deterministic Expression for rand() UDF
Repository: carbondata Updated Branches: refs/heads/master 58ba45ef8 -> 917f34421 [CARBONDATA-3039] Fix Custom Deterministic Expression for rand() UDF Fix Custom Deterministic Expression for rand() UDF This closes #2845 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/917f3442 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/917f3442 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/917f3442 Branch: refs/heads/master Commit: 917f34421b0dfcc44dd1efec6745db6c68eddbd5 Parents: 58ba45e Author: Indhumathi27 Authored: Tue Oct 23 11:29:51 2018 +0530 Committer: ravipesala Committed: Sat Oct 27 07:30:06 2018 +0530 -- .../spark/sql/CustomDeterministicExpression.scala | 12 +++- 1 file changed, 7 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/917f3442/integration/spark2/src/main/scala/org/apache/spark/sql/CustomDeterministicExpression.scala -- diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/CustomDeterministicExpression.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/CustomDeterministicExpression.scala index 1ff9bc3..8a37989 100644 --- a/integration/spark2/src/main/scala/org/apache/spark/sql/CustomDeterministicExpression.scala +++ b/integration/spark2/src/main/scala/org/apache/spark/sql/CustomDeterministicExpression.scala @@ -26,15 +26,17 @@ import org.apache.spark.sql.types.{DataType, StringType} * Custom expression to override the deterministic property . */ case class CustomDeterministicExpression(nonDt: Expression ) extends Expression with Serializable{ - override def nullable: Boolean = true + override def nullable: Boolean = nonDt.nullable - override def eval(input: InternalRow): Any = null + override def eval(input: InternalRow): Any = nonDt.eval(input) - override def dataType: DataType = StringType + override def dataType: DataType = nonDt.dataType - override def children: Seq[Expression] = Seq() + override def children: Seq[Expression] = nonDt.children - def childexp : Expression = nonDt + def childexp: Expression = nonDt + + override def genCode(ctx: CodegenContext): ExprCode = nonDt.genCode(ctx) override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = ev.copy("") }
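The fix above replaces the hard-coded overrides (nullable = true, eval returning null, dataType = StringType) with delegation to the wrapped child expression, so the wrapper changes only the determinism property. Below is a hypothetical Java sketch of the same delegation pattern; Spark's real Expression API is Scala, and the interface here is only a stand-in.

```java
// Stand-in expression node; not Spark's API.
interface Expr {
  boolean nullable();
  Object eval(Object input);
}

// A wrapper that exists only to flip one property must forward everything
// else to its child instead of returning constants like null or StringType.
final class DeterministicWrapperSketch implements Expr {
  private final Expr child;

  DeterministicWrapperSketch(Expr child) {
    this.child = child;
  }

  @Override public boolean nullable() { return child.nullable(); } // delegate
  @Override public Object eval(Object input) { return child.eval(input); }
}
```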
carbondata git commit: [CARBONDATA-3048] Added Lazy Loading For 2.2/2.1
Repository: carbondata Updated Branches: refs/heads/master 170c2f56d -> 58ba45ef8 [CARBONDATA-3048] Added Lazy Loading For 2.2/2.1 Problem: Currently in 2.2/2.1, lazy loading is not added for direct fill; because of this, when data is huge and the number of columns is high, queries take more time to execute. Solution: Add lazy loading for 2.2 and 2.1. Also fixed a Local Dictionary test case failure when it is enabled. This closes #2846 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/58ba45ef Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/58ba45ef Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/58ba45ef Branch: refs/heads/master Commit: 58ba45ef89c4794a3214ad52f4a3b8aa89e175a3 Parents: 170c2f5 Author: kumarvishal09 Authored: Tue Oct 23 22:15:14 2018 +0530 Committer: ravipesala Committed: Sat Oct 27 07:10:00 2018 +0530 -- .../impl/LocalDictDimensionDataChunkStore.java | 20 +- .../safe/AbstractNonDictionaryVectorFiller.java | 221 +++ .../SafeFixedLengthDimensionDataChunkStore.java | 2 +- ...feVariableLengthDimensionDataChunkStore.java | 13 +- .../adaptive/AdaptiveDeltaFloatingCodec.java| 2 +- .../adaptive/AdaptiveDeltaIntegralCodec.java| 2 +- .../adaptive/AdaptiveFloatingCodec.java | 2 +- .../adaptive/AdaptiveIntegralCodec.java | 4 +- .../encoding/compress/DirectCompressCodec.java | 4 +- .../ColumnarVectorWrapperDirectFactory.java | 5 +- ...erDirectWithDeleteDeltaAndInvertedIndex.java | 8 +- .../apache/carbondata/core/util/ByteUtil.java | 2 +- .../detailquery/CastColumnTestCase.scala| 2 +- .../carbondata/spark/rdd/CarbonScanRDD.scala| 8 +- .../vectorreader/ColumnarVectorWrapper.java | 2 +- .../ColumnarVectorWrapperDirect.java| 2 +- .../org/apache/spark/sql/CarbonVectorProxy.java | 586 --- 17 files changed, 499 insertions(+), 386 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/58ba45ef/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/LocalDictDimensionDataChunkStore.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/LocalDictDimensionDataChunkStore.java b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/LocalDictDimensionDataChunkStore.java index e70424f..a384743 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/LocalDictDimensionDataChunkStore.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/LocalDictDimensionDataChunkStore.java @@ -17,11 +17,15 @@ package org.apache.carbondata.core.datastore.chunk.store.impl; +import java.util.BitSet; + import org.apache.carbondata.core.constants.CarbonCommonConstants; import org.apache.carbondata.core.datastore.chunk.store.DimensionDataChunkStore; import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector; import org.apache.carbondata.core.scan.result.vector.CarbonDictionary; import org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo; +import org.apache.carbondata.core.scan.result.vector.impl.directread.ColumnarVectorWrapperDirectFactory; +import org.apache.carbondata.core.scan.result.vector.impl.directread.ConvertableVector; import org.apache.carbondata.core.util.CarbonUtil; /** @@ -61,16 +65,24 @@ public class LocalDictDimensionDataChunkStore implements DimensionDataChunkStore vector.setDictionary(dictionary); dictionary.setDictionaryUsed(); } +BitSet nullBitset = new BitSet(); +CarbonColumnVector dictionaryVector = ColumnarVectorWrapperDirectFactory
+.getDirectVectorWrapperFactory(vector.getDictionaryVector(), invertedIndex, nullBitset, +vectorInfo.deletedRows, false, true); +vector = ColumnarVectorWrapperDirectFactory +.getDirectVectorWrapperFactory(vector, invertedIndex, nullBitset, vectorInfo.deletedRows, +false, false); for (int i = 0; i < rowsNum; i++) { int surrogate = CarbonUtil.getSurrogateInternal(data, i * columnValueSize, columnValueSize); if (surrogate == CarbonCommonConstants.MEMBER_DEFAULT_VAL_SURROGATE_KEY) { vector.putNull(i); -vector.getDictionaryVector().putNull(i); +dictionaryVector.putNull(i); } else { -vector.putNotNull(i); -vector.getDictionaryVector().putInt(i, surrogate); +dictionaryVector.putInt(i, surrogate); } - +} +if (dictionaryVector instanceof ConvertableVector) { + ((ConvertableVector) dictionaryVector).convert(); } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/58ba45ef/core/sr
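The lazy loading added here defers the expensive decode of a column page until a query actually reads it, which is what makes wide tables cheaper to scan. Below is a minimal Java sketch of that idea, assuming a hypothetical loader function; this is not CarbonData's internal vector-filling API.

```java
// Lazy page holder: the loader (e.g. decompress + decode of a column chunk)
// runs only on first access, so columns the query never touches cost nothing.
import java.util.function.Supplier;

final class LazyPage<T> {
  private final Supplier<T> loader;
  private T page; // materialized on first get()

  LazyPage(Supplier<T> loader) {
    this.loader = loader;
  }

  synchronized T get() {
    if (page == null) {
      page = loader.get(); // the decode happens here, exactly once
    }
    return page;
  }
}
```

A column reader would hand out one such lazy holder per column page and call get() only from operators that actually project that column.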
svn commit: r30128 - /release/carbondata/1.5.0/
Author: ravipesala Date: Thu Oct 18 08:03:42 2018 New Revision: 30128 Log: Uploading 1.5.0 binaries Added: release/carbondata/1.5.0/apache-carbondata-1.5.0-bin-spark2.1.0-hadoop2.7.2.jar (with props) release/carbondata/1.5.0/apache-carbondata-1.5.0-bin-spark2.1.0-hadoop2.7.2.jar.asc release/carbondata/1.5.0/apache-carbondata-1.5.0-bin-spark2.1.0-hadoop2.7.2.jar.md5 release/carbondata/1.5.0/apache-carbondata-1.5.0-bin-spark2.1.0-hadoop2.7.2.jar.sha512 release/carbondata/1.5.0/apache-carbondata-1.5.0-bin-spark2.3.2-hadoop2.7.2.jar (with props) release/carbondata/1.5.0/apache-carbondata-1.5.0-bin-spark2.3.2-hadoop2.7.2.jar.asc release/carbondata/1.5.0/apache-carbondata-1.5.0-bin-spark2.3.2-hadoop2.7.2.jar.md5 release/carbondata/1.5.0/apache-carbondata-1.5.0-bin-spark2.3.2-hadoop2.7.2.jar.sha512 Added: release/carbondata/1.5.0/apache-carbondata-1.5.0-bin-spark2.1.0-hadoop2.7.2.jar == Binary file - no diff available. Propchange: release/carbondata/1.5.0/apache-carbondata-1.5.0-bin-spark2.1.0-hadoop2.7.2.jar -- svn:mime-type = application/octet-stream Added: release/carbondata/1.5.0/apache-carbondata-1.5.0-bin-spark2.1.0-hadoop2.7.2.jar.asc == --- release/carbondata/1.5.0/apache-carbondata-1.5.0-bin-spark2.1.0-hadoop2.7.2.jar.asc (added) +++ release/carbondata/1.5.0/apache-carbondata-1.5.0-bin-spark2.1.0-hadoop2.7.2.jar.asc Thu Oct 18 08:03:42 2018 @@ -0,0 +1,16 @@ +-BEGIN PGP SIGNATURE- + +iQIzBAEBCgAdFiEER3EpqJTxH7zLwCVHutcqeKexsu4FAlvIPaEACgkQutcqeKex +su7rChAAx50n7vDDksee6LQ90rBBWjHtOhFtt4XJT8a8x5Y0Ke633fQhgykmG2wr +G6ZWeFeld9sEnoQ176Bi7ejxeIREWyfgBVVk68CWmIT4FS702zPRUENlPunkrsqu +QXHnY4irNyqbpn2rTqVk6cu6oN7BH5wNke8xhihnamManJ27YeI6sTJ0kxK8j0H/ +e6t4h10goc59/IIQbFenWthqq86b6G1SV+BQYXv5Ym+FlIqP7SHFrDEISr+fg66K +BuaH/VmA2Bm8rK86easXhv5QzxRm2f7CGRb2Doyw3YYynxvSKo5FR5hBO52Pq35N +NhRisemz+vYMcIBFnUOzPeewh1gqd+JuHS3hI63tDw1hjDJkXNhVeyoDzdd/Keie +4VlZVQlekbq50OVZ+riD+V203+dW12DeMxRxXn2yYgzXtL2oKcfmCXobSf0Is/d/ +TkZtSMwg9BLCsUeTs+Ozq5eA8zA3s5ialV0ShiJYgeu5jbk4NB9veuqqd0AGfSZp +fdfUjOkUFCsWuyvNvtRqgq/W1/wg/BI+CmxSpUIyHH8PUlm5FZtRYTvFtiNdOGLa +LtGbP4hfEp9ELF6NXE5ZFxNVlekLS9VTHV+6Z9v5QgLQD8QXOgiLLOyEnt4h6ZlW +AVB3JRZU93MsgyTwB8Bpp1yIJzprI9jrFhlEJI6bYjqkLyC2YkA= +=oosR +-END PGP SIGNATURE- Added: release/carbondata/1.5.0/apache-carbondata-1.5.0-bin-spark2.1.0-hadoop2.7.2.jar.md5 == --- release/carbondata/1.5.0/apache-carbondata-1.5.0-bin-spark2.1.0-hadoop2.7.2.jar.md5 (added) +++ release/carbondata/1.5.0/apache-carbondata-1.5.0-bin-spark2.1.0-hadoop2.7.2.jar.md5 Thu Oct 18 08:03:42 2018 @@ -0,0 +1 @@ +649ea50bc879ec3775363e411251c93b apache-carbondata-1.5.0-bin-spark2.1.0-hadoop2.7.2.jar Added: release/carbondata/1.5.0/apache-carbondata-1.5.0-bin-spark2.1.0-hadoop2.7.2.jar.sha512 == --- release/carbondata/1.5.0/apache-carbondata-1.5.0-bin-spark2.1.0-hadoop2.7.2.jar.sha512 (added) +++ release/carbondata/1.5.0/apache-carbondata-1.5.0-bin-spark2.1.0-hadoop2.7.2.jar.sha512 Thu Oct 18 08:03:42 2018 @@ -0,0 +1 @@ +e6e9180322da04dcd5c0980d1dbf38c5e6206b17ba21cae3732fb83a9e38eb368585e86d0c652a2551fef44a2ffea61769563a52f4a5695b48516d42805de688 apache-carbondata-1.5.0-bin-spark2.1.0-hadoop2.7.2.jar Added: release/carbondata/1.5.0/apache-carbondata-1.5.0-bin-spark2.3.2-hadoop2.7.2.jar == Binary file - no diff available. 
Propchange: release/carbondata/1.5.0/apache-carbondata-1.5.0-bin-spark2.3.2-hadoop2.7.2.jar -- svn:mime-type = application/octet-stream Added: release/carbondata/1.5.0/apache-carbondata-1.5.0-bin-spark2.3.2-hadoop2.7.2.jar.asc == --- release/carbondata/1.5.0/apache-carbondata-1.5.0-bin-spark2.3.2-hadoop2.7.2.jar.asc (added) +++ release/carbondata/1.5.0/apache-carbondata-1.5.0-bin-spark2.3.2-hadoop2.7.2.jar.asc Thu Oct 18 08:03:42 2018 @@ -0,0 +1,16 @@ +-BEGIN PGP SIGNATURE- + +iQIzBAEBCgAdFiEER3EpqJTxH7zLwCVHutcqeKexsu4FAlvIPKgACgkQutcqeKex +su7zbQ//aDqe9jiuLhONAi+hC/qdVBzM1De2J+6f3P5h/kycI1+Evfs9PKRJ0S9z +DrdPpnE9SNEn+NOpBXYPdc6T5jmGgVP9TqpCJXCyEjsnog6gyVjcHTTlJ9Z1OgMl +dOGwUZ8/F8JloL4QVYAZn1lrkd0Qx3iotzPqcQhihYiAXzivGSsceJCAPgOItTj+ +wM5NPha3h1HtTKI6zsJDrwMctTQ4InPKX0ND3AejlZIjc+7np+HgaqtY9StFHf7c +txDfUqsA77bIcNXtp9FGBJMvWxwIqd+snw431/AsS4TIhvN+uVaRXnTMb8rkTe4W +7bCLPXd7cch0rE/aXUsvyyY2okz2ZqVW1oEeBbIID2/zLuLjtahWWAerzYJdWB5g +HgLzkchhzuWi4S2U7hkksNj2eDcJ0MeyR1oLc/41RV1zSwWzBLViMcCW/QCSyAZG
[carbondata] Git Push Summary
Repository: carbondata Updated Tags: refs/tags/apache-carbondata-1.3.0 [created] dade91bde
[carbondata] Git Push Summary
Repository: carbondata Updated Tags: refs/tags/apache-carbondata-1.3.0-rc2 [deleted] dade91bde
[carbondata] Git Push Summary
Repository: carbondata Updated Tags: refs/tags/apache-carbondata-1.3.1 [created] a1f6cc4c5
[carbondata] Git Push Summary
Repository: carbondata Updated Tags: refs/tags/apache-carbondata-1.3.1-rc1 [deleted] a1f6cc4c5
[carbondata] Git Push Summary
Repository: carbondata Updated Tags: refs/tags/apache-carbondata-1.4.0 [created] daa260242
[carbondata] Git Push Summary
Repository: carbondata Updated Tags: refs/tags/apache-carbondata-1.4.0-rc2 [deleted] daa260242
[carbondata] Git Push Summary
Repository: carbondata Updated Tags: refs/tags/apache-carbondata-1.5.0 [created] d47ed1634
[carbondata] Git Push Summary
Repository: carbondata Updated Tags: refs/tags/apache-carbondata-1.5.0-rc2 [deleted] d47ed1634
[carbondata] Git Push Summary
Repository: carbondata Updated Tags: refs/tags/apache-carbondata-1.4.1 [created] d3e58359c
[carbondata] Git Push Summary
Repository: carbondata Updated Tags: refs/tags/apache-carbondata-1.4.1-rc2 [deleted] d3e58359c
svn commit: r30072 - /release/carbondata/1.5.0/
Author: ravipesala Date: Mon Oct 15 14:22:37 2018 New Revision: 30072 Log: Upload 1.5.0 Added: release/carbondata/1.5.0/ release/carbondata/1.5.0/apache-carbondata-1.5.0-bin-spark2.2.1-hadoop2.7.2.jar (with props) release/carbondata/1.5.0/apache-carbondata-1.5.0-bin-spark2.2.1-hadoop2.7.2.jar.asc release/carbondata/1.5.0/apache-carbondata-1.5.0-bin-spark2.2.1-hadoop2.7.2.jar.md5 release/carbondata/1.5.0/apache-carbondata-1.5.0-bin-spark2.2.1-hadoop2.7.2.jar.sha512 release/carbondata/1.5.0/apache-carbondata-1.5.0-source-release.zip (with props) release/carbondata/1.5.0/apache-carbondata-1.5.0-source-release.zip.asc release/carbondata/1.5.0/apache-carbondata-1.5.0-source-release.zip.md5 release/carbondata/1.5.0/apache-carbondata-1.5.0-source-release.zip.sha512 Added: release/carbondata/1.5.0/apache-carbondata-1.5.0-bin-spark2.2.1-hadoop2.7.2.jar == Binary file - no diff available. Propchange: release/carbondata/1.5.0/apache-carbondata-1.5.0-bin-spark2.2.1-hadoop2.7.2.jar -- svn:mime-type = application/octet-stream Added: release/carbondata/1.5.0/apache-carbondata-1.5.0-bin-spark2.2.1-hadoop2.7.2.jar.asc == --- release/carbondata/1.5.0/apache-carbondata-1.5.0-bin-spark2.2.1-hadoop2.7.2.jar.asc (added) +++ release/carbondata/1.5.0/apache-carbondata-1.5.0-bin-spark2.2.1-hadoop2.7.2.jar.asc Mon Oct 15 14:22:37 2018 @@ -0,0 +1,16 @@ +-BEGIN PGP SIGNATURE- + +iQIzBAEBCgAdFiEER3EpqJTxH7zLwCVHutcqeKexsu4FAlvEoY0ACgkQutcqeKex +su6/Hw//U70qW24DV+D2Vm3qgYzdNFgECZ5WrF4p5lbJHCKtZhVuPh9bGH6shszb +jdVs1/RhMVLL5i6BzfqzTejT5cCwlvoO1jK0F2TP14LTcnu8QT0PqBpcIFr0yLXm +vAwfxjUtJmza/K9tRhTy7i7jSeFYOXQxf5mlCSo8qsdipmdNPv1vg53eNBsxs9Ne +iYB93p8e71vs8vXH+5V0iG+NHzb8BIZ9OE8ElFQDQ6Z+TuA6FwNLlnzqYXT7NXME +GHoOJ0TSNPlIjxRqM0KOh8W7OFz3qRoWJ0w+OMuP3dXkl0e6siUpdCheh9Sixh1B +/cev29O9UtTn2D6Rnr9DQ3MIFYadWCvZHAHrrOqgabPsEYoapKLlHSF4M/3k4w+E +mc9xYlGXiBiknntnJ0yYU1c6Hz02e4WZeGT2V5mPUvJLxm/0cwAMol1L4kv8cYZ7 ++CeIIhWvYJWrhGnd7YSAo/NVZLlUKysUJfVL2+rjDTFTWPdHcOfb9fwsvWR+3pzs +1V4occRlpIRKuW6om3qRKYdbuo3rR6LUsnyvBAn+LG//TnYXb/COuNyLTg3DV7HP +841dHLIqwSUc1z+12qqZOsmdpSzIndQlxx4tNjH9mcRkEP/R7k1uLWObVZdbioEN +RCYtuqcqqcBrFkjBYT6Vu2EjPJmKsv8PZnt/QSeYjrsKMBJOpx0= +=yoER +-END PGP SIGNATURE- Added: release/carbondata/1.5.0/apache-carbondata-1.5.0-bin-spark2.2.1-hadoop2.7.2.jar.md5 == --- release/carbondata/1.5.0/apache-carbondata-1.5.0-bin-spark2.2.1-hadoop2.7.2.jar.md5 (added) +++ release/carbondata/1.5.0/apache-carbondata-1.5.0-bin-spark2.2.1-hadoop2.7.2.jar.md5 Mon Oct 15 14:22:37 2018 @@ -0,0 +1 @@ +358508403a93f67fbaaa7eae26576a02 apache-carbondata-1.5.0-bin-spark2.2.1-hadoop2.7.2.jar Added: release/carbondata/1.5.0/apache-carbondata-1.5.0-bin-spark2.2.1-hadoop2.7.2.jar.sha512 == --- release/carbondata/1.5.0/apache-carbondata-1.5.0-bin-spark2.2.1-hadoop2.7.2.jar.sha512 (added) +++ release/carbondata/1.5.0/apache-carbondata-1.5.0-bin-spark2.2.1-hadoop2.7.2.jar.sha512 Mon Oct 15 14:22:37 2018 @@ -0,0 +1 @@ +bbe226f018c2ac1fcb00a670130134e23b054745941d9d98e11cf481b27e13703ae206c1c29a1b36145e5499393c4ff48f4504baa316fdf5105f00ab01a1083a apache-carbondata-1.5.0-bin-spark2.2.1-hadoop2.7.2.jar Added: release/carbondata/1.5.0/apache-carbondata-1.5.0-source-release.zip == Binary file - no diff available. 
Propchange: release/carbondata/1.5.0/apache-carbondata-1.5.0-source-release.zip -- svn:mime-type = application/octet-stream Added: release/carbondata/1.5.0/apache-carbondata-1.5.0-source-release.zip.asc == --- release/carbondata/1.5.0/apache-carbondata-1.5.0-source-release.zip.asc (added) +++ release/carbondata/1.5.0/apache-carbondata-1.5.0-source-release.zip.asc Mon Oct 15 14:22:37 2018 @@ -0,0 +1,16 @@ +-BEGIN PGP SIGNATURE- + +iQIzBAABCgAdFiEER3EpqJTxH7zLwCVHutcqeKexsu4FAlu8zuUACgkQutcqeKex +su4F3RAAghG9aRYNv5HI7KeMuf7Y5qBT1zXu6YhnZC2s6O7cEJR1zY+MR7PSwo6N +lTZD1Py853b1Q4qdGjeU+oW0LFa3uHtW0Myi9yOfHur77GMorXEjA12/0a4uTmaQ +Sa+e4FWA0nG9qzijoy8qwa2eEhOyZX8VR3Ytv1ld6jAZt54O8YUY7Yec1rx6OcCS +/Cy6CS1DKRHVSY/ivv/AwFQH15iqZtrKc82y0cTfOFL529hf73xQfcc8ux7DOyXG +uFvDn8DoQdVviGs4Sphjern7m62FOW9iz8zSVItr+9sgVLMeN4gyIPlVBUcC42uU +UFMTN4lrVqamAw2RmCTEge8xkT7qAx4PWuK9P9pu4dCkoh6KpJGrV0I13JeJuBdD +ojZkBcrNLJkEyJV7CZZn3I/+9IKd8U7zO/siUEIqCPCTt74TzEZt7uFq9W6uVUMa +L87mho51yXytEF2rLy5B+o//99sashdu7SjJYsqBHb5LZu8+KHN6TEhWksocZJYf +2H+Ucov/i3q
carbondata git commit: [DOC] Add document for TABLE_BLOCKLET_SIZE
Repository: carbondata Updated Branches: refs/heads/master 3c7b33992 -> 15d38260c [DOC] Add document for TABLE_BLOCKLET_SIZE This closes #2801 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/15d38260 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/15d38260 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/15d38260 Branch: refs/heads/master Commit: 15d38260c0252eee5f80199b61827bd9ca781f65 Parents: 3c7b339 Author: Jacky Li Authored: Mon Oct 8 16:14:33 2018 +0800 Committer: ravipesala Committed: Wed Oct 10 11:27:29 2018 +0530 -- docs/ddl-of-carbondata.md | 59 +- 1 file changed, 35 insertions(+), 24 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/15d38260/docs/ddl-of-carbondata.md -- diff --git a/docs/ddl-of-carbondata.md b/docs/ddl-of-carbondata.md index c1a891d..933a448 100644 --- a/docs/ddl-of-carbondata.md +++ b/docs/ddl-of-carbondata.md @@ -74,7 +74,7 @@ CarbonData DDL statements are documented here,which includes: [TBLPROPERTIES (property_name=property_value, ...)] [LOCATION 'path'] ``` - + **NOTE:** CarbonData also supports "STORED AS carbondata" and "USING carbondata". Find example code at [CarbonSessionExample](https://github.com/apache/carbondata/blob/master/examples/spark2/src/main/scala/org/apache/carbondata/examples/CarbonSessionExample.scala) in the CarbonData repo. ### Usage Guidelines @@ -87,19 +87,20 @@ CarbonData DDL statements are documented here,which includes: | [SORT_COLUMNS](#sort-columns-configuration) | Columns to include in sort and its order of sort | | [SORT_SCOPE](#sort-scope-configuration) | Sort scope of the load.Options include no sort, local sort ,batch sort and global sort | | [TABLE_BLOCKSIZE](#table-block-size-configuration) | Size of blocks to write onto hdfs| +| [TABLE_BLOCKLET_SIZE](#table-blocklet-size-configuration)| Size of blocklet to write in the file| | [MAJOR_COMPACTION_SIZE](#table-compaction-configuration) | Size upto which the segments can be combined into one| | [AUTO_LOAD_MERGE](#table-compaction-configuration) | Whether to auto compact the segments | | [COMPACTION_LEVEL_THRESHOLD](#table-compaction-configuration) | Number of segments to compact into one segment | | [COMPACTION_PRESERVE_SEGMENTS](#table-compaction-configuration) | Number of latest segments that needs to be excluded from compaction | | [ALLOWED_COMPACTION_DAYS](#table-compaction-configuration) | Segments generated within the configured time limit in days will be compacted, skipping others | -| [streaming](#streaming) | Whether the table is a streaming table | +| [STREAMING](#streaming) | Whether the table is a streaming table | | [LOCAL_DICTIONARY_ENABLE](#local-dictionary-configuration) | Enable local dictionary generation | | [LOCAL_DICTIONARY_THRESHOLD](#local-dictionary-configuration) | Cardinality upto which the local dictionary can be generated | | [LOCAL_DICTIONARY_INCLUDE](#local-dictionary-configuration) | Columns for which local dictionary needs to be generated. Useful when local dictionary need not be generated for all string/varchar/char columns | | [LOCAL_DICTIONARY_EXCLUDE](#local-dictionary-configuration) | Columns for which local dictionary generation should be skipped. 
Useful when local dictionary need not be generated for few string/varchar/char columns | | [COLUMN_META_CACHE](#caching-minmax-value-for-required-columns) | Columns whose metadata can be cached in Driver for efficient pruning and improved query performance | | [CACHE_LEVEL](#caching-at-block-or-blocklet-level) | Column metadata caching level. Whether to cache column metadata of block or blocklet | -| [flat_folder](#support-flat-folder-same-as-hiveparquet) | Whether to write all the carbondata files in a single folder.Not writing segments folder during incremental load | +| [FLAT_FOLDER](#support-flat-folder-same-as-hiveparquet) | Whether to write all the carbondata files in a single folder.Not writing segments folder during incremental load | | [LONG_STRING_COLUMNS](#string-longer-than-32000-characters) | Columns which are greater than 32K characters| | [BUCKETNUMBER](#bucketing) | Number of buckets to be created | | [BU
svn commit: r29976 - in /dev/carbondata/1.5.0-rc2: ./ apache-carbondata-1.5.0-source-release.zip apache-carbondata-1.5.0-source-release.zip.asc apache-carbondata-1.5.0-source-release.zip.md5 apache-ca
Author: ravipesala Date: Tue Oct 9 19:12:44 2018 New Revision: 29976 Log: Upload 1.5.0 RC2 Added: dev/carbondata/1.5.0-rc2/ dev/carbondata/1.5.0-rc2/apache-carbondata-1.5.0-source-release.zip (with props) dev/carbondata/1.5.0-rc2/apache-carbondata-1.5.0-source-release.zip.asc dev/carbondata/1.5.0-rc2/apache-carbondata-1.5.0-source-release.zip.md5 dev/carbondata/1.5.0-rc2/apache-carbondata-1.5.0-source-release.zip.sha512 Added: dev/carbondata/1.5.0-rc2/apache-carbondata-1.5.0-source-release.zip == Binary file - no diff available. Propchange: dev/carbondata/1.5.0-rc2/apache-carbondata-1.5.0-source-release.zip -- svn:mime-type = application/octet-stream Added: dev/carbondata/1.5.0-rc2/apache-carbondata-1.5.0-source-release.zip.asc == --- dev/carbondata/1.5.0-rc2/apache-carbondata-1.5.0-source-release.zip.asc (added) +++ dev/carbondata/1.5.0-rc2/apache-carbondata-1.5.0-source-release.zip.asc Tue Oct 9 19:12:44 2018 @@ -0,0 +1,16 @@ +-BEGIN PGP SIGNATURE- + +iQIzBAABCgAdFiEER3EpqJTxH7zLwCVHutcqeKexsu4FAlu8zuUACgkQutcqeKex +su4F3RAAghG9aRYNv5HI7KeMuf7Y5qBT1zXu6YhnZC2s6O7cEJR1zY+MR7PSwo6N +lTZD1Py853b1Q4qdGjeU+oW0LFa3uHtW0Myi9yOfHur77GMorXEjA12/0a4uTmaQ +Sa+e4FWA0nG9qzijoy8qwa2eEhOyZX8VR3Ytv1ld6jAZt54O8YUY7Yec1rx6OcCS +/Cy6CS1DKRHVSY/ivv/AwFQH15iqZtrKc82y0cTfOFL529hf73xQfcc8ux7DOyXG +uFvDn8DoQdVviGs4Sphjern7m62FOW9iz8zSVItr+9sgVLMeN4gyIPlVBUcC42uU +UFMTN4lrVqamAw2RmCTEge8xkT7qAx4PWuK9P9pu4dCkoh6KpJGrV0I13JeJuBdD +ojZkBcrNLJkEyJV7CZZn3I/+9IKd8U7zO/siUEIqCPCTt74TzEZt7uFq9W6uVUMa +L87mho51yXytEF2rLy5B+o//99sashdu7SjJYsqBHb5LZu8+KHN6TEhWksocZJYf +2H+Ucov/i3q/jJmpAqg7fUiguKAH7QTcbKQLTmaLk4pMIy8l4S80zBY6Oq8+nkYB +SvLK3N0dn7B+zVBAd4JeVUcZjrePgYotWiQE7Zju1YK3aXOASeYVT+Zmgq6Z7lpq +NUix8AEHEyu4vK4zIp3YFnimBkfP9mP/pQzaRHXpOuWOugJhFkk= +=7piG +-END PGP SIGNATURE- Added: dev/carbondata/1.5.0-rc2/apache-carbondata-1.5.0-source-release.zip.md5 == --- dev/carbondata/1.5.0-rc2/apache-carbondata-1.5.0-source-release.zip.md5 (added) +++ dev/carbondata/1.5.0-rc2/apache-carbondata-1.5.0-source-release.zip.md5 Tue Oct 9 19:12:44 2018 @@ -0,0 +1 @@ +e5ac2490ce3f52531041b82117188e58 apache-carbondata-1.5.0-source-release.zip Added: dev/carbondata/1.5.0-rc2/apache-carbondata-1.5.0-source-release.zip.sha512 == --- dev/carbondata/1.5.0-rc2/apache-carbondata-1.5.0-source-release.zip.sha512 (added) +++ dev/carbondata/1.5.0-rc2/apache-carbondata-1.5.0-source-release.zip.sha512 Tue Oct 9 19:12:44 2018 @@ -0,0 +1 @@ +783b18c0642f239a70cb30db96051d4d8b76dec982f18e64d03123d0f20134e13644dc899850c61a7213400a1980ee77445512708f670c94c55dbbd8ce30e1e1 apache-carbondata-1.5.0-source-release.zip
[carbondata] Git Push Summary
Repository: carbondata Updated Tags: refs/tags/apache-carbondata-1.5.0-rc2 [created] d47ed1634
carbondata git commit: [maven-release-plugin] prepare release apache-carbondata-1.5.0-rc2
Repository: carbondata Updated Branches: refs/heads/branch-1.5 3c7b33992 -> 935cf3a52 [maven-release-plugin] prepare release apache-carbondata-1.5.0-rc2 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/935cf3a5 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/935cf3a5 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/935cf3a5 Branch: refs/heads/branch-1.5 Commit: 935cf3a5291a12a39f8c68b32157e26b8b1ef92b Parents: 3c7b339 Author: ravipesala Authored: Tue Oct 9 22:42:22 2018 +0530 Committer: ravipesala Committed: Tue Oct 9 22:42:22 2018 +0530 -- assembly/pom.xml | 2 +- common/pom.xml| 2 +- core/pom.xml | 2 +- datamap/bloom/pom.xml | 6 ++ datamap/examples/pom.xml | 6 ++ datamap/lucene/pom.xml| 6 ++ examples/spark2/pom.xml | 2 +- format/pom.xml| 2 +- hadoop/pom.xml| 2 +- integration/hive/pom.xml | 2 +- integration/presto/pom.xml| 2 +- integration/spark-common-test/pom.xml | 14 +++--- integration/spark-common/pom.xml | 2 +- integration/spark-datasource/pom.xml | 2 +- integration/spark2/pom.xml| 2 +- pom.xml | 4 ++-- processing/pom.xml| 2 +- store/sdk/pom.xml | 6 ++ store/search/pom.xml | 6 ++ streaming/pom.xml | 6 ++ tools/cli/pom.xml | 6 ++ 21 files changed, 35 insertions(+), 49 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/935cf3a5/assembly/pom.xml -- diff --git a/assembly/pom.xml b/assembly/pom.xml index eee3e0a..bc91147 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -22,7 +22,7 @@ org.apache.carbondata carbondata-parent -1.5.0-SNAPSHOT +1.5.0 ../pom.xml http://git-wip-us.apache.org/repos/asf/carbondata/blob/935cf3a5/common/pom.xml -- diff --git a/common/pom.xml b/common/pom.xml index 1209388..0bd6a8d 100644 --- a/common/pom.xml +++ b/common/pom.xml @@ -22,7 +22,7 @@ org.apache.carbondata carbondata-parent -1.5.0-SNAPSHOT +1.5.0 ../pom.xml http://git-wip-us.apache.org/repos/asf/carbondata/blob/935cf3a5/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index a7d6f4d..c8c2889 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -22,7 +22,7 @@ org.apache.carbondata carbondata-parent -1.5.0-SNAPSHOT +1.5.0 ../pom.xml http://git-wip-us.apache.org/repos/asf/carbondata/blob/935cf3a5/datamap/bloom/pom.xml -- diff --git a/datamap/bloom/pom.xml b/datamap/bloom/pom.xml index d13eb4f..2c8bdc8 100644 --- a/datamap/bloom/pom.xml +++ b/datamap/bloom/pom.xml @@ -1,12 +1,10 @@ -http://maven.apache.org/POM/4.0.0; - xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance; - xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd;> +http://maven.apache.org/POM/4.0.0; xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance; xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd;> 4.0.0 org.apache.carbondata carbondata-parent -1.5.0-SNAPSHOT +1.5.0 ../../pom.xml http://git-wip-us.apache.org/repos/asf/carbondata/blob/935cf3a5/datamap/examples/pom.xml -- diff --git a/datamap/examples/pom.xml b/datamap/examples/pom.xml index be65529..5450d87 100644 --- a/datamap/examples/pom.xml +++ b/datamap/examples/pom.xml @@ -15,16 +15,14 @@ See the License for the specific language governing permissions and limitations under the License. 
--> -http://maven.apache.org/POM/4.0.0; - xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance; - xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd;> +http://maven.apache.org/POM/4.0.0; xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance; xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd;> 4.0.0 org.apache.carbondata carbondata-parent -1.5.0-SNAPSHOT +1.5.0 ../../pom.xml http://git-wip-us.apache.org
[18/45] carbondata git commit: [HOTFIX][Streaming] Avoid throwing NPE during deleting the streaming lock file
[HOTFIX][Streaming] Avoid throwing NPE during deleting the streaming lock file We should check whether the lock file exists before deleting it. If the lock file exists, it needs to be deleted; if it does not exist, nothing needs to be done. This closes #2775 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/c0163616 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/c0163616 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/c0163616 Branch: refs/heads/branch-1.5 Commit: c016361639df899eeefa956e2dcb23fa962e6e7f Parents: 8284d9e Author: QiangCai Authored: Thu Sep 27 20:32:40 2018 +0800 Committer: Jacky Li Committed: Fri Sep 28 19:39:03 2018 +0800 -- .../management/CarbonAlterTableCompactionCommand.scala | 8 ++-- .../spark/carbondata/TestStreamingTableOperation.scala | 3 +++ 2 files changed, 9 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/c0163616/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonAlterTableCompactionCommand.scala -- diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonAlterTableCompactionCommand.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonAlterTableCompactionCommand.scala index 8b6dabd..b699ec1 100644 --- a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonAlterTableCompactionCommand.scala +++ b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonAlterTableCompactionCommand.scala @@ -334,8 +334,12 @@ case class CarbonAlterTableCompactionCommand( val streamingLock = CarbonLockFactory.getCarbonLockObj( carbonTable.getTableInfo.getOrCreateAbsoluteTableIdentifier, LockUsage.STREAMING_LOCK) -if (!FileFactory.getCarbonFile(streamingLock.getLockFilePath).delete()) { - LOGGER.warn("failed to delete lock file: " + streamingLock.getLockFilePath) +val lockFile = + FileFactory.getCarbonFile(streamingLock.getLockFilePath, FileFactory.getConfiguration) +if (lockFile.exists()) { + if (!lockFile.delete()) { +LOGGER.warn("failed to delete lock file: " + streamingLock.getLockFilePath) + } } try { if (streamingLock.lockWithRetries()) { http://git-wip-us.apache.org/repos/asf/carbondata/blob/c0163616/integration/spark2/src/test/scala/org/apache/spark/carbondata/TestStreamingTableOperation.scala -- diff --git a/integration/spark2/src/test/scala/org/apache/spark/carbondata/TestStreamingTableOperation.scala b/integration/spark2/src/test/scala/org/apache/spark/carbondata/TestStreamingTableOperation.scala index c4e3517..43c1e5a 100644 --- a/integration/spark2/src/test/scala/org/apache/spark/carbondata/TestStreamingTableOperation.scala +++ b/integration/spark2/src/test/scala/org/apache/spark/carbondata/TestStreamingTableOperation.scala @@ -1506,6 +1506,9 @@ class TestStreamingTableOperation extends QueryTest with BeforeAndAfterAll { } test("auto hand off, close and reopen streaming table") { +sql("alter table streaming.stream_table_reopen compact 'close_streaming'") +sql("ALTER TABLE streaming.stream_table_reopen SET TBLPROPERTIES('streaming'='true')") + executeStreamingIngest( tableName = "stream_table_reopen", batchNums = 2,
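The defensive delete shown in the diff above boils down to a simple pattern, sketched below in Java; the actual fix is in Scala and uses CarbonData's FileFactory, so this is only an illustration: attempt the delete only when the file exists, and treat a missing file as "nothing to do".

```java
// Sketch of exists-before-delete: a missing lock file is not an error,
// and no NPE-prone path is taken for a non-existent file.
import java.io.File;

final class LockFileCleanupSketch {
  static void deleteIfPresent(File lockFile) {
    if (lockFile.exists() && !lockFile.delete()) {
      // only a real, failed delete is worth warning about
      System.err.println("failed to delete lock file: " + lockFile.getPath());
    }
  }
}
```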
[21/45] carbondata git commit: [CARBONDATA-2989] Upgrade spark integration version to 2.3.2
[CARBONDATA-2989] Upgrade spark integration version to 2.3.2 1. According to SPARK-PR#22346, change the parameter type from 'outputColumns: Seq[Attribute]' to 'outputColumnNames: Seq[String]' when calling the 'writeAndRead' method; 2. According to SPARK-PR#21815, some parameters were made 'lazy', so move the original class 'CarbonDataSourceScan' to src path 'commonTo2.1And2.2' and add a new class 'CarbonDataSourceScan' in src path 'spark2.3' which adds some lazy parameters. 3. Upgrade the spark integration version to 2.3.2. This closes #2779 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/2081bc87 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/2081bc87 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/2081bc87 Branch: refs/heads/branch-1.5 Commit: 2081bc87a5846055c861b28dfc1e3383c53e7ee0 Parents: 1c1ced3 Author: Zhang Zhichao <441586...@qq.com> Authored: Fri Sep 28 01:30:34 2018 +0800 Committer: chenliang613 Committed: Sun Sep 30 16:59:47 2018 +0800 -- .../testsuite/bigdecimal/TestBigDecimal.scala | 2 +- .../spark/util/CarbonReflectionUtils.scala | 6 +- integration/spark-datasource/pom.xml| 2 +- integration/spark2/pom.xml | 5 +- .../strategy/CarbonDataSourceScan.scala | 53 ++ .../strategy/CarbonDataSourceScan.scala | 53 -- .../strategy/CarbonDataSourceScan.scala | 58 pom.xml | 4 +- 8 files changed, 124 insertions(+), 59 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/2081bc87/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/bigdecimal/TestBigDecimal.scala -- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/bigdecimal/TestBigDecimal.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/bigdecimal/TestBigDecimal.scala index 1f7aafe..551b00b 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/bigdecimal/TestBigDecimal.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/bigdecimal/TestBigDecimal.scala @@ -45,7 +45,7 @@ class TestBigDecimal extends QueryTest with BeforeAndAfterAll { sql("create table if not exists hiveTable(ID Int, date Timestamp, country String, name String, phonetype String, serialname String, salary Decimal(17,2))row format delimited fields terminated by ','") sql(s"LOAD DATA LOCAL INPATH '$resourcesPath/decimalDataWithHeader.csv' into table carbonTable") sql(s"LOAD DATA local inpath '$resourcesPath/decimalDataWithoutHeader.csv' INTO table hiveTable") -sql("create table if not exists hiveBigDecimal(ID Int, date Timestamp, country String, name String, phonetype String, serialname String, salary decimal(27, 10))row format delimited fields terminated by ','") +sql("create table if not exists hiveBigDecimal(ID Int, date Timestamp, country String, name String, phonetype String, serialname String, salary decimal(30, 10))row format delimited fields terminated by ','") sql(s"LOAD DATA local inpath '$resourcesPath/decimalBoundaryDataHive.csv' INTO table hiveBigDecimal") sql("create table if not exists carbonBigDecimal_2 (ID Int, date Timestamp, country String, name String, phonetype String, serialname String, salary decimal(30, 10)) STORED BY 'org.apache.carbondata.format'") sql(s"LOAD DATA LOCAL INPATH '$resourcesPath/decimalBoundaryDataCarbon.csv' into table carbonBigDecimal_2")
http://git-wip-us.apache.org/repos/asf/carbondata/blob/2081bc87/integration/spark-common/src/main/scala/org/apache/spark/util/CarbonReflectionUtils.scala -- diff --git a/integration/spark-common/src/main/scala/org/apache/spark/util/CarbonReflectionUtils.scala b/integration/spark-common/src/main/scala/org/apache/spark/util/CarbonReflectionUtils.scala index 9955286..0055e87 100644 --- a/integration/spark-common/src/main/scala/org/apache/spark/util/CarbonReflectionUtils.scala +++ b/integration/spark-common/src/main/scala/org/apache/spark/util/CarbonReflectionUtils.scala @@ -294,9 +294,11 @@ object CarbonReflectionUtils { .getMethod("writeAndRead", classOf[SaveMode], classOf[LogicalPlan], - classOf[Seq[Attribute]], + classOf[Seq[String]], classOf[SparkPlan]) - method.invoke(dataSourceObj, mode, query, query.output, physicalPlan) + // since spark 2.3.2 version (SPARK-PR#22346), + // change 'query.output' to
[17/45] carbondata git commit: [CARBONDATA-2974] Fixed multiple expressions issue on datamap chooser and bloom datamap
[CARBONDATA-2974] Fixed multiple expressions issue on datamap chooser and bloom datamap The DataMap framework provides a mechanism to composite expressions and forward them to the corresponding datamap; in this way, the datamap can handle the pruning in batch. But currently the expressions the framework forwards contain ones that cannot be supported by the datamap, so here we optimize the datamap chooser. We composite the expressions and wrap them into an AndExpression; these expressions are exactly the ones the datamap wants. The bloomfilter datamap is changed accordingly to handle the AndExpression. This closes #2767 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/8284d9ed Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/8284d9ed Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/8284d9ed Branch: refs/heads/branch-1.5 Commit: 8284d9ed1fe60d8881788656b7f78c055f76e453 Parents: 8427771 Author: ravipesala Authored: Wed Sep 26 16:56:03 2018 +0530 Committer: xuchuanyin Committed: Fri Sep 28 16:46:49 2018 +0800 -- .../carbondata/core/datamap/DataMapChooser.java | 76 ++-- .../datamap/bloom/BloomCoarseGrainDataMap.java | 8 ++- .../bloom/BloomCoarseGrainDataMapSuite.scala| 62 +++- 3 files changed, 106 insertions(+), 40 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/8284d9ed/core/src/main/java/org/apache/carbondata/core/datamap/DataMapChooser.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapChooser.java b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapChooser.java index 68696cf..3b6537c 100644 --- a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapChooser.java +++ b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapChooser.java @@ -39,6 +39,7 @@ import org.apache.carbondata.core.scan.expression.logical.AndExpression; import org.apache.carbondata.core.scan.expression.logical.OrExpression; import org.apache.carbondata.core.scan.filter.intf.ExpressionType; import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf; +import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.TrueConditionalResolverImpl; /** * This chooser does 2 jobs. @@ -123,9 +124,11 @@ public class DataMapChooser { if (resolverIntf != null) { Expression expression = resolverIntf.getFilterExpression(); List datamaps = level == DataMapLevel.CG ? cgDataMaps : fgDataMaps; - ExpressionTuple tuple = selectDataMap(expression, datamaps, resolverIntf); - if (tuple.dataMapExprWrapper != null) { -return tuple.dataMapExprWrapper; + if (datamaps.size() > 0) { +ExpressionTuple tuple = selectDataMap(expression, datamaps, resolverIntf); +if (tuple.dataMapExprWrapper != null) { + return tuple.dataMapExprWrapper; +} } } return null; @@ -177,34 +180,35 @@ public class DataMapChooser { // If both left and right has datamap then we can either merge both datamaps to single // datamap if possible. Otherwise apply AND expression.
if (left.dataMapExprWrapper != null && right.dataMapExprWrapper != null) { -filterExpressionTypes.add( - left.dataMapExprWrapper.getFilterResolverIntf().getFilterExpression() -.getFilterExpressionType()); -filterExpressionTypes.add( - right.dataMapExprWrapper.getFilterResolverIntf().getFilterExpression() -.getFilterExpressionType()); +filterExpressionTypes.addAll(left.filterExpressionTypes); +filterExpressionTypes.addAll(right.filterExpressionTypes); List columnExpressions = new ArrayList<>(); columnExpressions.addAll(left.columnExpressions); columnExpressions.addAll(right.columnExpressions); // Check if we can merge them to single datamap. TableDataMap dataMap = chooseDataMap(allDataMap, columnExpressions, filterExpressionTypes); +TrueConditionalResolverImpl resolver = new TrueConditionalResolverImpl( +new AndExpression(left.expression, right.expression), false, +true); if (dataMap != null) { ExpressionTuple tuple = new ExpressionTuple(); tuple.columnExpressions = columnExpressions; - tuple.dataMapExprWrapper = new DataMapExprWrapperImpl(dataMap, filterResolverIntf); + tuple.dataMapExprWrapper = new DataMapExprWrapperImpl(dataMap, resolver); + tuple.expression = resolver.getFilterExpression();
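A simplified Java sketch of the chooser behavior described above: when both sides of an AND are supported by a datamap, their expressions are composited into one AndExpression so the datamap can prune in a single batch. The types below are stand-ins for CarbonData's expression model, not the real classes.

```java
// Hypothetical, simplified expression model for illustration only.
interface FilterExpr {}

final class AndExpr implements FilterExpr {
  final FilterExpr left;
  final FilterExpr right;

  AndExpr(FilterExpr left, FilterExpr right) {
    this.left = left;
    this.right = right;
  }
}

final class ChooserSketch {
  // Composite only the datamap-supported sub-expressions. If one side is
  // unsupported (null here), forward just the supported side; the rest of
  // the filter stays in the normal scan path.
  static FilterExpr composite(FilterExpr supportedLeft, FilterExpr supportedRight) {
    if (supportedLeft == null) {
      return supportedRight;
    }
    if (supportedRight == null) {
      return supportedLeft;
    }
    return new AndExpr(supportedLeft, supportedRight);
  }
}
```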
[10/45] carbondata git commit: [CARBONDATA-2971] Add shard info of blocklet for debugging
[CARBONDATA-2971] Add shard info of blocklet for debugging Add a toString method to print both the shard name and the blocklet id for debugging. This closes #2765 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/5c0da31a Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/5c0da31a Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/5c0da31a Branch: refs/heads/branch-1.5 Commit: 5c0da31a5a0afaf707455fa80ac431a082a57ec9 Parents: 3cd8b94 Author: Manhua Authored: Wed Sep 26 10:34:54 2018 +0800 Committer: xuchuanyin Committed: Thu Sep 27 11:37:56 2018 +0800 -- .../carbondata/core/indexstore/Blocklet.java| 21 .../blockletindex/BlockletDataMapFactory.java | 2 +- 2 files changed, 18 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/5c0da31a/core/src/main/java/org/apache/carbondata/core/indexstore/Blocklet.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/Blocklet.java b/core/src/main/java/org/apache/carbondata/core/indexstore/Blocklet.java index c6e1681..3270d08 100644 --- a/core/src/main/java/org/apache/carbondata/core/indexstore/Blocklet.java +++ b/core/src/main/java/org/apache/carbondata/core/indexstore/Blocklet.java @@ -65,17 +65,20 @@ public class Blocklet implements Writable,Serializable { return filePath; } - @Override public void write(DataOutput out) throws IOException { + @Override + public void write(DataOutput out) throws IOException { out.writeUTF(filePath); out.writeUTF(blockletId); } - @Override public void readFields(DataInput in) throws IOException { + @Override + public void readFields(DataInput in) throws IOException { filePath = in.readUTF(); blockletId = in.readUTF(); } - @Override public boolean equals(Object o) { + @Override + public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; @@ -92,7 +95,17 @@ public class Blocklet implements Writable,Serializable { blocklet.blockletId == null; } - @Override public int hashCode() { + @Override + public String toString() { +final StringBuffer sb = new StringBuffer("Blocklet{"); +sb.append("filePath='").append(filePath).append('\''); +sb.append(", blockletId='").append(blockletId).append('\''); +sb.append('}'); +return sb.toString(); + } + + @Override + public int hashCode() { int result = filePath != null ? filePath.hashCode() : 0; result = 31 * result; if (compareBlockletIdForObjectMatching) { http://git-wip-us.apache.org/repos/asf/carbondata/blob/5c0da31a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMapFactory.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMapFactory.java b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMapFactory.java index e16c3cd..096a5e3 100644 --- a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMapFactory.java +++ b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMapFactory.java @@ -252,7 +252,7 @@ public class BlockletDataMapFactory extends CoarseGrainDataMapFactory } } } -throw new IOException("Blocklet with blockid " + blocklet.getBlockletId() + " not found "); +throw new IOException("Blocklet not found: " + blocklet.toString()); }
[23/45] carbondata git commit: [CARBONDATA-2982] CarbonSchemaReader support array
[CARBONDATA-2982] CarbonSchemaReader support array This PR fixes the issue and changes: org.apache.carbondata.sdk.file.CarbonSchemaReader#readSchemaInDataFile and org.apache.carbondata.sdk.file.CarbonSchemaReader#readSchemaInIndexFile. This PR removes the child schema. This closes #2780 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/d8a51c9b Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/d8a51c9b Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/d8a51c9b Branch: refs/heads/branch-1.5 Commit: d8a51c9bf314fb1cd5f6112e66eb04e776a0553d Parents: 7d1fcb3 Author: xubo245 Authored: Fri Sep 28 11:47:22 2018 +0800 Committer: manishgupta88 Committed: Wed Oct 3 16:24:38 2018 +0530 -- .../examples/sdk/CarbonReaderExample.java | 4 +- .../carbondata/examplesCI/RunExamples.scala | 5 ++ .../carbondata/sdk/file/CarbonSchemaReader.java | 14 +++- .../carbondata/sdk/file/CarbonReaderTest.java | 86 4 files changed, 105 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/d8a51c9b/examples/spark2/src/main/java/org/apache/carbondata/examples/sdk/CarbonReaderExample.java -- diff --git a/examples/spark2/src/main/java/org/apache/carbondata/examples/sdk/CarbonReaderExample.java b/examples/spark2/src/main/java/org/apache/carbondata/examples/sdk/CarbonReaderExample.java index 9e80567..ef4ae7a 100644 --- a/examples/spark2/src/main/java/org/apache/carbondata/examples/sdk/CarbonReaderExample.java +++ b/examples/spark2/src/main/java/org/apache/carbondata/examples/sdk/CarbonReaderExample.java @@ -122,10 +122,11 @@ public class CarbonReaderExample { for (int j = 0; j < arr.length; j++) { System.out.print(arr[j] + " "); } +assert (arr[0].equals("Hello")); +assert (arr[3].equals("Carbon")); System.out.println(); i++; } -System.out.println("\nFinished"); reader.close(); // Read data @@ -148,7 +149,6 @@ public class CarbonReaderExample { row[5], row[6], row[7], row[8], row[9], row[10])); i++; } -System.out.println("\nFinished"); reader2.close(); FileUtils.deleteDirectory(new File(path)); } catch (Throwable e) { http://git-wip-us.apache.org/repos/asf/carbondata/blob/d8a51c9b/examples/spark2/src/test/scala/org/apache/carbondata/examplesCI/RunExamples.scala -- diff --git a/examples/spark2/src/test/scala/org/apache/carbondata/examplesCI/RunExamples.scala b/examples/spark2/src/test/scala/org/apache/carbondata/examplesCI/RunExamples.scala index 2b9b999..6a13dc3 100644 --- a/examples/spark2/src/test/scala/org/apache/carbondata/examplesCI/RunExamples.scala +++ b/examples/spark2/src/test/scala/org/apache/carbondata/examplesCI/RunExamples.scala @@ -23,6 +23,7 @@ import org.scalatest.BeforeAndAfterAll import org.apache.carbondata.examples._ import org.apache.carbondata.core.constants.CarbonCommonConstants import org.apache.carbondata.core.util.CarbonProperties +import org.apache.carbondata.examples.sdk.CarbonReaderExample /** * Test suite for examples @@ -113,4 +114,8 @@ class RunExamples extends QueryTest with BeforeAndAfterAll { test("ExternalTableExample") { ExternalTableExample.exampleBody(spark) } + + test("CarbonReaderExample") { +CarbonReaderExample.main(null) + } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/carbondata/blob/d8a51c9b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonSchemaReader.java -- diff --git a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonSchemaReader.java b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonSchemaReader.java
index d8882bc..e84a25a 100644 --- a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonSchemaReader.java +++ b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonSchemaReader.java @@ -65,7 +65,15 @@ public class CarbonSchemaReader { */ public static Schema readSchemaInDataFile(String dataFilePath) throws IOException { CarbonHeaderReader reader = new CarbonHeaderReader(dataFilePath); -return new Schema(reader.readSchema()); +List columnSchemaList = new ArrayList(); +List schemaList = reader.readSchema(); +for (int i = 0; i < schemaList.size(); i++) { + ColumnSchema columnSchema = schemaList.get(i); + if (!(columnSchema.getColumnName().contains("."))) { +
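The filtering visible in the diff above relies on the naming convention for complex-type children: child columns carry dotted names (for example, a hypothetical "arrayCol.val"), so keeping only names without a '.' drops the child schemas and returns just the top-level columns. A small Java sketch of that filter:

```java
// Keep only top-level columns; complex-type children use dotted names.
import java.util.ArrayList;
import java.util.List;

final class TopLevelColumnFilter {
  static List<String> topLevelColumns(List<String> allColumnNames) {
    List<String> result = new ArrayList<>();
    for (String name : allColumnNames) {
      if (!name.contains(".")) { // skip child schema entries like "arr.val"
        result.add(name);
      }
    }
    return result;
  }
}
```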
[32/45] carbondata git commit: [CARBONDATA-2990] Fixed JVM crash when rebuilding bloom datamap
[CARBONDATA-2990] Fixed JVM crash when rebuilding bloom datamap Problem: while rebuilding the datamap, it accesses the datamap store, so it builds the datamap and stores it in unsafe onheap storage. But while closing the reader, it frees all memory acquired during that task. Since the acquired memory is onheap but was being released with the offheap allocator, it crashed the JVM. Solution: Maintain the type of memory acquired in the memory block itself, get the allocator as per the memory type, and release with it. This closes #2793 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/8fbd4a5f Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/8fbd4a5f Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/8fbd4a5f Branch: refs/heads/branch-1.5 Commit: 8fbd4a5f53070b3755f1f573b09e0066fa93a6ea Parents: c3a8704 Author: ravipesala Authored: Sun Sep 30 11:27:57 2018 +0530 Committer: manishgupta88 Committed: Thu Oct 4 14:39:25 2018 +0530 -- .../core/indexstore/UnsafeMemoryDMStore.java| 14 +++ .../core/memory/HeapMemoryAllocator.java| 5 ++- .../carbondata/core/memory/MemoryBlock.java | 14 ++- .../carbondata/core/memory/MemoryType.java | 23 ++ .../core/memory/UnsafeMemoryAllocator.java | 2 +- .../core/memory/UnsafeMemoryManager.java| 44 +++- 6 files changed, 70 insertions(+), 32 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/8fbd4a5f/core/src/main/java/org/apache/carbondata/core/indexstore/UnsafeMemoryDMStore.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/UnsafeMemoryDMStore.java b/core/src/main/java/org/apache/carbondata/core/indexstore/UnsafeMemoryDMStore.java index 196559a..0db1b0a 100644 --- a/core/src/main/java/org/apache/carbondata/core/indexstore/UnsafeMemoryDMStore.java +++ b/core/src/main/java/org/apache/carbondata/core/indexstore/UnsafeMemoryDMStore.java @@ -19,9 +19,9 @@ package org.apache.carbondata.core.indexstore; import org.apache.carbondata.core.indexstore.row.DataMapRow; import org.apache.carbondata.core.indexstore.row.UnsafeDataMapRow; import org.apache.carbondata.core.indexstore.schema.CarbonRowSchema; -import org.apache.carbondata.core.memory.MemoryAllocator; import org.apache.carbondata.core.memory.MemoryBlock; import org.apache.carbondata.core.memory.MemoryException; +import org.apache.carbondata.core.memory.MemoryType; import org.apache.carbondata.core.memory.UnsafeMemoryManager; import org.apache.carbondata.core.metadata.datatype.DataType; import org.apache.carbondata.core.metadata.datatype.DataTypes; @@ -51,7 +51,7 @@ public class UnsafeMemoryDMStore extends AbstractMemoryDMStore { public UnsafeMemoryDMStore() throws MemoryException { this.allocatedSize = capacity; this.memoryBlock = -UnsafeMemoryManager.allocateMemoryWithRetry(MemoryAllocator.HEAP, taskId, allocatedSize); +UnsafeMemoryManager.allocateMemoryWithRetry(MemoryType.ONHEAP, taskId, allocatedSize); this.pointers = new int[1000]; } @@ -74,10 +74,10 @@ public class UnsafeMemoryDMStore extends AbstractMemoryDMStore { private void increaseMemory(int requiredMemory) throws MemoryException { MemoryBlock newMemoryBlock = UnsafeMemoryManager -.allocateMemoryWithRetry(MemoryAllocator.HEAP, taskId, allocatedSize + requiredMemory); +.allocateMemoryWithRetry(MemoryType.ONHEAP, taskId, allocatedSize + requiredMemory); getUnsafe().copyMemory(this.memoryBlock.getBaseObject(), this.memoryBlock.getBaseOffset(), newMemoryBlock.getBaseObject(), newMemoryBlock.getBaseOffset(), runningLength);
-UnsafeMemoryManager.INSTANCE.freeMemory(MemoryAllocator.HEAP, taskId, this.memoryBlock); +UnsafeMemoryManager.INSTANCE.freeMemory(taskId, this.memoryBlock); allocatedSize = allocatedSize + requiredMemory; this.memoryBlock = newMemoryBlock; } @@ -190,10 +190,10 @@ public class UnsafeMemoryDMStore extends AbstractMemoryDMStore { public void finishWriting() throws MemoryException { if (runningLength < allocatedSize) { MemoryBlock allocate = - UnsafeMemoryManager.allocateMemoryWithRetry(MemoryAllocator.HEAP, taskId, runningLength); + UnsafeMemoryManager.allocateMemoryWithRetry(MemoryType.ONHEAP, taskId, runningLength); getUnsafe().copyMemory(memoryBlock.getBaseObject(), memoryBlock.getBaseOffset(), allocate.getBaseObject(), allocate.getBaseOffset(), runningLength); - UnsafeMemoryManager.INSTANCE.freeMemory(MemoryAllocator.HEAP, taskId, memoryBlock); + UnsafeMemoryManager.INSTANCE.freeMemory(taskId, memoryBlock); memoryBlock = allocate; } // Comp
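In short, the fix tags each block with the memory type it was allocated from and frees it with the matching allocator. A hypothetical Java sketch of that idea follows; the enum name mirrors the diff, the rest is illustrative and not CarbonData's real classes.

```java
// Each block remembers where it came from, so free() can never hand an
// on-heap block to the native allocator (the crash this commit fixes).
enum MemoryType { ONHEAP, OFFHEAP }

final class TaggedBlockSketch {
  final MemoryType type; // recorded once, at allocation time
  final long address;    // only meaningful for OFFHEAP in this sketch

  TaggedBlockSketch(MemoryType type, long address) {
    this.type = type;
    this.address = address;
  }
}

final class MemoryManagerSketch {
  void free(TaggedBlockSketch block) {
    if (block.type == MemoryType.OFFHEAP) {
      freeOffHeap(block.address); // native free via the off-heap allocator
    }
    // ONHEAP: nothing to free explicitly; dropping the reference lets the
    // garbage collector reclaim the backing array.
  }

  private void freeOffHeap(long address) {
    // stand-in for a native release, e.g. an Unsafe-based allocator
  }
}
```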
[37/45] carbondata git commit: [CARBONDATA-2983][BloomDataMap] Change bloom query model to proceed multiple filter values
[CARBONDATA-2983][BloomDataMap] Change bloom query model to process multiple filter values This PR optimizes pruning for InExpression. For an expression like colA in (1, 2, 3), previously three bloom query models were created, and each of them iterated over all the bloomfilters; now only one bloom query model needs to be generated. This PR also intersects the pruned results generated by each expression, an optimization that matters when multiple index columns are used in one query. This closes #2781 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/396c26f5 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/396c26f5 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/396c26f5 Branch: refs/heads/branch-1.5 Commit: 396c26f531192e3da8a233ca14024dd8899da52a Parents: 3edea12 Author: Manhua Authored: Fri Sep 28 11:39:39 2018 +0800 Committer: ravipesala Committed: Thu Oct 4 18:13:12 2018 +0530 -- .../datamap/bloom/BloomCoarseGrainDataMap.java | 98 1 file changed, 59 insertions(+), 39 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/396c26f5/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java -- diff --git a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java index ee71142..a5376be 100644 --- a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java +++ b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java @@ -21,15 +21,7 @@ import java.io.IOException; import java.io.UnsupportedEncodingException; import java.text.DateFormat; import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Date; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.TimeZone; +import java.util.*; import java.util.concurrent.ConcurrentHashMap; import org.apache.carbondata.common.annotations.InterfaceAudience; @@ -47,7 +39,6 @@ import org.apache.carbondata.core.indexstore.Blocklet; import org.apache.carbondata.core.indexstore.PartitionSpec; import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier; import org.apache.carbondata.core.metadata.CarbonMetadata; -import org.apache.carbondata.core.metadata.datatype.DataType; import org.apache.carbondata.core.metadata.datatype.DataTypes; import org.apache.carbondata.core.metadata.encoder.Encoding; import org.apache.carbondata.core.metadata.schema.table.CarbonTable; @@ -171,7 +162,7 @@ public class BloomCoarseGrainDataMap extends CoarseGrainDataMap { @Override public List prune(FilterResolverIntf filterExp, SegmentProperties segmentProperties, List partitions) throws IOException { -Set hitBlocklets = new HashSet<>(); +Set hitBlocklets = null; if (filterExp == null) { // null is different from empty here. Empty means after pruning, no blocklet need to scan.
return null; @@ -185,6 +176,7 @@ public class BloomCoarseGrainDataMap extends CoarseGrainDataMap { throw new RuntimeException(e); } for (BloomQueryModel bloomQueryModel : bloomQueryModels) { + Set tempHitBlockletsResult = new HashSet<>(); LOGGER.debug("prune blocklet for query: " + bloomQueryModel); BloomCacheKeyValue.CacheKey cacheKey = new BloomCacheKeyValue.CacheKey( this.indexPath.toString(), bloomQueryModel.columnName); @@ -195,17 +187,32 @@ public class BloomCoarseGrainDataMap extends CoarseGrainDataMap { // skip shard which has been pruned in Main datamap continue; } -boolean scanRequired = bloomFilter.membershipTest(new Key(bloomQueryModel.filterValue)); +boolean scanRequired = false; +for (byte[] value: bloomQueryModel.filterValues) { + scanRequired = bloomFilter.membershipTest(new Key(value)); + if (scanRequired) { +// if any filter value hit this bloomfilter
// no need to check other filter values +break; + } +} if (scanRequired) { LOGGER.debug(String.format("BloomCoarseGrainDataMap: Need to scan -> blocklet#%s", String.valueOf(bloomFilter.getBlockletNo()))); Blocklet blocklet = new Blocklet(bloomFilter.getShardName(), - String.valueOf(bloomFilter.getBlockletNo())); - hitBlocklets.add(block
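As a self-contained illustration of the new model, here is a sketch using Hadoop's bloom filter classes, which the datamap builds on; BloomQueryModel below is a simplified stand-in for the real class, and the intersection step of the PR is noted in a comment:

```java
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.util.bloom.BloomFilter;
import org.apache.hadoop.util.bloom.Key;
import org.apache.hadoop.util.hash.Hash;

public class MultiValueBloomPrune {

  // One query model per column, holding all filter values of an IN expression.
  static class BloomQueryModel {
    final String columnName;
    final List<byte[]> filterValues;
    BloomQueryModel(String columnName, List<byte[]> filterValues) {
      this.columnName = columnName;
      this.filterValues = filterValues;
    }
  }

  // A shard needs scanning if ANY value of the IN list may be present.
  // (In the real prune(), the hit set of each query model is then intersected
  // with the running result, e.g. via Set.retainAll.)
  static boolean scanRequired(BloomFilter filter, BloomQueryModel model) {
    for (byte[] value : model.filterValues) {
      if (filter.membershipTest(new Key(value))) {
        return true; // first hit short-circuits; no need to test other values
      }
    }
    return false;
  }

  public static void main(String[] args) {
    BloomFilter filter = new BloomFilter(1 << 16, 3, Hash.MURMUR_HASH);
    filter.add(new Key("2".getBytes(StandardCharsets.UTF_8)));

    List<byte[]> inValues = new ArrayList<>();
    inValues.add("1".getBytes(StandardCharsets.UTF_8));
    inValues.add("2".getBytes(StandardCharsets.UTF_8));
    inValues.add("3".getBytes(StandardCharsets.UTF_8));

    // colA in (1, 2, 3): one model, one pass over the bloom filter chain.
    BloomQueryModel model = new BloomQueryModel("colA", inValues);
    System.out.println("scan required: " + scanRequired(filter, model));
  }
}
```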
[41/45] carbondata git commit: [Documentation] Readme updated with latest topics and new TOC
http://git-wip-us.apache.org/repos/asf/carbondata/blob/ca30ad97/docs/configuration-parameters.md -- diff --git a/docs/configuration-parameters.md b/docs/configuration-parameters.md index 9dd8164..0a4565a 100644 --- a/docs/configuration-parameters.md +++ b/docs/configuration-parameters.md @@ -16,7 +16,7 @@ --> # Configuring CarbonData - This guide explains the configurations that can be used to tune CarbonData to achieve better performance.Most of the properties that control the internal settings have reasonable default values.They are listed along with the properties along with explanation. + This guide explains the configurations that can be used to tune CarbonData to achieve better performance.Most of the properties that control the internal settings have reasonable default values. They are listed along with the properties along with explanation. * [System Configuration](#system-configuration) * [Data Loading Configuration](#data-loading-configuration) @@ -31,68 +31,68 @@ This section provides the details of all the configurations required for the Car | Property | Default Value | Description | ||-|--| -| carbon.storelocation | spark.sql.warehouse.dir property value | Location where CarbonData will create the store, and write the data in its custom format. If not specified,the path defaults to spark.sql.warehouse.dir property. NOTE: Store location should be in HDFS. | -| carbon.ddl.base.hdfs.url | (none) | To simplify and shorten the path to be specified in DDL/DML commands, this property is supported.This property is used to configure the HDFS relative path, the path configured in carbon.ddl.base.hdfs.url will be appended to the HDFS path configured in fs.defaultFS of core-site.xml. If this path is configured, then user need not pass the complete path while dataload. For example: If absolute path of the csv file is hdfs://10.18.101.155:54310/data/cnbc/2016/xyz.csv, the path "hdfs://10.18.101.155:54310" will come from property fs.defaultFS and user can configure the /data/cnbc/ as carbon.ddl.base.hdfs.url. Now while dataload user can specify the csv path as /2016/xyz.csv. | -| carbon.badRecords.location | (none) | CarbonData can detect the records not conforming to defined table schema and isolate them as bad records.This property is used to specify where to store such bad records. | -| carbon.streaming.auto.handoff.enabled | true | CarbonData supports storing of streaming data.To have high throughput for streaming, the data is written in Row format which is highly optimized for write, but performs poorly for query.When this property is true and when the streaming data size reaches ***carbon.streaming.segment.max.size***, CabonData will automatically convert the data to columnar format and optimize it for faster querying.**NOTE:** It is not recommended to keep the default value which is true. | -| carbon.streaming.segment.max.size | 102400 | CarbonData writes streaming data in row format which is optimized for high write throughput.This property defines the maximum size of data to be held is row format, beyond which it will be converted to columnar format in order to support high performane query, provided ***carbon.streaming.auto.handoff.enabled*** is true. **NOTE:** Setting higher value will impact the streaming ingestion. The value has to be configured in bytes. 
| -| carbon.query.show.datamaps | true | CarbonData stores datamaps as independent tables so as to allow independent maintenance to some extent.When this property is true,which is by default, show tables command will list all the tables including datatmaps(eg: Preaggregate table), else datamaps will be excluded from the table list.**NOTE:** It is generally not required for the user to do any maintenance operations on these tables and hence not required to be seen.But it is shown by default so that user or admin can get clear understanding of the system for capacity planning. | -| carbon.segment.lock.files.preserve.hours | 48 | In order to support parallel data loading onto the same table, CarbonData sequences(locks) at the granularity of segments.Operations affecting the segment(like IUD, alter) are blocked from parallel operations.This property value
[31/45] carbondata git commit: [HOTFIX] Changes to align printing of information in explain command based on enable.query.statistics flag
[HOTFIX] Changes to align printing of information in explain command based on enable.query.statistics flag Now the user needs to set the flag enable.query.statistics = true to print the pruning and dataMap related information in the explain command. This closes #2795 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/c3a87044 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/c3a87044 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/c3a87044 Branch: refs/heads/branch-1.5 Commit: c3a8704494d8d4cffbe8f35c43101ce248913965 Parents: d8003a3 Author: manishgupta88 Authored: Wed Oct 3 18:13:45 2018 +0530 Committer: ravipesala Committed: Wed Oct 3 20:23:12 2018 +0530 -- .../apache/carbondata/core/profiler/ExplainCollector.java | 9 - docs/datamap/datamap-management.md | 2 +- .../cluster/sdv/generated/BloomFilterDataMapTestCase.scala | 6 +- .../datamap/lucene/LuceneFineGrainDataMapSuite.scala| 5 + .../lucene/LuceneFineGrainDataMapWithSearchModeSuite.scala | 5 + .../preaggregate/TestPreAggregateTableSelection.scala | 7 +++ .../testsuite/createTable/TestRenameTableWithDataMap.scala | 8 .../spark/testsuite/datamap/CGDataMapTestCase.scala | 5 + .../spark/testsuite/datamap/FGDataMapTestCase.scala | 7 ++- .../src/main/scala/org/apache/spark/sql/CarbonSession.scala | 1 + .../sql/execution/command/table/CarbonExplainCommand.scala | 6 +- .../datamap/bloom/BloomCoarseGrainDataMapSuite.scala| 5 + 12 files changed, 61 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/c3a87044/core/src/main/java/org/apache/carbondata/core/profiler/ExplainCollector.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/profiler/ExplainCollector.java b/core/src/main/java/org/apache/carbondata/core/profiler/ExplainCollector.java index 755c56a..8513dac 100644 --- a/core/src/main/java/org/apache/carbondata/core/profiler/ExplainCollector.java +++ b/core/src/main/java/org/apache/carbondata/core/profiler/ExplainCollector.java @@ -26,7 +26,9 @@ import java.util.Objects; import java.util.concurrent.ConcurrentHashMap; import org.apache.carbondata.common.annotations.InterfaceAudience; +import org.apache.carbondata.core.constants.CarbonCommonConstants; import org.apache.carbondata.core.datamap.dev.expr.DataMapWrapperSimpleInfo; +import org.apache.carbondata.core.util.CarbonProperties; /** * An information collector used for EXPLAIN command, to print out @@ -52,7 +54,12 @@ public class ExplainCollector { } public static void setup() { -INSTANCE = new ExplainCollector(); +boolean isQueryStatisticsEnabled = Boolean.parseBoolean(CarbonProperties.getInstance() +.getProperty(CarbonCommonConstants.ENABLE_QUERY_STATISTICS, +CarbonCommonConstants.ENABLE_QUERY_STATISTICS_DEFAULT)); +if (isQueryStatisticsEnabled) { + INSTANCE = new ExplainCollector(); +} } public static void remove() { http://git-wip-us.apache.org/repos/asf/carbondata/blob/c3a87044/docs/datamap/datamap-management.md -- diff --git a/docs/datamap/datamap-management.md b/docs/datamap/datamap-management.md index eee03a7..bf52c05 100644 --- a/docs/datamap/datamap-management.md +++ b/docs/datamap/datamap-management.md @@ -122,7 +122,7 @@ There is a DataMapCatalog interface to retrieve schema of all datamap, it can be How can user know whether datamap is used in the query?
-User can use EXPLAIN command to know, it will print out something like +User can set enable.query.statistics = true and use EXPLAIN command to know, it will print out something like ```text == CarbonData Profiler == http://git-wip-us.apache.org/repos/asf/carbondata/blob/c3a87044/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/BloomFilterDataMapTestCase.scala -- diff --git a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/BloomFilterDataMapTestCase.scala b/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/BloomFilterDataMapTestCase.scala index 8acbcd6..077e007 100644 --- a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/BloomFilterDataMapTestCase.scala +++ b/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/BloomFilterDataMapTestCase.scala @@ -31,7 +31,8 @@ class BloomFilterDataMapTestCase extends
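The gating pattern is simply to materialize the collector only when the flag is on and let every recording call no-op otherwise. A hedged sketch of that pattern (the class and method names here are illustrative, not the real ExplainCollector API):

```java
// Minimal sketch of the gating pattern: the collector only exists when the
// statistics flag is on; every recording call is a no-op otherwise.
public class ExplainCollectorSketch {
  private static ExplainCollectorSketch instance;

  private final StringBuilder buffer = new StringBuilder();

  public static void setup(boolean queryStatisticsEnabled) {
    instance = queryStatisticsEnabled ? new ExplainCollectorSketch() : null;
  }

  public static boolean enabled() {
    return instance != null;
  }

  public static void recordPruningInfo(String info) {
    if (!enabled()) {
      return; // EXPLAIN output stays minimal when the flag is off
    }
    instance.buffer.append(info).append('\n');
  }
}
```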
[25/45] carbondata git commit: [CARBONDATA-2979] select count fails when carbondata file is written through SDK and read through sparkfileformat for complex datatype map(struct->array->map)
[CARBONDATA-2979] select count fails when carbondata file is written through SDK and read through sparkfileformat for complex datatype map(struct->array->map) Problem: Select queries fail for the map type when data is loaded using the Avro SDK and queried through an external table using the carbon file format. Analysis: When data is loaded through the Avro SDK with a schema of type struct, the fieldName was hardcoded to "val"; because of this, the schema written in the file footer and the schema inferred for the external table mismatched, which led to the query failure. Solution: Instead of hardcoding the field name to "val", use the field name given in the schema. This closes #2774 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/682160fa Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/682160fa Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/682160fa Branch: refs/heads/branch-1.5 Commit: 682160fa1bbde5f13c8a28e0114d3f18e5ffaf79 Parents: e9a198a Author: manishgupta88 Authored: Thu Sep 27 18:02:34 2018 +0530 Committer: ravipesala Committed: Wed Oct 3 19:57:50 2018 +0530 -- .../datasource/SparkCarbonDataSourceTest.scala | 63 +++- .../sql/carbondata/datasource/TestUtil.scala| 56 - .../carbondata/sdk/file/AvroCarbonWriter.java | 11 ++-- 3 files changed, 122 insertions(+), 8 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/682160fa/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala -- diff --git a/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala b/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala index 3be8cb3..37677d0 100644 --- a/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala +++ b/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala @@ -1117,11 +1117,11 @@ class SparkCarbonDataSourceTest extends FunSuite with BeforeAndAfterAll { } private def createParquetTable { - FileFactory.deleteAllCarbonFilesOfDir(FileFactory.getCarbonFile(s"$warehouse1/../warehouse2")) +val path = FileFactory.getUpdatedFilePath(s"$warehouse1/../warehouse2") +FileFactory.deleteAllCarbonFilesOfDir(FileFactory.getCarbonFile(s"$path")) spark.sql(s"create table par_table(male boolean, age int, height double, name string, address " + s"string," + - s"salary long, floatField float, bytefield byte) using parquet location " + - s"'$warehouse1/../warehouse2'") + s"salary long, floatField float, bytefield byte) using parquet location '$path'") (0 to 10).foreach { i => spark.sql(s"insert into par_table select 'true','$i', ${i.toDouble / 2}, 'name$i', " + s"'address$i', ${i*100}, $i.$i, '$i'") @@ -1181,6 +1181,63 @@ class SparkCarbonDataSourceTest extends FunSuite with BeforeAndAfterAll { } } + def buildStructSchemaWithNestedArrayOfMapTypeAsValue(writerPath: String, rows: Int): Unit = { +FileFactory.deleteAllFilesOfDir(new File(writerPath)) +val mySchema = + """ +|{ +| "name": "address", +| "type": "record", +| "fields": [ +|{ +| "name": "name", +| "type": "string" +|}, +|{ +| "name": "age", +| "type": "int" +|}, +|{ +| "name": "structRecord", +| "type": { +|"type": "record", +|"name": "my_address", +|"fields": [ +| { +|"name": "street", +|"type": "string" +| }, +| {
+|"name": "houseDetails", +|"type": { +| "type": "array", +| "items": { +| "name": "memberDetails", +| "type": "map
[20/45] carbondata git commit: [CARBONDATA-2980][BloomDataMap] Fix bug in clearing bloomindex cache when recreating table and datamap
[CARBONDATA-2980][BloomDataMap] Fix bug in clearing bloomindex cache when recreating table and datamap We use the shard path as part of the key for the bloomindex cache. However, the path separator on Windows is different from that on Linux, which causes the cache not to be cleaned when we clear it. (When loading the cache the path separator is '/', while when dropping the cache it is '\'.) Here we fixed the bug by unifying the path separator while clearing the cache. This closes #2778 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/1c1ced32 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/1c1ced32 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/1c1ced32 Branch: refs/heads/branch-1.5 Commit: 1c1ced32d122ba8ce7cbad4fd29f778f5dbb4871 Parents: 9ae91cc Author: xuchuanyin Authored: Sat Sep 29 14:03:09 2018 +0800 Committer: manishgupta88 Committed: Sun Sep 30 12:19:56 2018 +0530 -- .../datamap/bloom/BloomCoarseGrainDataMapFactory.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/1c1ced32/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFactory.java -- diff --git a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFactory.java b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFactory.java index 8c74c94..8974918 100644 --- a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFactory.java +++ b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFactory.java @@ -235,13 +235,13 @@ public class BloomCoarseGrainDataMapFactory extends DataMapFactory
[16/45] carbondata git commit: [CARBONDATA-2818] Upgrade presto integration version to 0.210
[CARBONDATA-2818] Upgrade presto integration version to 0.210 Upgrade presto integration version to 0.210 This closes #2733 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/8427771f Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/8427771f Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/8427771f Branch: refs/heads/branch-1.5 Commit: 8427771fc22071099186d3310500d5fcd6c419a6 Parents: 629d625 Author: chenliang613 Authored: Wed Sep 19 08:18:28 2018 +0800 Committer: Raghunandan S Committed: Fri Sep 28 11:39:48 2018 +0530 -- docs/quick-start-guide.md | 29 .../Presto_Cluster_Setup_For_Carbondata.md | 24 integration/presto/README.md| 12 +--- integration/presto/pom.xml | 4 +-- 4 files changed, 40 insertions(+), 29 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/8427771f/docs/quick-start-guide.md -- diff --git a/docs/quick-start-guide.md b/docs/quick-start-guide.md index 37c398c..0fdf055 100644 --- a/docs/quick-start-guide.md +++ b/docs/quick-start-guide.md @@ -300,24 +300,24 @@ Once the table is created,it can be queried from Presto.** ### Installing Presto - 1. Download the 0.187 version of Presto using: -`wget https://repo1.maven.org/maven2/com/facebook/presto/presto-server/0.187/presto-server-0.187.tar.gz` + 1. Download the 0.210 version of Presto using: +`wget https://repo1.maven.org/maven2/com/facebook/presto/presto-server/0.210/presto-server-0.210.tar.gz` - 2. Extract Presto tar file: `tar zxvf presto-server-0.187.tar.gz`. + 2. Extract Presto tar file: `tar zxvf presto-server-0.210.tar.gz`. 3. Download the Presto CLI for the coordinator and name it presto. ``` -wget https://repo1.maven.org/maven2/com/facebook/presto/presto-cli/0.187/presto-cli-0.187-executable.jar +wget https://repo1.maven.org/maven2/com/facebook/presto/presto-cli/0.210/presto-cli-0.210-executable.jar -mv presto-cli-0.187-executable.jar presto +mv presto-cli-0.210-executable.jar presto chmod +x presto ``` ### Create Configuration Files - 1. Create `etc` folder in presto-server-0.187 directory. + 1. Create `etc` folder in presto-server-0.210 directory. 2. Create `config.properties`, `jvm.config`, `log.properties`, and `node.properties` files. 3. Install uuid to generate a node.id. @@ -363,10 +363,15 @@ Once the table is created,it can be queried from Presto.** coordinator=true node-scheduler.include-coordinator=false http-server.http.port=8086 - query.max-memory=50GB - query.max-memory-per-node=2GB + query.max-memory=5GB + query.max-total-memory-per-node=5GB + query.max-memory-per-node=3GB + memory.heap-headroom-per-node=1GB discovery-server.enabled=true - discovery.uri=:8086 + discovery.uri=http://localhost:8086 + task.max-worker-threads=4 + optimizer.dictionary-aggregation=true + optimizer.optimize-hash-generation = false ``` The options `node-scheduler.include-coordinator=false` and `coordinator=true` indicate that the node is the coordinator and tells the coordinator not to do any of the computation work itself and to use the workers. @@ -383,7 +388,7 @@ Then, `query.max-memory=<30GB * number of nodes>`. ``` coordinator=false http-server.http.port=8086 - query.max-memory=50GB + query.max-memory=5GB query.max-memory-per-node=2GB discovery.uri=:8086 ``` @@ -405,12 +410,12 @@ Then, `query.max-memory=<30GB * number of nodes>`. ### Start Presto Server on all nodes ``` -./presto-server-0.187/bin/launcher start +./presto-server-0.210/bin/launcher start ``` To run it as a background process. 
``` -./presto-server-0.187/bin/launcher run +./presto-server-0.210/bin/launcher run ``` To run it in foreground. http://git-wip-us.apache.org/repos/asf/carbondata/blob/8427771f/integration/presto/Presto_Cluster_Setup_For_Carbondata.md -- diff --git a/integration/presto/Presto_Cluster_Setup_For_Carbondata.md b/integration/presto/Presto_Cluster_Setup_For_Carbondata.md index 082b8fe..975e1fa 100644 --- a/integration/presto/Presto_Cluster_Setup_For_Carbondata.md +++ b/integration/presto/Presto_Cluster_Setup_For_Carbondata.md @@ -2,24 +2,24 @@ ## Installing Presto - 1. Download the 0.187 version of Presto using: - `wget https://repo1.maven.org/maven2/com/facebook/presto/presto-server/0.187/presto-server-0.187.tar.gz` + 1. Download the 0.210 version of Presto using: + `wget https://repo1.maven.org/maven2/com/facebook/presto/presto-server/0.210/presto-server-0.210.tar.gz` - 2. Extract Presto tar file:
[44/45] carbondata git commit: [CARBONDATA-2994] unify badrecordpath property name for create and load
[CARBONDATA-2994] unify badrecordpath property name for create and load Problem: Currently the bad records path can be specified in both create and load. In create the property name is bad_records_path, while in load it is bad_record_path. This can confuse the user. Solution: Use bad_record_path as the property for create so that both load and create use the same name. This closes #2799 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/19097f27 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/19097f27 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/19097f27 Branch: refs/heads/branch-1.5 Commit: 19097f272fe3227c71c86338bb8bf788e87cd4aa Parents: fa08825 Author: kunal642 Authored: Fri Oct 5 14:57:26 2018 +0530 Committer: ravipesala Committed: Mon Oct 8 14:29:59 2018 +0530 -- docs/ddl-of-carbondata.md| 11 +++ docs/dml-of-carbondata.md| 8 .../carbondata/hadoop/api/CarbonTableOutputFormat.java | 2 +- .../StandardPartitionBadRecordLoggerTest.scala | 2 +- .../org/apache/carbondata/spark/StreamingOption.scala| 2 +- .../sql/execution/command/carbonTableSchemaCommon.scala | 4 ++-- .../command/table/CarbonDescribeFormattedCommand.scala | 7 +++ .../spark/carbondata/BadRecordPathLoadOptionTest.scala | 4 ++-- .../spark/carbondata/TestStreamingTableOperation.scala | 8 .../carbondata/processing/util/CarbonBadRecordUtil.java | 2 +- 10 files changed, 30 insertions(+), 20 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/19097f27/docs/ddl-of-carbondata.md -- diff --git a/docs/ddl-of-carbondata.md b/docs/ddl-of-carbondata.md index 22d754a..c1a891d 100644 --- a/docs/ddl-of-carbondata.md +++ b/docs/ddl-of-carbondata.md @@ -33,6 +33,7 @@ CarbonData DDL statements are documented here,which includes: * [Hive/Parquet folder Structure](#support-flat-folder-same-as-hiveparquet) * [Extra Long String columns](#string-longer-than-32000-characters) * [Compression for Table](#compression-for-table) + * [Bad Records Path](#bad-records-path) * [CREATE TABLE AS SELECT](#create-table-as-select) * [CREATE EXTERNAL TABLE](#create-external-table) * [External Table on Transactional table location](#create-external-table-on-managed-table-data-location) @@ -454,6 +455,16 @@ CarbonData DDL statements are documented here,which includes: ``` carbon.column.compressor=zstd ``` + + - # Bad Records Path + This property is used to specify the location where bad records would be written. + As the table path remains the same after rename therefore the user can use this property to + specify bad records path for the table at the time of creation, so that the same path can + be later viewed in table description for reference. + + ``` + TBLPROPERTIES('BAD_RECORD_PATH'='/opt/badrecords'') + ``` ## CREATE TABLE AS SELECT This function allows user to create a Carbon table from any of the Parquet/Hive/Carbon table. This is beneficial when the user wants to create Carbon table from any other Parquet/Hive table and use the Carbon query engine to query and achieve better query results for cases where Carbon is faster than other file formats. Also this feature can be used for backing up the data.
http://git-wip-us.apache.org/repos/asf/carbondata/blob/19097f27/docs/dml-of-carbondata.md -- diff --git a/docs/dml-of-carbondata.md b/docs/dml-of-carbondata.md index db7c118..393ebd3 100644 --- a/docs/dml-of-carbondata.md +++ b/docs/dml-of-carbondata.md @@ -240,14 +240,6 @@ CarbonData DML statements are documented here,which includes: * Since Bad Records Path can be specified in create, load and carbon properties. Therefore, value specified in load will have the highest priority, and value specified in carbon properties will have the least priority. - **Bad Records Path:** - This property is used to specify the location where bad records would be written. - - - ``` - TBLPROPERTIES('BAD_RECORDS_PATH'='/opt/badrecords'') - ``` - Example: ``` http://git-wip-us.apache.org/repos/asf/carbondata/blob/19097f27/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableOutputFormat.java -- diff --git a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableOutputFormat.java b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableOutputFormat.java index 762983b..f0ad94d 100644 --- a/hadoop/src/main/java/org/apache
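With the unified name, the same path can still come from three places; as the DML document notes, the load option has the highest priority and the system property the least. An illustrative helper (not CarbonData code) showing that resolution order:

```java
import java.util.Map;

// Sketch of the documented precedence for the bad records path:
// load option > table property (BAD_RECORD_PATH) > carbon.badRecords.location.
public final class BadRecordPathResolver {
  public static String resolve(Map<String, String> loadOptions,
                               Map<String, String> tableProperties,
                               Map<String, String> carbonProperties) {
    String path = loadOptions.get("bad_record_path");
    if (path == null || path.isEmpty()) {
      path = tableProperties.get("bad_record_path"); // unified name after this change
    }
    if (path == null || path.isEmpty()) {
      path = carbonProperties.get("carbon.badRecords.location");
    }
    return path;
  }
}
```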
[22/45] carbondata git commit: [HOTFIX] Fixed S3 metrics issue.
[HOTFIX] Fixed S3 metrics issue. Problem: When data is read from S3, the metrics report more data read than the total size of the carbondata files. Reason: carbondata uses dataInputStream.skip, which the S3 interface cannot handle properly; it reads in a loop and reads more data than required. Solution: Use FSDataInputStream.seek instead of skip to fix this issue. This closes #2789 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/7d1fcb30 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/7d1fcb30 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/7d1fcb30 Branch: refs/heads/branch-1.5 Commit: 7d1fcb3092a1e9da6c49f17c63c6217892e9e531 Parents: 2081bc8 Author: ravipesala Authored: Fri Sep 28 18:29:08 2018 +0530 Committer: kumarvishal09 Committed: Wed Oct 3 16:08:49 2018 +0530 -- .../datastore/filesystem/AbstractDFSCarbonFile.java | 7 +-- .../apache/carbondata/core/reader/ThriftReader.java | 16 ++-- 2 files changed, 11 insertions(+), 12 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/7d1fcb30/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/AbstractDFSCarbonFile.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/AbstractDFSCarbonFile.java b/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/AbstractDFSCarbonFile.java index b1e476b..c764430 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/AbstractDFSCarbonFile.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/AbstractDFSCarbonFile.java @@ -327,8 +327,11 @@ public abstract class AbstractDFSCarbonFile implements CarbonFile { CompressionCodec codec = new CompressionCodecFactory(hadoopConf).getCodecByName(codecName); inputStream = codec.createInputStream(inputStream); } - -return new DataInputStream(new BufferedInputStream(inputStream)); +if (bufferSize <= 0 && inputStream instanceof FSDataInputStream) { + return (DataInputStream) inputStream; +} else { + return new DataInputStream(new BufferedInputStream(inputStream)); +} } /** http://git-wip-us.apache.org/repos/asf/carbondata/blob/7d1fcb30/core/src/main/java/org/apache/carbondata/core/reader/ThriftReader.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/reader/ThriftReader.java b/core/src/main/java/org/apache/carbondata/core/reader/ThriftReader.java index 48d8345..f5ecda6 100644 --- a/core/src/main/java/org/apache/carbondata/core/reader/ThriftReader.java +++ b/core/src/main/java/org/apache/carbondata/core/reader/ThriftReader.java @@ -25,6 +25,7 @@ import org.apache.carbondata.core.datastore.impl.FileFactory; import org.apache.carbondata.core.util.CarbonUtil; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; import org.apache.thrift.TBase; import org.apache.thrift.TException; import org.apache.thrift.protocol.TCompactProtocol; @@ -36,10 +37,6 @@ import org.apache.thrift.transport.TIOStreamTransport; */ public class ThriftReader { /** - * buffer size - */ - private static final int bufferSize = 2048; - /** * File containing the objects. */ private String fileName; @@ -101,7 +98,7 @@ public class ThriftReader { public void open() throws IOException { Configuration conf = configuration != null ?
configuration : FileFactory.getConfiguration(); FileFactory.FileType fileType = FileFactory.getFileType(fileName); -dataInputStream = FileFactory.getDataInputStream(fileName, fileType, bufferSize, conf); +dataInputStream = FileFactory.getDataInputStream(fileName, fileType, conf); binaryIn = new TCompactProtocol(new TIOStreamTransport(dataInputStream)); } @@ -109,7 +106,9 @@ public class ThriftReader { * This method will set the position of stream from where data has to be read */ public void setReadOffset(long bytesToSkip) throws IOException { -if (dataInputStream.skip(bytesToSkip) != bytesToSkip) { +if (dataInputStream instanceof FSDataInputStream) { + ((FSDataInputStream)dataInputStream).seek(bytesToSkip); +} else if (dataInputStream.skip(bytesToSkip) != bytesToSkip) { throw new IOException("It doesn't set the offset properly"); } } @@ -118,10 +117,7 @@ public class ThriftReader { * Checks if another objects is available by attempting to read another byte from the stream. */ public boolean hasNext() throws IOException { -dataInputStream.mark(1); -int val = dataInputStream.read(); -dataInputStream.res
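The difference between the two calls: seek positions the stream exactly once, while skip on some object-store streams reads through the skipped bytes (inflating read metrics) and may return short counts. A sketch of the seek-first pattern, with a defensive skip loop as the fallback (the loop is a variant of the original single-skip check):

```java
import java.io.DataInputStream;
import java.io.IOException;

import org.apache.hadoop.fs.FSDataInputStream;

public final class StreamOffsetUtil {
  // Prefer an absolute seek when the stream supports it; otherwise fall back
  // to skipping, retrying because skip() is allowed to return short counts.
  public static void setReadOffset(DataInputStream in, long offset) throws IOException {
    if (in instanceof FSDataInputStream) {
      ((FSDataInputStream) in).seek(offset); // one positioning call, no extra reads
    } else {
      long remaining = offset;
      while (remaining > 0) {
        long skipped = in.skip(remaining);
        if (skipped <= 0) {
          throw new IOException("It doesn't set the offset properly");
        }
        remaining -= skipped;
      }
    }
  }
}
```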
[15/45] carbondata git commit: [DOC] Add kafka example in StreamSQL doc
[DOC] Add kafka example in StreamSQL doc This closes #2769 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/629d6254 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/629d6254 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/629d6254 Branch: refs/heads/branch-1.5 Commit: 629d62549860e037c2816702564713ce696419d5 Parents: 54bcf49 Author: Jacky Li Authored: Wed Sep 26 21:56:52 2018 +0800 Committer: QiangCai Committed: Fri Sep 28 11:52:41 2018 +0800 -- docs/streaming-guide.md | 33 +++-- 1 file changed, 27 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/629d6254/docs/streaming-guide.md -- diff --git a/docs/streaming-guide.md b/docs/streaming-guide.md index 3b71662..56e400e 100644 --- a/docs/streaming-guide.md +++ b/docs/streaming-guide.md @@ -304,8 +304,9 @@ Following example shows how to start a streaming ingest job | register TIMESTAMP, | updated TIMESTAMP |) - |STORED BY carbondata + |STORED AS carbondata |TBLPROPERTIES ( + | 'streaming'='source', | 'format'='csv', | 'path'='$csvDataDir' |) @@ -324,7 +325,7 @@ Following example shows how to start a streaming ingest job | register TIMESTAMP, | updated TIMESTAMP |) - |STORED BY carbondata + |STORED AS carbondata |TBLPROPERTIES ( | 'streaming'='true' |) @@ -378,11 +379,14 @@ When this is issued, carbon will start a structured streaming job to do the stre name STRING, age INT ) - STORED BY carbondata + STORED AS carbondata TBLPROPERTIES( -'format'='socket', -'host'='localhost', -'port'='' + 'streaming'='source', + 'format'='socket', + 'host'='localhost', + 'port'='', + 'record_format'='csv', // can be csv or json, default is csv + 'delimiter'='|' ) ``` @@ -394,6 +398,7 @@ When this is issued, carbon will start a structured streaming job to do the stre .format("socket") .option("host", "localhost") .option("port", "") +.option("delimiter", "|") ``` @@ -402,6 +407,22 @@ When this is issued, carbon will start a structured streaming job to do the stre - In the given STMPROPERTIES, user must specify `'trigger'`, its value must be `ProcessingTime` (In future, other value will be supported). User should also specify interval value for the streaming job. - If the schema specifid in sink table is different from CTAS, the streaming job will fail +For Kafka data source, create the source table by: + ```SQL + CREATE TABLE source( +name STRING, +age INT + ) + STORED AS carbondata + TBLPROPERTIES( + 'streaming'='source', + 'format'='kafka', + 'kafka.bootstrap.servers'='kafkaserver:9092', + 'subscribe'='test' + 'record_format'='csv', // can be csv or json, default is csv + 'delimiter'='|' + ) + ``` # STOP STREAM
[36/45] carbondata git commit: [CARBONDATA-2986] Table Properties are lost when multiple drivers create a table concurrently
[CARBONDATA-2986] Table Properties are lost when multiple drivers create a table concurrently Issue: When multiple drivers create the same table concurrently, the table properties are lost. Root Cause: The schema file gets overwritten from CarbonRelation#createTableIfNotExists because the table lookup fails; this happens because the .mdt file is updated concurrently and the current table is removed from the cache in org.apache.spark.sql.hive.CarbonFileMetastore#checkSchemasModifiedTimeAndReloadTable. Solution: Since the carbon table is already created and the schema file is already written, there is no need to do the lookup again. This closes #2785 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/3edea12a Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/3edea12a Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/3edea12a Branch: refs/heads/branch-1.5 Commit: 3edea12a83e70dddb1eca271bf5660f73de272f5 Parents: 11bd0ad Author: BJangir Authored: Fri Sep 28 17:17:30 2018 +0530 Committer: ravipesala Committed: Thu Oct 4 18:05:06 2018 +0530 -- .../scala/org/apache/spark/sql/CarbonSource.scala | 17 ++--- 1 file changed, 14 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/3edea12a/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonSource.scala -- diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonSource.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonSource.scala index 16cee96..cd1087d 100644 --- a/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonSource.scala +++ b/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonSource.scala @@ -57,6 +57,7 @@ class CarbonSource extends CreatableRelationProvider with RelationProvider with SchemaRelationProvider with StreamSinkProvider with DataSourceRegister { override def shortName(): String = "carbondata" + private val LOGGER = LogServiceFactory.getLogService(CarbonSource.getClass.getName) // will be called if hive supported create table command is provided override def createRelation(sqlContext: SQLContext, @@ -143,7 +144,7 @@ class CarbonSource extends CreatableRelationProvider with RelationProvider .exists(_.table.equalsIgnoreCase(tableName))) { getPathForTable(sqlContext.sparkSession, dbName, tableName, newParameters) } else { -createTableIfNotExists(sqlContext.sparkSession, newParameters, dataSchema) + createTableIfNotExists(sqlContext.sparkSession, dbName, tableName, newParameters, dataSchema) } CarbonDatasourceHadoopRelation(sqlContext.sparkSession, Array(path), updatedParams, @@ -160,6 +161,8 @@ class CarbonSource extends CreatableRelationProvider with RelationProvider private def createTableIfNotExists( sparkSession: SparkSession, + dbName: String, + tableName: String, parameters: Map[String, String], dataSchema: StructType): (String, Map[String, String]) = { @@ -167,10 +170,18 @@ class CarbonSource extends CreatableRelationProvider with RelationProvider val tableName: String = parameters.getOrElse("tableName", "").toLowerCase try { - val carbonTable = CarbonEnv.getCarbonTable(Some(dbName), tableName)(sparkSession) - (carbonTable.getTablePath, parameters) + if (!(parameters.contains("carbonSchemaPartsNo") +|| parameters.contains("carbonschemapartsno"))) { +val carbonTable = CarbonEnv.getCarbonTable(Some(dbName), tableName)(sparkSession) +(carbonTable.getTablePath, parameters) + } else { +(getPathForTable(sparkSession, dbName, tableName, parameters)) + } + } catch {
case _: NoSuchTableException => +LOGGER.warn("Carbon Table [" +dbName +"] [" +tableName +"] is not found, " + + "Now existing Schema will be overwritten with default properties") val metaStore = CarbonEnv.getInstance(sparkSession).carbonMetastore val identifier = AbsoluteTableIdentifier.from( CarbonEnv.getTablePath(Some(dbName), tableName)(sparkSession),
[19/45] carbondata git commit: [CARBONDATA-2978] Fixed JVM crash issue when insert into carbon table from other carbon table
[CARBONDATA-2978] Fixed JVM crash issue when insert into carbon table from other carbon table Problem: When data is inserted from one carbon table into another carbon table and unsafe load and query are enabled, the JVM crashes. Reason: An insert from one carbon table into another uses the same task and thread, so both sides get the same taskId, and the unsafe memory manager tries to release all memory acquired by the task even though the load is still running on it. Solution: Check the listeners and ignore the cache clearing. This closes #2773 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/9ae91cc5 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/9ae91cc5 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/9ae91cc5 Branch: refs/heads/branch-1.5 Commit: 9ae91cc5a9d683ef54550cfe7e65c4d63d5e5a24 Parents: c016361 Author: ravipesala Authored: Wed Sep 26 23:04:59 2018 +0530 Committer: kumarvishal09 Committed: Fri Sep 28 19:51:06 2018 +0530 -- .../hadoop/api/CarbonTableOutputFormat.java | 35 + .../InsertIntoNonCarbonTableTestCase.scala | 79 +++- .../carbondata/spark/rdd/CarbonScanRDD.scala| 76 --- .../rdd/InsertTaskCompletionListener.scala | 4 +- .../spark/rdd/QueryTaskCompletionListener.scala | 4 +- .../datasources/SparkCarbonFileFormat.scala | 23 +- .../CarbonTaskCompletionListener.scala | 72 ++ 7 files changed, 246 insertions(+), 47 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/9ae91cc5/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableOutputFormat.java -- diff --git a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableOutputFormat.java b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableOutputFormat.java index 28817e9..762983b 100644 --- a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableOutputFormat.java +++ b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableOutputFormat.java @@ -424,6 +424,8 @@ public class CarbonTableOutputFormat extends FileOutputFormathttp://git-wip-us.apache.org/repos/asf/carbondata/blob/9ae91cc5/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/insertQuery/InsertIntoNonCarbonTableTestCase.scala -- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/insertQuery/InsertIntoNonCarbonTableTestCase.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/insertQuery/InsertIntoNonCarbonTableTestCase.scala index a745672..a3fb11c 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/insertQuery/InsertIntoNonCarbonTableTestCase.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/insertQuery/InsertIntoNonCarbonTableTestCase.scala @@ -18,10 +18,13 @@ */ package org.apache.carbondata.spark.testsuite.insertQuery -import org.apache.spark.sql.Row +import org.apache.spark.sql.{Row, SaveMode} import org.apache.spark.sql.test.util.QueryTest import org.scalatest.BeforeAndAfterAll +import org.apache.carbondata.core.constants.CarbonCommonConstants +import org.apache.carbondata.core.util.CarbonProperties + class InsertIntoNonCarbonTableTestCase extends QueryTest with BeforeAndAfterAll { override def beforeAll { @@ -64,6 +67,8 @@ class InsertIntoNonCarbonTableTestCase extends QueryTest with BeforeAndAfterAll "Latest_webTypeDataVerNumber,Latest_operatorsVersion,Latest_phonePADPartitionedVersions," +
"Latest_operatorId,gamePointDescription,gamePointId,contractNumber', " + "'bad_records_logger_enable'='false','bad_records_action'='FORCE')") + CarbonProperties.getInstance().addProperty(CarbonCommonConstants.ENABLE_UNSAFE_IN_QUERY_EXECUTION, "true") + CarbonProperties.getInstance().addProperty(CarbonCommonConstants.ENABLE_UNSAFE_COLUMN_PAGE, "true") } test("insert into hive") { @@ -102,7 +107,79 @@ class InsertIntoNonCarbonTableTestCase extends QueryTest with BeforeAndAfterAll sql("drop table thive_cond") } + test("jvm crash when insert data from datasource table to session table") { +val spark = sqlContext.sparkSession +import spark.implicits._ + +import scala.util.Random +val r = new Random() +val df = spark.sparkContext.parallelize(1 to 10) + .map(x => (r.nextInt(10), "name" + x % 8, "city" + x % 50, BigDecimal.apply(x % 60))) + .to
[14/45] carbondata git commit: [CARBONDATA-2972] Debug Logs and function added for Adaptive Encoding
[CARBONDATA-2972] Debug Logs and function added for Adaptive Encoding Added a function to get the type of encoding used. Added the debug log for checking which type of encoding is used This closes #2758 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/54bcf496 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/54bcf496 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/54bcf496 Branch: refs/heads/branch-1.5 Commit: 54bcf49638262af82583d930632018da6c73c8c5 Parents: 1b4109d Author: Manish Nalla Authored: Tue Sep 25 17:44:49 2018 +0530 Committer: manishgupta88 Committed: Thu Sep 27 17:16:37 2018 +0530 -- .../core/datastore/page/encoding/ColumnPageEncoder.java | 8 .../org/apache/carbondata/processing/store/TablePage.java| 3 ++- 2 files changed, 10 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/54bcf496/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoder.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoder.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoder.java index 3067823..44e7192 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoder.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoder.java @@ -78,6 +78,14 @@ public abstract class ColumnPageEncoder { } } + public Encoding getEncodingType() { +List currEncodingList = getEncodingList(); +if (CarbonUtil.isEncodedWithMeta(currEncodingList)) { + return currEncodingList.get(0); +} +return null; + } + /** * Return a encoded column page by encoding the input page * The encoded binary data and metadata are wrapped in encoding column page http://git-wip-us.apache.org/repos/asf/carbondata/blob/54bcf496/processing/src/main/java/org/apache/carbondata/processing/store/TablePage.java -- diff --git a/processing/src/main/java/org/apache/carbondata/processing/store/TablePage.java b/processing/src/main/java/org/apache/carbondata/processing/store/TablePage.java index 791b4c6..82129db 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/store/TablePage.java +++ b/processing/src/main/java/org/apache/carbondata/processing/store/TablePage.java @@ -424,7 +424,8 @@ public class TablePage { "Encoder result ---> Source data type: " + noDictDimensionPages[noDictIndex] .getDataType().getName() + " Destination data type: " + targetDataType .getName() + " for the column: " + noDictDimensionPages[noDictIndex] - .getColumnSpec().getFieldName()); + .getColumnSpec().getFieldName() + " having encoding type: " + + columnPageEncoder.getEncodingType()); } } noDictIndex++;
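A simplified stand-in for the new helper: encoders that write metadata place the meaningful encoding first in their encoding list, so that entry is what the debug log reports. The enum values and the stand-in for CarbonUtil.isEncodedWithMeta below are approximations, not the real constants:

```java
import java.util.Collections;
import java.util.List;

public class EncodingTypeSketch {
  enum Encoding { ADAPTIVE_INTEGRAL, ADAPTIVE_DELTA_INTEGRAL, ADAPTIVE_FLOATING, DIRECT_COMPRESS }

  // Mirrors the shape of getEncodingType(): if the page was encoded with
  // metadata, the first list entry identifies the scheme; otherwise null.
  static Encoding getEncodingType(List<Encoding> encodings) {
    if (!encodings.isEmpty() && encodings.get(0) != Encoding.DIRECT_COMPRESS) {
      return encodings.get(0);
    }
    return null; // nothing adaptive to report in the debug log
  }

  public static void main(String[] args) {
    System.out.println(getEncodingType(Collections.singletonList(Encoding.ADAPTIVE_INTEGRAL)));
  }
}
```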
[28/45] carbondata git commit: [CARBONDATA-2987] Data mismatch after compaction with measure sort columns
[CARBONDATA-2987] Data mismatch after compaction with measure sort columns Problem: Data mismatch after compaction with measure sort columns. Root cause: In the compaction flow (DictionaryBasedResultCollector), the inverted index mapping is not handled in ColumnPageWrapper. Because of this the row ID was wrong, and rows of no-dictionary dimension columns picked up data from other rows, hence the data mismatch. Solution: Handle the inverted index mapping for the DictionaryBasedResultCollector flow in ColumnPageWrapper. This closes #2784 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/0b16816d Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/0b16816d Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/0b16816d Branch: refs/heads/branch-1.5 Commit: 0b16816da7b401318929bfe973dad4bf397e90d9 Parents: 6ef4e46 Author: ajantha-bhat Authored: Fri Sep 28 16:27:55 2018 +0530 Committer: ravipesala Committed: Wed Oct 3 20:09:13 2018 +0530 -- .../chunk/store/ColumnPageWrapper.java | 52 +++ .../compaction/nodictionary_compaction.csv | 3 + .../MajorCompactionWithMeasureSortColumns.scala | 97 3 files changed, 136 insertions(+), 16 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/0b16816d/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java index 098287e..627c75f 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java @@ -122,10 +122,19 @@ public class ColumnPageWrapper implements DimensionColumnPage { } @Override public byte[] getChunkData(int rowId) { -return getChunkData(rowId, false); +byte[] nullBitSet = getNullBitSet(rowId, columnPage.getColumnSpec().getColumnType()); +if (nullBitSet != null) { + // if this row is null, return default null represent in byte array + return nullBitSet; +} else { + if (isExplicitSorted()) { +rowId = getInvertedReverseIndex(rowId); + } + return getChunkDataInBytes(rowId); +} } - private byte[] getChunkData(int rowId, boolean isRowIdChanged) { + private byte[] getChunkDataInBytes(int rowId) { ColumnType columnType = columnPage.getColumnSpec().getColumnType(); DataType srcDataType = columnPage.getColumnSpec().getSchemaDataType(); DataType targetDataType = columnPage.getDataType(); @@ -134,15 +143,6 @@ public class ColumnPageWrapper implements DimensionColumnPage { .getDictionaryValue(CarbonUtil.getSurrogateInternal(columnPage.getBytes(rowId), 0, 3)); } else if ((columnType == ColumnType.COMPLEX_PRIMITIVE && isAdaptiveEncoded()) || ( columnType == ColumnType.PLAIN_VALUE && DataTypeUtil.isPrimitiveColumn(srcDataType))) { - if (!isRowIdChanged && columnPage.getNullBits().get(rowId) - && columnType == ColumnType.COMPLEX_PRIMITIVE) { -// if this row is null, return default null represent in byte array -return CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY; - } - if (!isRowIdChanged && columnPage.getNullBits().get(rowId)) { -// if this row is null, return default null represent in byte array -return CarbonCommonConstants.EMPTY_BYTE_ARRAY; - } if (srcDataType == DataTypes.FLOAT) { float floatData = columnPage.getFloat(rowId); return ByteUtil.toXorBytes(floatData); @@ -182,9 +182,6 @@ public class ColumnPageWrapper
implements DimensionColumnPage { throw new RuntimeException("unsupported type: " + targetDataType); } } else if ((columnType == ColumnType.COMPLEX_PRIMITIVE && !isAdaptiveEncoded())) { - if (!isRowIdChanged && columnPage.getNullBits().get(rowId)) { -return CarbonCommonConstants.EMPTY_BYTE_ARRAY; - } if ((srcDataType == DataTypes.BYTE) || (srcDataType == DataTypes.BOOLEAN)) { byte[] out = new byte[1]; out[0] = (columnPage.getByte(rowId)); @@ -205,6 +202,18 @@ public class ColumnPageWrapper implements DimensionColumnPage { } } + private byte[] getNullBitSet(int rowId, ColumnType columnType) { +if (columnPage.getNullBits().get(rowId) && columnType == ColumnType.COMPLEX_PRIMITIVE) { + // if this row is null, return default null represent in byte array + return CarbonCommonConstants.ME
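The crux of the fix: when a page is stored explicitly sorted, a logical rowId must be translated through the inverted reverse index before the bytes are fetched, otherwise a value from some other row comes back. A tiny sketch with toy data:

```java
public class InvertedIndexLookup {
  // When a page is explicitly sorted, translate the logical rowId through the
  // inverted reverse index before reading; skipping this step returns the
  // value of a different row (the bug).
  static byte[] getChunkData(byte[][] pageData, int[] invertedReverseIndex,
                             boolean explicitSorted, int rowId) {
    int physicalRowId = explicitSorted ? invertedReverseIndex[rowId] : rowId;
    return pageData[physicalRowId];
  }

  public static void main(String[] args) {
    byte[][] sortedPage = { {1}, {2}, {3} };  // values stored in sorted order
    int[] reverseIndex = { 2, 0, 1 };         // logical rowId -> physical position
    // logical row 0 holds value 3, which sits at physical position 2
    System.out.println(getChunkData(sortedPage, reverseIndex, true, 0)[0]); // prints 3
  }
}
```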
[26/45] carbondata git commit: Problem: Preaggregate select queries require table path of parent table to access dictionary files. Therefore in executor CarbonMetadata class was used to get parent tab
Problem: Preaggregate select queries require the table path of the parent table to access dictionary files; therefore the CarbonMetadata class was used in the executor to get the parent table object. As the CarbonMetadata class is only meant to be used in the driver and is not filled with carbontable objects for select queries, the query was throwing an NPE. Solution: Pass the parent table path from driver to executor by adding a new variable in RelationIdentifier. This will not be written to thrift; instead it will be used to carry the tablePath property from driver to executor. This closes #2786 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/9ca985f0 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/9ca985f0 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/9ca985f0 Branch: refs/heads/branch-1.5 Commit: 9ca985f0d96380d96dae6fdab2d4ee014d5ac345 Parents: 682160f Author: kunal642 Authored: Fri Sep 28 17:41:32 2018 +0530 Committer: ravipesala Committed: Wed Oct 3 20:02:49 2018 +0530 -- .../core/metadata/schema/table/RelationIdentifier.java | 12 .../carbondata/core/scan/executor/util/QueryUtil.java | 10 ++ .../spark/sql/CarbonDatasourceHadoopRelation.scala | 12 3 files changed, 26 insertions(+), 8 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/9ca985f0/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/RelationIdentifier.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/RelationIdentifier.java b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/RelationIdentifier.java index 9a1dad1..0e8042d 100644 --- a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/RelationIdentifier.java +++ b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/RelationIdentifier.java @@ -32,6 +32,8 @@ public class RelationIdentifier implements Serializable, Writable { private String tableId; + private String tablePath = ""; + public RelationIdentifier(String databaseName, String tableName, String tableId) { this.databaseName = databaseName; this.tableName = tableName; @@ -50,16 +52,26 @@ public class RelationIdentifier implements Serializable, Writable { return tableId; } + public String getTablePath() { +return tablePath; + } + + public void setTablePath(String tablePath) { +this.tablePath = tablePath; + } + @Override public void write(DataOutput out) throws IOException { out.writeUTF(databaseName); out.writeUTF(tableName); out.writeUTF(tableId); +out.writeUTF(tablePath); } @Override public void readFields(DataInput in) throws IOException { this.databaseName = in.readUTF(); this.tableName = in.readUTF(); this.tableId = in.readUTF(); +this.tablePath = in.readUTF(); } @Override public boolean equals(Object o) { http://git-wip-us.apache.org/repos/asf/carbondata/blob/9ca985f0/core/src/main/java/org/apache/carbondata/core/scan/executor/util/QueryUtil.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/executor/util/QueryUtil.java b/core/src/main/java/org/apache/carbondata/core/scan/executor/util/QueryUtil.java index 9fb0857..7849d10 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/executor/util/QueryUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/executor/util/QueryUtil.java @@ -393,12 +393,6 @@ public class QueryUtil { public static AbsoluteTableIdentifier getTableIdentifierForColumn( CarbonDimension carbonDimension) { -RelationIdentifier parentRelationIdentifier =
carbonDimension.getColumnSchema().getParentColumnTableRelations().get(0) -.getRelationIdentifier(); -String parentTablePath = CarbonMetadata.getInstance() -.getCarbonTable(parentRelationIdentifier.getDatabaseName(), -parentRelationIdentifier.getTableName()).getTablePath(); RelationIdentifier relation = carbonDimension.getColumnSchema() .getParentColumnTableRelations() .get(0) @@ -406,8 +400,8 @@ public class QueryUtil { String parentTableName = relation.getTableName(); String parentDatabaseName = relation.getDatabaseName(); String parentTableId = relation.getTableId(); -return AbsoluteTableIdentifier.from(parentTablePath, parentDatabaseName, parentTableName, -parentTableId); +return AbsoluteTableIdentifier.from(relation.getTablePath(), parentDatabaseName, +parentTableName, parentTableId); } /** http://git-wip-us.apache.org/repos/asf/carbo
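The transport itself is plain Hadoop-style Writable serialization: the extra field rides along in write()/readFields(), so the executor never has to consult driver-side metadata caches. A simplified, hypothetical class demonstrating the round trip:

```java
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

// Sketch of the pattern (not the actual RelationIdentifier class): the extra
// tablePath field is serialized driver -> executor alongside the identifiers.
public class IdentifierTransport {
  String databaseName = "db";
  String tableName = "parent";
  String tablePath = "";           // defaults to empty, set on the driver

  void write(DataOutputStream out) throws IOException {
    out.writeUTF(databaseName);
    out.writeUTF(tableName);
    out.writeUTF(tablePath);       // the new field rides along
  }

  void readFields(DataInputStream in) throws IOException {
    databaseName = in.readUTF();
    tableName = in.readUTF();
    tablePath = in.readUTF();
  }

  public static void main(String[] args) throws IOException {
    IdentifierTransport driverSide = new IdentifierTransport();
    driverSide.tablePath = "/warehouse/db/parent";

    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    driverSide.write(new DataOutputStream(bytes));

    IdentifierTransport executorSide = new IdentifierTransport();
    executorSide.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
    System.out.println(executorSide.tablePath);  // "/warehouse/db/parent"
  }
}
```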
[43/45] carbondata git commit: [CARBONDATA-2993] fix random NPE while concurrent loading
[CARBONDATA-2993] fix random NPE while concurrent loading This closes #2797 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/fa088256 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/fa088256 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/fa088256 Branch: refs/heads/branch-1.5 Commit: fa0882569872d3280807a5a57f36c4c43f48cc99 Parents: ca30ad9 Author: kunal642 Authored: Fri Oct 5 10:13:05 2018 +0530 Committer: ravipesala Committed: Fri Oct 5 15:31:33 2018 +0530 -- .../scala/org/apache/carbondata/spark/rdd/CarbonRDD.scala | 9 + .../org/apache/carbondata/sdk/file/AvroCarbonWriter.java| 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/fa088256/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonRDD.scala -- diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonRDD.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonRDD.scala index 87d8f50..3a02f85 100644 --- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonRDD.scala +++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonRDD.scala @@ -47,8 +47,10 @@ abstract class CarbonRDD[T: ClassTag]( info } + @transient val hadoopConf = SparkSQLUtil.sessionState(ss).newHadoopConf() + val config: Broadcast[SerializableConfiguration] = sparkContext -.broadcast(new SerializableConfiguration(SparkSQLUtil.sessionState(ss).newHadoopConf())) +.broadcast(new SerializableConfiguration(hadoopConf)) /** Construct an RDD with just a one-to-one dependency on one parent */ def this(@transient sparkSession: SparkSession, @transient oneParent: RDD[_]) = @@ -57,7 +59,7 @@ abstract class CarbonRDD[T: ClassTag]( protected def internalGetPartitions: Array[Partition] override def getPartitions: Array[Partition] = { -ThreadLocalSessionInfo.setConfigurationToCurrentThread(config.value.value) +ThreadLocalSessionInfo.setConfigurationToCurrentThread(hadoopConf) internalGetPartitions } @@ -66,8 +68,7 @@ abstract class CarbonRDD[T: ClassTag]( final def compute(split: Partition, context: TaskContext): Iterator[T] = { TaskContext.get.addTaskCompletionListener(_ => ThreadLocalSessionInfo.unsetAll()) -carbonSessionInfo.getNonSerializableExtraInfo.put("carbonConf", config - .value.value) +carbonSessionInfo.getNonSerializableExtraInfo.put("carbonConf", getConf) ThreadLocalSessionInfo.setCarbonSessionInfo(carbonSessionInfo) TaskMetricsMap.threadLocal.set(Thread.currentThread().getId) val carbonTaskInfo = new CarbonTaskInfo http://git-wip-us.apache.org/repos/asf/carbondata/blob/fa088256/store/sdk/src/main/java/org/apache/carbondata/sdk/file/AvroCarbonWriter.java -- diff --git a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/AvroCarbonWriter.java b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/AvroCarbonWriter.java index d19a96d..e4a65c0 100644 --- a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/AvroCarbonWriter.java +++ b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/AvroCarbonWriter.java @@ -520,7 +520,7 @@ public class AvroCarbonWriter extends CarbonWriter { // recursively get the sub fields ArrayList arraySubField = new ArrayList<>(); // array will have only one sub field. 
-StructField structField = prepareSubFields("val", childSchema.getElementType()); +StructField structField = prepareSubFields(fieldName, childSchema.getElementType()); if (structField != null) { arraySubField.add(structField); return new Field(fieldName, "array", arraySubField);
[33/45] carbondata git commit: [CARBONDATA-2594] Do not add InvertedIndex in Encoding list for non-sort dimension column #2768
[CARBONDATA-2594] Do not add InvertedIndex in Encoding list for non-sort dimension column #2768 Do not add InvertedIndex to the Encoding list for non-sort dimension columns. This closes #2768 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/18fbdfc4 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/18fbdfc4 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/18fbdfc4 Branch: refs/heads/branch-1.5 Commit: 18fbdfc409dc14812c9f384c437a793e9293b32b Parents: 8fbd4a5 Author: Jacky Li Authored: Wed Sep 26 21:31:35 2018 +0800 Committer: kumarvishal09 Committed: Thu Oct 4 16:57:57 2018 +0530 -- .../carbondata/core/metadata/schema/table/TableSchemaBuilder.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/18fbdfc4/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java index f1be5ca..b5ce725 100644 --- a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java +++ b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/TableSchemaBuilder.java @@ -224,7 +224,7 @@ public class TableSchemaBuilder { } } } -if (newColumn.isDimensionColumn()) { +if (newColumn.isDimensionColumn() && newColumn.isSortColumn()) { newColumn.setUseInvertedIndex(true); } if (field.getDataType().isComplexType()) {
[38/45] carbondata git commit: [CARBONDATA-2985]Fix issues in Table level compaction and TableProperties
[CARBONDATA-2985]Fix issues in Table level compaction and TableProperties Issue :- If the 2nd-level compaction threshold is 1 (for example 2,1 or 6,1), only the first compaction is performed and all subsequent compactions are ignored (e.g. if 2,1 is given, only segment 0.1 is created and the other segments are never compacted). Also, table-level compaction does not support 0 as the 2nd-level compaction value (like 6,0), although system-level compaction does. Solution :- If the 2nd-level compaction value is 1, the user does not want 2nd-level compaction at all, which means it can be set to 0. Remove the check so that a 2nd-level compaction value of 0 is also supported at table level. This closes #2794 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/30adaa8c Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/30adaa8c Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/30adaa8c Branch: refs/heads/branch-1.5 Commit: 30adaa8c15e430b94bd1448969f50cb2451e1746 Parents: 396c26f Author: BJangir Authored: Tue Oct 2 00:17:29 2018 +0530 Committer: ravipesala Committed: Thu Oct 4 18:58:49 2018 +0530 -- .../TableLevelCompactionOptionTest.scala| 84  .../carbondata/spark/util/CommonUtil.scala | 2 +- .../processing/merger/CarbonDataMergerUtil.java | 7 ++ 3 files changed, 92 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/30adaa8c/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datacompaction/TableLevelCompactionOptionTest.scala -- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datacompaction/TableLevelCompactionOptionTest.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datacompaction/TableLevelCompactionOptionTest.scala index 458d656..7b138f7 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datacompaction/TableLevelCompactionOptionTest.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datacompaction/TableLevelCompactionOptionTest.scala @@ -271,4 +271,88 @@ class TableLevelCompactionOptionTest extends QueryTest assert(!segmentSequenceIds.contains("0.1")) assert(!segmentSequenceIds.contains("3.1")) } + + test("AUTO MERGE TRUE:Verify 2nd Level compaction equals to 1"){ +sql("DROP TABLE IF EXISTS tablecompaction_table") +sql( + """ +|create table tablecompaction_table( +|name string,age int) stored by 'carbondata' + |tblproperties('AUTO_LOAD_MERGE'='true','COMPACTION_LEVEL_THRESHOLD'='2,1') + """.stripMargin) + +for(i <-0 until 4){ + sql("insert into tablecompaction_table select 'a',12") +} +var segments = sql("SHOW SEGMENTS FOR TABLE tablecompaction_table") +var segmentSequenceIds = segments.collect().map { each => (each.toSeq) (0) } +assert(segmentSequenceIds.size==6) +assert(segmentSequenceIds.contains("0.1")) +assert(segmentSequenceIds.contains("2.1")) + } + + test("AUTO MERGE FALSE:Verify 2nd Level compaction equals to 1"){ +sql("DROP TABLE IF EXISTS tablecompaction_table") +sql( + """ +|create table tablecompaction_table( +|name string,age int) stored by 'carbondata' +|tblproperties('COMPACTION_LEVEL_THRESHOLD'='2,1') + """.stripMargin) + +for(i <-0 until 4){ + sql("insert into tablecompaction_table select 'a',12") +} +sql("alter table tablecompaction_table compact 'minor' ") +var segments = sql("SHOW SEGMENTS FOR TABLE tablecompaction_table") +var segmentSequenceIds = segments.collect().map { each => (each.toSeq)
(0) } +assert(segmentSequenceIds.size==6) +assert(segmentSequenceIds.contains("0.1")) +assert(segmentSequenceIds.contains("2.1")) + } + + // 2nd Level compaction value = 0 is supported by system level(like 6,0) + // same need to support for table level also + test("Verify 2nd Level compaction equals to 0"){ +sql("DROP TABLE IF EXISTS tablecompaction_table") +sql( + """ +|create table tablecompaction_table( +|name string,age int) stored by 'carbondata' + |tblproperties('AUTO_LOAD_MERGE'='true','COMPACTION_LEVEL_THRESHOLD'='2,0') + """.stripMargin) + +for(i <-0 until 4){ + sql("insert into tablecompaction_table select 'a',12") +} +var segments = s
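To make the normalization described in this fix concrete, here is a minimal, self-contained sketch (not CarbonData's actual code; the class and method names are illustrative) of treating a second-level compaction threshold of 1 the same as 0, since compacting a single segment is a no-op:

```java
public final class CompactionThresholdUtil {

  private CompactionThresholdUtil() {
  }

  /** Parses an "a,b" style COMPACTION_LEVEL_THRESHOLD value. */
  public static int[] parseThreshold(String value) {
    String[] parts = value.split(",");
    int level1 = Integer.parseInt(parts[0].trim());
    int level2 = parts.length > 1 ? Integer.parseInt(parts[1].trim()) : 0;
    if (level2 == 1) {
      // Compacting a single segment achieves nothing, so a value of 1 is
      // interpreted as "no second-level compaction", i.e. the same as 0.
      level2 = 0;
    }
    return new int[] { level1, level2 };
  }

  public static void main(String[] args) {
    int[] threshold = parseThreshold("2,1");
    System.out.println(threshold[0] + "," + threshold[1]); // prints 2,0
  }
}
```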
[39/45] carbondata git commit: [CARBONDATA-2991]NegativeArraySizeException during query execution
[CARBONDATA-2991]NegativeArraySizeException during query execution Issue :- During query execution, some tasks intermittently fail with a NegativeArraySizeException, and sometimes an executor is lost (JVM crash). Root Cause :- An existing memory block is removed while it is still in use, because duplicate task IDs were generated. Sometimes the freed memory addresses are assigned to another task, which initializes the memory block to 0 and causes the NegativeArraySizeException; at other times the freed memory is not reused by any task of the executor process, but the running task still tries to access it, and since that address no longer belongs to the process, the JVM crashes. Solution :- Change taskID generation to be UUID based instead of System.nanoTime() This closes #2796 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/d3927172 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/d3927172 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/d3927172 Branch: refs/heads/branch-1.5 Commit: d3927172f19b2251e77b840e53d8678cba2a38bd Parents: 30adaa8 Author: BJangir Authored: Wed Oct 3 23:05:42 2018 +0530 Committer: ravipesala Committed: Thu Oct 4 21:11:09 2018 +0530 -- .../unsafe/UnsafeAbstractDimensionDataChunkStore.java | 2 +- .../core/datastore/page/UnsafeFixLengthColumnPage.java | 2 +- .../core/datastore/page/VarLengthColumnPageBase.java | 2 +- .../core/indexstore/AbstractMemoryDMStore.java | 2 +- .../carbondata/core/memory/IntPointerBuffer.java | 4 ++-- .../carbondata/core/memory/UnsafeMemoryManager.java| 12 ++-- .../core/memory/UnsafeSortMemoryManager.java | 13 +++-- .../apache/carbondata/core/util/CarbonTaskInfo.java| 6 +++--- .../org/apache/carbondata/core/util/CarbonUtil.java| 10 ++ .../carbondata/core/util/ThreadLocalTaskInfo.java | 3 ++- .../org/apache/carbondata/spark/rdd/CarbonRDD.scala| 2 +- .../org/apache/carbondata/spark/util/CommonUtil.scala | 2 +- .../loading/sort/unsafe/UnsafeCarbonRowPage.java | 4 ++-- .../loading/sort/unsafe/UnsafeSortDataRows.java| 2 +- .../org/apache/carbondata/sdk/file/CarbonReader.java | 3 ++- .../carbondata/store/worker/SearchRequestHandler.java | 9 +++-- 16 files changed, 44 insertions(+), 34 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/d3927172/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeAbstractDimensionDataChunkStore.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeAbstractDimensionDataChunkStore.java b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeAbstractDimensionDataChunkStore.java index 940ca1a..89bce2d 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeAbstractDimensionDataChunkStore.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeAbstractDimensionDataChunkStore.java @@ -62,7 +62,7 @@ public abstract class UnsafeAbstractDimensionDataChunkStore implements Dimension */ protected boolean isMemoryOccupied; - private final long taskId = ThreadLocalTaskInfo.getCarbonTaskInfo().getTaskId(); + private final String taskId = ThreadLocalTaskInfo.getCarbonTaskInfo().getTaskId(); /** * Constructor http://git-wip-us.apache.org/repos/asf/carbondata/blob/d3927172/core/src/main/java/org/apache/carbondata/core/datastore/page/UnsafeFixLengthColumnPage.java -- diff --git
a/core/src/main/java/org/apache/carbondata/core/datastore/page/UnsafeFixLengthColumnPage.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/UnsafeFixLengthColumnPage.java index 9e0eb8d..7df29df 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/UnsafeFixLengthColumnPage.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/UnsafeFixLengthColumnPage.java @@ -51,7 +51,7 @@ public class UnsafeFixLengthColumnPage extends ColumnPage { // size of the allocated memory, in bytes private int capacity; - private final long taskId = ThreadLocalTaskInfo.getCarbonTaskInfo().getTaskId(); + private final String taskId = ThreadLocalTaskInfo.getCarbonTaskInfo().getTaskId(); private static final int byteBits = DataTypes.BYTE.getSizeBits(); private static final int shortBits = DataTypes.SHORT.getSizeBits(); http://git-wip-us.apache.org/repos/asf/carbondata/blob/d3927172/core/src/main/java/org/apache/carbondata/core/datastore/page/VarLengthColumnPageBase.java
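The core of the change can be shown in a short, hypothetical sketch (the names below are not CarbonData's API): a nanosecond clock can hand two concurrently started tasks the same ID, whereas a UUID cannot, so a memory block registered under one task's ID can no longer be freed on behalf of another task.

```java
import java.util.UUID;

public class TaskIdDemo {

  // Old scheme: two tasks starting in the same clock tick can collide.
  static long nanoTimeTaskId() {
    return System.nanoTime();
  }

  // New scheme: universally unique String id, so no task can accidentally
  // free memory blocks that belong to another task.
  static String uuidTaskId() {
    return UUID.randomUUID().toString();
  }

  public static void main(String[] args) {
    System.out.println(nanoTimeTaskId());
    System.out.println(uuidTaskId());
  }
}
```

This is also why the diff changes the taskId fields from long to String throughout.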
[01/45] carbondata git commit: [CARBONDATA-2969]local dictionary query fix for spark-2.3 [Forced Update!]
Repository: carbondata Updated Branches: refs/heads/branch-1.5 ef1068cad -> 3c7b33992 (forced update) [CARBONDATA-2969]local dictionary query fix for spark-2.3 This closes #2761 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/2ab2254b Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/2ab2254b Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/2ab2254b Branch: refs/heads/branch-1.5 Commit: 2ab2254be84f82fd2f4b99a6b73353f4c7a55d10 Parents: f239894 Author: akashrn5 Authored: Tue Sep 25 20:43:06 2018 +0530 Committer: Jacky Li Committed: Wed Sep 26 15:01:38 2018 +0800 -- .../LocalDictionarySupportLoadTableTest.scala | 14 + .../vectorreader/CarbonDictionaryWrapper.java | 44 --- .../vectorreader/ColumnarVectorWrapper.java | 11 +--- .../spark/sql/CarbonDictionaryWrapper.java | 44 +++ .../org/apache/spark/sql/CarbonVectorProxy.java | 10 ++-- .../spark/sql/CarbonDictionaryWrapper.java | 56  .../org/apache/spark/sql/CarbonVectorProxy.java | 8 +-- 7 files changed, 127 insertions(+), 60 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/2ab2254b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/localdictionary/LocalDictionarySupportLoadTableTest.scala -- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/localdictionary/LocalDictionarySupportLoadTableTest.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/localdictionary/LocalDictionarySupportLoadTableTest.scala index e88d8a9..d23c844 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/localdictionary/LocalDictionarySupportLoadTableTest.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/localdictionary/LocalDictionarySupportLoadTableTest.scala @@ -136,6 +136,20 @@ class LocalDictionarySupportLoadTableTest extends QueryTest with BeforeAndAfterA assert(checkForLocalDictionary(getDimRawChunk(2))) } + test("test local dictionary data validation") { +sql("drop table if exists local_query_enable") +sql("drop table if exists local_query_disable") +sql( + "CREATE TABLE local_query_enable(name string) STORED BY 'carbondata' tblproperties" + + "('local_dictionary_enable'='false','local_dictionary_include'='name')") +sql("load data inpath '" + file1 + "' into table local_query_enable OPTIONS('header'='false')") +sql( + "CREATE TABLE local_query_disable(name string) STORED BY 'carbondata' tblproperties" + + "('local_dictionary_enable'='true','local_dictionary_include'='name')") +sql("load data inpath '" + file1 + "' into table local_query_disable OPTIONS('header'='false')") +checkAnswer(sql("select name from local_query_enable"), sql("select name from local_query_disable")) + } + test("test to validate local dictionary values"){ sql("drop table if exists local2") sql("CREATE TABLE local2(name string) STORED BY 'carbondata' tblproperties('local_dictionary_enable'='true')") http://git-wip-us.apache.org/repos/asf/carbondata/blob/2ab2254b/integration/spark-datasource/src/main/scala/org/apache/carbondata/spark/vectorreader/CarbonDictionaryWrapper.java -- diff --git a/integration/spark-datasource/src/main/scala/org/apache/carbondata/spark/vectorreader/CarbonDictionaryWrapper.java b/integration/spark-datasource/src/main/scala/org/apache/carbondata/spark/vectorreader/CarbonDictionaryWrapper.java deleted file mode 100644 index 7f1e577..000 ---
a/integration/spark-datasource/src/main/scala/org/apache/carbondata/spark/vectorreader/CarbonDictionaryWrapper.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - *http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package
[03/45] carbondata git commit: [CARBONDATA-2965] support Benchmark command in CarbonCli
[CARBONDATA-2965] support Benchmark command in CarbonCli A new command called "benchmark" is added in the CarbonCli tool to output the scan performance of the specified file and column. Example usage: ```bash shell>java -jar carbondata-cli.jar org.apache.carbondata.CarbonCli -cmd benchmark -p hdfs://carbon1:9000/carbon.store/tpchcarbon_base/lineitem/ -a -c l_comment ``` will output the scan time of the l_comment column for the first file in the input folder and print the following (or use the -f option to provide a data file instead of a folder): ``` ReadHeaderAndFooter takes 12,598 us ConvertFooter takes 4,712 us ReadAllMetaAndConvertFooter takes 8,039 us Scan column 'l_comment' Blocklet#0: ColumnChunkIO takes 222,609 us Blocklet#0: DecompressPage takes 111,985 us Blocklet#1: ColumnChunkIO takes 186,522 us Blocklet#1: DecompressPage takes 89,132 us Blocklet#2: ColumnChunkIO takes 209,129 us Blocklet#2: DecompressPage takes 84,051 us ``` This closes #2755 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/e07df44a Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/e07df44a Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/e07df44a Branch: refs/heads/branch-1.5 Commit: e07df44a1db52304c54ab4e379f28b0f026449fd Parents: 49f6715 Author: Jacky Li Authored: Sun Sep 23 00:01:04 2018 +0800 Committer: xuchuanyin Committed: Wed Sep 26 15:47:37 2018 +0800 -- .../core/util/DataFileFooterConverterV3.java| 6 +- pom.xml | 7 +- tools/cli/pom.xml | 5 + .../org/apache/carbondata/tool/CarbonCli.java | 90  .../org/apache/carbondata/tool/Command.java | 28 +++ .../org/apache/carbondata/tool/DataFile.java| 94 +++-- .../org/apache/carbondata/tool/DataSummary.java | 188 ++--- .../apache/carbondata/tool/FileCollector.java | 147 + .../apache/carbondata/tool/ScanBenchmark.java | 205 +++ .../apache/carbondata/tool/CarbonCliTest.java | 94 + 10 files changed, 622 insertions(+), 242 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/e07df44a/core/src/main/java/org/apache/carbondata/core/util/DataFileFooterConverterV3.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/util/DataFileFooterConverterV3.java b/core/src/main/java/org/apache/carbondata/core/util/DataFileFooterConverterV3.java index 41e22fd..438e3e3 100644 --- a/core/src/main/java/org/apache/carbondata/core/util/DataFileFooterConverterV3.java +++ b/core/src/main/java/org/apache/carbondata/core/util/DataFileFooterConverterV3.java @@ -59,12 +59,16 @@ public class DataFileFooterConverterV3 extends AbstractDataFileFooterConverter { */ @Override public DataFileFooter readDataFileFooter(TableBlockInfo tableBlockInfo) throws IOException { -DataFileFooter dataFileFooter = new DataFileFooter(); CarbonHeaderReader carbonHeaderReader = new CarbonHeaderReader(tableBlockInfo.getFilePath()); FileHeader fileHeader = carbonHeaderReader.readHeader(); CarbonFooterReaderV3 reader = new CarbonFooterReaderV3(tableBlockInfo.getFilePath(), tableBlockInfo.getBlockOffset()); FileFooter3 footer = reader.readFooterVersion3(); +return convertDataFileFooter(fileHeader, footer); + } + + public DataFileFooter convertDataFileFooter(FileHeader fileHeader, FileFooter3 footer) { +DataFileFooter dataFileFooter = new DataFileFooter(); dataFileFooter.setVersionId(ColumnarFormatVersion.valueOf((short) fileHeader.getVersion())); dataFileFooter.setNumberOfRows(footer.getNum_rows()); dataFileFooter.setSegmentInfo(getSegmentInfo(footer.getSegment_info()));
http://git-wip-us.apache.org/repos/asf/carbondata/blob/e07df44a/pom.xml -- diff --git a/pom.xml b/pom.xml index eff438b..00a5287 100644 --- a/pom.xml +++ b/pom.xml @@ -106,6 +106,7 @@ store/sdk store/search assembly +tools/cli @@ -718,12 +719,6 @@ datamap/mv/core - - tools - -tools/cli - - http://git-wip-us.apache.org/repos/asf/carbondata/blob/e07df44a/tools/cli/pom.xml -- diff --git a/tools/cli/pom.xml b/tools/cli/pom.xml index 0d00438..60e69dc 100644 --- a/tools/cli/pom.xml +++ b/tools/cli/pom.xml @@ -25,6 +25,11 @@ ${project.version} + javax.servlet + servlet-api + 2.5 + + junit junit test
[09/45] carbondata git commit: [CARBONDATA-2973] Added documentation for fallback condition for complex columns in local Dictionary
[CARBONDATA-2973] Added documentation for fallback condition for complex columns in local Dictionary 1. Added documentation for fallback condition for complex columns in local Dictionary 2. Added documentation for system level property" carbon.local.dictionary.decoder.fallback" This closes #2766 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/3f99e9b7 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/3f99e9b7 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/3f99e9b7 Branch: refs/heads/branch-1.5 Commit: 3f99e9b7f87b387f03cb5bece2b2a8c5a50b Parents: a9ddfbd Author: praveenmeenakshi56 Authored: Wed Sep 26 12:40:37 2018 +0530 Committer: manishgupta88 Committed: Wed Sep 26 18:14:44 2018 +0530 -- docs/configuration-parameters.md | 2 +- docs/ddl-of-carbondata.md| 16 +++- docs/faq.md | 2 +- 3 files changed, 13 insertions(+), 7 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/3f99e9b7/docs/configuration-parameters.md -- diff --git a/docs/configuration-parameters.md b/docs/configuration-parameters.md index 7edae47..662525b 100644 --- a/docs/configuration-parameters.md +++ b/docs/configuration-parameters.md @@ -119,7 +119,7 @@ This section provides the details of all the configurations required for the Car | Parameter | Default Value | Description | |--|---|---| -| carbon.max.driver.lru.cache.size | -1 | Maximum memory **(in MB)** upto which the driver process can cache the data (BTree and dictionary values). Beyond this, least recently used data will be removed from cache before loading new set of values.Default value of -1 means there is no memory limit for caching. Only integer values greater than 0 are accepted.**NOTE:** Minimum number of entries that needs to be removed from cache in order to load the new set of data is determined and unloaded.ie.,for example if 3 cache entries qualify for pre-emption, out of these, those entries that free up more cache memory is removed prior to others. Please refer [FAQs](./faq.md#how-to-check-LRU-cache-memory-footprint) for checking LRU cache memory footprint. | +| carbon.max.driver.lru.cache.size | -1 | Maximum memory **(in MB)** upto which the driver process can cache the data (BTree and dictionary values). Beyond this, least recently used data will be removed from cache before loading new set of values.Default value of -1 means there is no memory limit for caching. Only integer values greater than 0 are accepted.**NOTE:** Minimum number of entries that needs to be removed from cache in order to load the new set of data is determined and unloaded.ie.,for example if 3 cache entries qualify for pre-emption, out of these, those entries that free up more cache memory is removed prior to others. Please refer [FAQs](./faq.md#how-to-check-lru-cache-memory-footprint) for checking LRU cache memory footprint. | | carbon.max.executor.lru.cache.size | -1 | Maximum memory **(in MB)** upto which the executor process can cache the data (BTree and reverse dictionary values).Default value of -1 means there is no memory limit for caching. Only integer values greater than 0 are accepted.**NOTE:** If this parameter is not configured, then the value of ***carbon.max.driver.lru.cache.size*** will be used. | | max.query.execution.time | 60 | Maximum time allowed for one query to be executed. The value is in minutes. 
| | carbon.enableMinMax | true | CarbonData maintains the metadata which enables to prune unnecessary files from being scanned as per the query conditions.To achieve pruning, Min,Max of each column is maintined.Based on the filter condition in the query, certain data can be skipped from scanning by matching the filter value against the min,max values of the column(s) present in that carbondata file.This pruing enhances query performance significantly. | http://git-wip-us.apache.org/repos/asf/carbondata/blob/3f99e9b7/docs/ddl-of-carbondata.md -- diff --git a/docs/ddl-of-carbondata.md b/docs/ddl-of-carbondata.md index 2a467a2..5eeba86 100644 --- a/docs/ddl-of-carbondata.md +++ b/docs/ddl-of-carbondata.md @@ -231,7 +231,13 @@ CarbonData DDL statements are documented here,which includes: * In case of multi-level complex dataType columns, primitive string/varchar/char columns are considered for local dictionary generation. - Local dictionary will have to be enabled explicitly during create table or by enabling the **system property** ***carbon.local.dictionary.enable***. By default,
[04/45] carbondata git commit: [CARBONDATA-2968] Single pass load fails 2nd time in Spark submit execution due to port binding error
[CARBONDATA-2968] Single pass load fails 2nd time in Spark submit execution due to port binding error Problem : In a secure cluster setup, single pass load fails in spark-submit after using beeline. Solution: The port variable was never updated, so the server kept retrying the same port instead of looking for the next free one. Modified that logic and added a log to display the port number. This closes #2760 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/13ecc9e7 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/13ecc9e7 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/13ecc9e7 Branch: refs/heads/branch-1.5 Commit: 13ecc9e7a0a42ebf2f8417814c20474f3ce489f1 Parents: e07df44 Author: shardul-cr7 Authored: Tue Sep 25 19:55:19 2018 +0530 Committer: kumarvishal09 Committed: Wed Sep 26 14:16:21 2018 +0530 -- .../core/dictionary/server/NonSecureDictionaryServer.java | 3 ++- .../spark/dictionary/server/SecureDictionaryServer.java| 6 -- 2 files changed, 6 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/13ecc9e7/core/src/main/java/org/apache/carbondata/core/dictionary/server/NonSecureDictionaryServer.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/dictionary/server/NonSecureDictionaryServer.java b/core/src/main/java/org/apache/carbondata/core/dictionary/server/NonSecureDictionaryServer.java index 95f3d69..dc2d211 100644 --- a/core/src/main/java/org/apache/carbondata/core/dictionary/server/NonSecureDictionaryServer.java +++ b/core/src/main/java/org/apache/carbondata/core/dictionary/server/NonSecureDictionaryServer.java @@ -109,6 +109,7 @@ public class NonSecureDictionaryServer extends AbstractDictionaryServer }); bootstrap.childOption(ChannelOption.SO_KEEPALIVE, true); String hostToBind = findLocalIpAddress(LOGGER); +//iteratively listening to newports InetSocketAddress address = hostToBind == null ?
new InetSocketAddress(newPort) : new InetSocketAddress(hostToBind, newPort); @@ -119,7 +120,7 @@ public class NonSecureDictionaryServer extends AbstractDictionaryServer this.host = hostToBind; break; } catch (Exception e) { -LOGGER.error(e, "Dictionary Server Failed to bind to port:"); +LOGGER.error(e, "Dictionary Server Failed to bind to port:" + newPort); if (i == 9) { throw new RuntimeException("Dictionary Server Could not bind to any port"); } http://git-wip-us.apache.org/repos/asf/carbondata/blob/13ecc9e7/integration/spark-common/src/main/java/org/apache/carbondata/spark/dictionary/server/SecureDictionaryServer.java -- diff --git a/integration/spark-common/src/main/java/org/apache/carbondata/spark/dictionary/server/SecureDictionaryServer.java b/integration/spark-common/src/main/java/org/apache/carbondata/spark/dictionary/server/SecureDictionaryServer.java index f4948c4..995e520 100644 --- a/integration/spark-common/src/main/java/org/apache/carbondata/spark/dictionary/server/SecureDictionaryServer.java +++ b/integration/spark-common/src/main/java/org/apache/carbondata/spark/dictionary/server/SecureDictionaryServer.java @@ -143,14 +143,16 @@ public class SecureDictionaryServer extends AbstractDictionaryServer implements TransportServerBootstrap bootstrap = new SaslServerBootstrap(transportConf, securityManager); String host = findLocalIpAddress(LOGGER); -context.createServer(host, port, Lists.newArrayList(bootstrap)); +//iteratively listening to newports +context +.createServer(host, newPort, Lists.newArrayList(bootstrap)); LOGGER.audit("Dictionary Server started, Time spent " + (System.currentTimeMillis() - start) + " Listening on port " + newPort); this.port = newPort; this.host = host; break; } catch (Exception e) { -LOGGER.error(e, "Dictionary Server Failed to bind to port:"); +LOGGER.error(e, "Dictionary Server Failed to bind to port: " + newPort); if (i == 9) { throw new RuntimeException("Dictionary Server Could not bind to any port"); }
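A minimal sketch of the retry loop this commit fixes, using a plain ServerSocket in place of the Netty/transport bootstraps in the diff (assumption: the same 10-attempt policy as shown above). The essential points are that the candidate port must advance on every failed attempt and that the failing port is included in the log:

```java
import java.net.InetSocketAddress;
import java.net.ServerSocket;

public class PortBindDemo {

  static ServerSocket bindToFreePort(int basePort) {
    for (int i = 0; i < 10; i++) {
      int newPort = basePort + i; // the bug: the port was effectively never updated
      try {
        ServerSocket socket = new ServerSocket();
        socket.bind(new InetSocketAddress(newPort));
        return socket;
      } catch (Exception e) {
        // Logging the concrete port makes the failure diagnosable.
        System.err.println("Failed to bind to port: " + newPort);
      }
    }
    throw new RuntimeException("Could not bind to any port");
  }

  public static void main(String[] args) throws Exception {
    ServerSocket socket = bindToFreePort(2030);
    System.out.println("Listening on port " + socket.getLocalPort());
    socket.close();
  }
}
```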
[35/45] carbondata git commit: [CARBONDATA-2992] Fixed Between Query Data Mismatch issue for timestamp data type
[CARBONDATA-2992] Fixed Between Query Data Mismatch issue for timestamp data type Problem: Between query is giving wrong results. Root cause: For the timestamp type, when the filter is given in yyyy-mm-dd format instead of yyyy-mm-dd HH:MM:SS format, a cast is added. In CastExpressionOptimization a SimpleDateFormat object is used to parse the filter value, which fails because the filter value does not match the full pattern. Solution: Use Spark's DateTimeUtils.stringToTime method, as Spark already handles the above scenario. This closes #2787 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/11bd0ade Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/11bd0ade Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/11bd0ade Branch: refs/heads/branch-1.5 Commit: 11bd0ade93a3ac72b42068c3b57ed8bb1203ab47 Parents: 6aa2a90 Author: kumarvishal09 Authored: Fri Sep 28 18:33:29 2018 +0530 Committer: ravipesala Committed: Thu Oct 4 18:02:08 2018 +0530 -- .../src/test/resources/datedatafile.csv | 7 ++ .../src/test/resources/timestampdatafile.csv| 7 ++ ...imestampNoDictionaryColumnCastTestCase.scala | 80  .../execution/CastExpressionOptimization.scala | 67 +--- .../bloom/BloomCoarseGrainDataMapSuite.scala| 47 +--- 5 files changed, 152 insertions(+), 56 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/11bd0ade/integration/spark-common-test/src/test/resources/datedatafile.csv -- diff --git a/integration/spark-common-test/src/test/resources/datedatafile.csv b/integration/spark-common-test/src/test/resources/datedatafile.csv new file mode 100644 index 000..43a615d --- /dev/null +++ b/integration/spark-common-test/src/test/resources/datedatafile.csv @@ -0,0 +1,7 @@ +datetype1 +2018-09-11 +2018-09-12 +2018-09-13 +2018-09-14 +2018-09-15 +2018-09-16 http://git-wip-us.apache.org/repos/asf/carbondata/blob/11bd0ade/integration/spark-common-test/src/test/resources/timestampdatafile.csv -- diff --git a/integration/spark-common-test/src/test/resources/timestampdatafile.csv b/integration/spark-common-test/src/test/resources/timestampdatafile.csv new file mode 100644 index 000..473f330 --- /dev/null +++ b/integration/spark-common-test/src/test/resources/timestampdatafile.csv @@ -0,0 +1,7 @@ +timestamptype +2018-09-11 00:00:00 +2018-09-12 00:00:00 +2018-09-13 00:00:00 +2018-09-14 00:00:00 +2018-09-15 00:00:00 +2018-09-16 00:00:00 http://git-wip-us.apache.org/repos/asf/carbondata/blob/11bd0ade/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/TimestampNoDictionaryColumnCastTestCase.scala -- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/TimestampNoDictionaryColumnCastTestCase.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/TimestampNoDictionaryColumnCastTestCase.scala new file mode 100644 index 000..41c7005 --- /dev/null +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/TimestampNoDictionaryColumnCastTestCase.scala @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.
You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.spark.testsuite.directdictionary + + +import org.apache.spark.sql.Row +import org.apache.spark.sql.test.util.QueryTest +import org.scalatest.BeforeAndAfterAll + +import org.apache.carbondata.core.constants.CarbonCommonConstants +import org.apache.carbondata.core.util.CarbonProperties + +/** + * Test Class for detailed query on timestamp datatypes + */ +class TimestampNoDictionaryColumnCastTestCase extends QueryTest with BeforeAndAfterAll { + + override def beforeAll { +CarbonProperties.getInstance()
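The root cause is easy to reproduce in plain Java. The following standalone sketch (illustrative only, not the CarbonData code path) shows why a SimpleDateFormat configured with the full timestamp pattern rejects a date-only filter literal, which is exactly the case a more lenient parser such as Spark's DateTimeUtils.stringToTime handles:

```java
import java.text.ParseException;
import java.text.SimpleDateFormat;

public class TimestampParseDemo {

  public static void main(String[] args) throws ParseException {
    SimpleDateFormat fullPattern = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    try {
      // Fails: the literal has no time component, so the pattern cannot match.
      fullPattern.parse("2018-09-11");
    } catch (ParseException e) {
      System.out.println("full pattern rejects date-only literal: " + e.getMessage());
    }

    // A date-only pattern (or a lenient parser) accepts the same literal.
    SimpleDateFormat datePattern = new SimpleDateFormat("yyyy-MM-dd");
    System.out.println(datePattern.parse("2018-09-11"));
  }
}
```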
[30/45] carbondata git commit: [CARBONDATA-2976] Support dumping column chunk metadata in CarbonCli
[CARBONDATA-2976] Support dumping column chunk metadata in CarbonCli By using the -k option, CarbonCli will print all column chunk/page metadata for the specified column. For example, java CarbonCli -cmd summary -p <path> -c name -k will output: ## Page Meta for column 'name' in file /Users/jacky/code/carbondata/tools/cli/CarbonCliTest/part-0-138391629343461_batchno0-0-null-138390048546321.carbondata Blocklet 0: Page 0 (offset 0, length 12049): DataChunk2(chunk_meta:ChunkCompressionMeta(compression_codec:DEPRECATED, total_uncompressed_size:0, total_compressed_size:0, compressor_name:snappy), rowMajor:false, data_page_length:12039, rowid_page_length:10, presence:PresenceMeta(represents_presence:false, present_bit_stream:00), sort_state:SORT_EXPLICIT, encoders:[INVERTED_INDEX], encoder_meta:[], min_max:BlockletMinMaxIndex(min_values:[72 6F 62 6F 74 30], max_values:[72 6F 62 6F 74 30], min_max_presence:[true]), numberOfRowsInpage:32000) Page 1 (offset 12049, length 12049): DataChunk2(chunk_meta:ChunkCompressionMeta(compression_codec:DEPRECATED, total_uncompressed_size:0, total_compressed_size:0, compressor_name:snappy), rowMajor:false, data_page_length:12039, rowid_page_length:10, presence:PresenceMeta(represents_presence:false, present_bit_stream:00), sort_state:SORT_EXPLICIT, encoders:[INVERTED_INDEX], encoder_meta:[], min_max:BlockletMinMaxIndex(min_values:[72 6F 62 6F 74 30], max_values:[72 6F 62 6F 74 30], min_max_presence:[true]), numberOfRowsInpage:32000) Page 2 (offset 24098, length 12049): DataChunk2(chunk_meta:ChunkCompressionMeta(compression_codec:DEPRECATED, total_uncompressed_size:0, total_compressed_size:0, compressor_name:snappy), rowMajor:false, data_page_length:12039, rowid_page_length:10, presence:PresenceMeta(represents_presence:false, present_bit_stream:00), sort_state:SORT_EXPLICIT, encoders:[INVERTED_INDEX], encoder_meta:[], min_max:BlockletMinMaxIndex(min_values:[72 6F 62 6F 74 30], max_values:[72 6F 62 6F 74 30], min_max_presence:[true]), numberOfRowsInpage:32000) This closes #2771 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/d8003a31 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/d8003a31 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/d8003a31 Branch: refs/heads/branch-1.5 Commit: d8003a31c602807f00d438d8be392992cb0955ac Parents: fa9c832 Author: Jacky Li Authored: Wed Sep 26 23:51:34 2018 +0800 Committer: ravipesala Committed: Wed Oct 3 20:17:04 2018 +0530 -- .../org/apache/carbondata/tool/CarbonCli.java | 2 + .../org/apache/carbondata/tool/DataFile.java| 8 +++- .../org/apache/carbondata/tool/DataSummary.java | 45 ++-- .../apache/carbondata/tool/CarbonCliTest.java | 13 ++ 4 files changed, 63 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/d8003a31/tools/cli/src/main/java/org/apache/carbondata/tool/CarbonCli.java -- diff --git a/tools/cli/src/main/java/org/apache/carbondata/tool/CarbonCli.java b/tools/cli/src/main/java/org/apache/carbondata/tool/CarbonCli.java index 5725f8e..f1baa92 100644 --- a/tools/cli/src/main/java/org/apache/carbondata/tool/CarbonCli.java +++ b/tools/cli/src/main/java/org/apache/carbondata/tool/CarbonCli.java @@ -65,6 +65,7 @@ public class CarbonCli { Option segment = new
Option("m", "showSegment", false, "print segment information"); Option tblProperties = new Option("t", "tblProperties", false, "print table properties"); Option detail = new Option("b", "blocklet", false, "print blocklet size detail"); +Option columnMeta = new Option("k", "columnChunkMeta", false, "print column chunk meta"); Option columnName = OptionBuilder .withArgName("column name") .hasArg() @@ -82,6 +83,7 @@ public class CarbonCli { options.addOption(segment); options.addOption(tblProperties); options.addOption(detail); +options.addOption(columnMeta); options.addOption(columnName); return options; } http://git-wip-us.apache.org/repos/asf/carbondata/blob/d8003a31/tools/cli/src/main/java/org/apache/carbondata/tool/DataFile.java -- diff --git a/tools/cli/src/main/java/org/apache/carbondata/tool/DataFile.java b/tools/cli/src/main/java/org/apache/carbondata/tool/DataFile.java inde
[02/45] carbondata git commit: [CARBONDATA-2962]Even after the carbon file is copied to the target folder (local/HDFS), carbon files are not deleted from the temp directory
[CARBONDATA-2962]Even after the carbon file is copied to the target folder (local/HDFS), carbon files are not deleted from the temp directory Problem: Even after the carbon file is copied to the target folder (local/HDFS), the carbon files are not deleted from the temp directory. Solution: After copying the carbon data and index files from the temp directory, delete those files. This closes #2752 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/49f67153 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/49f67153 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/49f67153 Branch: refs/heads/branch-1.5 Commit: 49f67153a21e5a0cb5705adeb0f056eef4d3ed25 Parents: 2ab2254 Author: Indhumathi27 Authored: Mon Sep 24 12:28:47 2018 +0530 Committer: kumarvishal09 Committed: Wed Sep 26 12:35:24 2018 +0530 -- .../store/writer/AbstractFactDataWriter.java| 20 ++-- 1 file changed, 14 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/49f67153/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java -- diff --git a/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java b/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java index ad0e8e0..4afb3ef 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java +++ b/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java @@ -270,12 +270,18 @@ public abstract class AbstractFactDataWriter implements CarbonFactDataWriter { notifyDataMapBlockEnd(); CarbonUtil.closeStreams(this.fileOutputStream, this.fileChannel); if (!enableDirectlyWriteData2Hdfs) { - if (copyInCurrentThread) { -CarbonUtil.copyCarbonDataFileToCarbonStorePath(carbonDataFileTempPath, -model.getCarbonDataDirectoryPath(), fileSizeInBytes); - } else { -executorServiceSubmitList.add(executorService.submit( -new CompleteHdfsBackendThread(carbonDataFileTempPath))); + try { +if (copyInCurrentThread) { + CarbonUtil.copyCarbonDataFileToCarbonStorePath(carbonDataFileTempPath, + model.getCarbonDataDirectoryPath(), fileSizeInBytes); + FileFactory + .deleteFile(carbonDataFileTempPath, FileFactory.getFileType(carbonDataFileTempPath)); +} else { + executorServiceSubmitList + .add(executorService.submit(new CompleteHdfsBackendThread(carbonDataFileTempPath))); +} + } catch (IOException e) { +LOGGER.error("Failed to delete carbondata file from temp location" + e.getMessage()); } } } @@ -405,6 +411,7 @@ public abstract class AbstractFactDataWriter implements CarbonFactDataWriter { CarbonUtil .copyCarbonDataFileToCarbonStorePath(indexFileName, model.getCarbonDataDirectoryPath(), fileSizeInBytes); + FileFactory.deleteFile(indexFileName, FileFactory.getFileType(indexFileName)); } } @@ -470,6 +477,7 @@ public abstract class AbstractFactDataWriter implements CarbonFactDataWriter { public Void call() throws Exception { CarbonUtil.copyCarbonDataFileToCarbonStorePath(fileName, model.getCarbonDataDirectoryPath(), fileSizeInBytes); + FileFactory.deleteFile(fileName, FileFactory.getFileType(fileName)); return null; } }
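The pattern behind this fix is copy-then-delete. Here is a minimal sketch with java.nio standing in for CarbonUtil/FileFactory (paths and names are hypothetical): once the file has been copied to the store path, the temp copy is removed so it cannot accumulate on local disk:

```java
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;

public class CopyThenDeleteDemo {

  static void copyToStoreAndCleanUp(Path tempFile, Path storeDir) throws IOException {
    Files.copy(tempFile, storeDir.resolve(tempFile.getFileName()),
        StandardCopyOption.REPLACE_EXISTING);
    // The step this commit adds: remove the temp file once the copy succeeded.
    Files.delete(tempFile);
  }

  public static void main(String[] args) throws IOException {
    Path tempFile = Files.createTempFile("part-0-", ".carbondata");
    Path storeDir = Files.createTempDirectory("carbon-store");
    copyToStoreAndCleanUp(tempFile, storeDir);
    System.out.println("temp file still exists? " + Files.exists(tempFile)); // false
  }
}
```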
[12/45] carbondata git commit: [HOTFIX] support "carbon.load.directWriteHdfs.enabled" for S3
[HOTFIX] support "carbon.load.directWriteHdfs.enabled" for S3 problem : Currently for s3, when the above carbon property is set. index file will not be written in the s3 store path due to bug in folder path. Solution: file separator used is wrong. Need to fix it. Also rename a carbon peroperty "carbon.load.directWriteHdfs.enabled" to "carbon.load.directWriteToStorePath.enabled" This closes #2697 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/5d17ff40 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/5d17ff40 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/5d17ff40 Branch: refs/heads/branch-1.5 Commit: 5d17ff40bdeeba64a8885fa2df427fbdec6a38ea Parents: 2a4f530 Author: ajantha-bhat Authored: Thu Sep 6 16:47:22 2018 +0530 Committer: kunal642 Committed: Thu Sep 27 14:03:37 2018 +0530 -- .../constants/CarbonLoadOptionConstants.java| 6 ++-- docs/configuration-parameters.md| 2 +- .../carbondata/examples/sdk/SDKS3Example.java | 12 +++ .../dataload/TestLoadDataGeneral.scala | 8 ++--- .../store/writer/AbstractFactDataWriter.java| 38 ++-- 5 files changed, 39 insertions(+), 27 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/5d17ff40/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java b/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java index 3eab69d..82485ca 100644 --- a/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java +++ b/core/src/main/java/org/apache/carbondata/core/constants/CarbonLoadOptionConstants.java @@ -136,9 +136,9 @@ public final class CarbonLoadOptionConstants { public static final String SORT_COLUMN_BOUNDS_ROW_DELIMITER = ";"; @CarbonProperty - public static final String ENABLE_CARBON_LOAD_DIRECT_WRITE_HDFS - = "carbon.load.directWriteHdfs.enabled"; - public static final String ENABLE_CARBON_LOAD_DIRECT_WRITE_HDFS_DEFAULT = "false"; + public static final String ENABLE_CARBON_LOAD_DIRECT_WRITE_TO_STORE_PATH + = "carbon.load.directWriteToStorePath.enabled"; + public static final String ENABLE_CARBON_LOAD_DIRECT_WRITE_TO_STORE_PATH_DEFAULT = "false"; /** * If the sort memory is insufficient, spill inmemory pages to disk. http://git-wip-us.apache.org/repos/asf/carbondata/blob/5d17ff40/docs/configuration-parameters.md -- diff --git a/docs/configuration-parameters.md b/docs/configuration-parameters.md index 662525b..9dd8164 100644 --- a/docs/configuration-parameters.md +++ b/docs/configuration-parameters.md @@ -90,7 +90,7 @@ This section provides the details of all the configurations required for the Car | carbon.prefetch.buffersize | 1000 | When the configuration ***carbon.merge.sort.prefetch*** is configured to true, we need to set the number of records that can be prefetched.This configuration is used specify the number of records to be prefetched.**NOTE: **Configuring more number of records to be prefetched increases memory footprint as more records will have to be kept in memory. | | load_min_size_inmb | 256 | This configuration is used along with ***carbon.load.min.size.enabled***.This determines the minimum size of input files to be considered for distribution among executors while data loading.**NOTE:** Refer to ***carbon.load.min.size.enabled*** for understanding when this configuration needs to be used and its advantages and disadvantages. 
| | carbon.load.sortmemory.spill.percentage | 0 | During data loading, some data pages are kept in memory upto memory configured in ***carbon.sort.storage.inmemory.size.inmb*** beyond which they are spilled to disk as intermediate temporary sort files.This configuration determines after what percentage data needs to be spilled to disk.**NOTE:** Without this configuration, when the data pages occupy upto configured memory, new data pages would be dumped to disk and old pages are still maintained in disk. | -| carbon.load.directWriteHdfs.enabled | false | During data load all the carbondata files are written to local disk and finally copied to the target location in HDFS.Enabling this parameter will make carrbondata files to be written directly onto target HDFS location bypassing the local disk.**NOTE:** Writing directly to HDFS saves local disk IO(once for writing the files and again for copying to HDFS) there by improving the performance.But the drawback is when data loading fails or the application crashes, unwanted
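The separator pitfall described above can be reduced to a short sketch (an illustration of the class of bug, not the exact CarbonData code): object-store and HDFS style URIs must be joined with a literal "/", while java.io.File.separator is platform dependent and produces an invalid key on Windows:

```java
import java.io.File;

public class SeparatorDemo {

  static String joinWithPlatformSeparator(String dir, String name) {
    return dir + File.separator + name; // "\" on Windows: wrong for s3a:// URIs
  }

  static String joinWithUriSeparator(String dir, String name) {
    return dir + "/" + name; // always correct for HDFS/S3 style paths
  }

  public static void main(String[] args) {
    String segmentDir = "s3a://bucket/carbon-store/table/Fact/Part0/Segment_0";
    System.out.println(joinWithUriSeparator(segmentDir, "0.carbonindex"));
    System.out.println(joinWithPlatformSeparator(segmentDir, "0.carbonindex"));
  }
}
```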
[27/45] carbondata git commit: [HOTFIX] carbon reader supports opening another reader without closing the previous reader
[HOTFIX] Carbon reader supports opening another reader without closing the previous reader This closes #2790 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/6ef4e46b Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/6ef4e46b Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/6ef4e46b Branch: refs/heads/branch-1.5 Commit: 6ef4e46b0df2d3954788222943df09bf07fdb120 Parents: 9ca985f Author: ajantha-bhat Authored: Fri Sep 28 20:24:39 2018 +0530 Committer: ravipesala Committed: Wed Oct 3 20:06:05 2018 +0530 -- .../hadoop/api/CarbonInputFormat.java | 26  .../sdk/file/CarbonReaderBuilder.java | 7 ++ .../carbondata/sdk/file/CarbonReaderTest.java | 3 +-- 3 files changed, 8 insertions(+), 28 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/6ef4e46b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java -- diff --git a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java index db93cbd..ed82e13 100644 --- a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java +++ b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java @@ -774,29 +774,13 @@ m filterExpression */ public String[] projectAllColumns(CarbonTable carbonTable) { List colList = carbonTable.getTableInfo().getFactTable().getListOfColumns(); -List projectColumn = new ArrayList<>(); -// childCount will recursively count the number of children for any parent +List projectColumns = new ArrayList<>(); // complex type and add just the parent column name while skipping the child columns. -int childDimCount = 0; -for (ColumnSchema cols : colList) { - if (cols.getSchemaOrdinal() != -1) { -if (childDimCount == 0) { - projectColumn.add(cols.getColumnName()); -} -if (childDimCount > 0) { - childDimCount--; -} -if (cols.getDataType().isComplexType()) { - childDimCount += cols.getNumberOfChild(); -} +for (ColumnSchema col : colList) { + if (!col.getColumnName().contains(".")) { +projectColumns.add(col.getColumnName()); } } -String[] projectionColumns = new String[projectColumn.size()]; -int i = 0; -for (String columnName : projectColumn) { - projectionColumns[i] = columnName; - i++; -} -return projectionColumns; +return projectColumns.toArray(new String[projectColumns.size()]); } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/6ef4e46b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReaderBuilder.java -- diff --git a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReaderBuilder.java b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReaderBuilder.java index 151d57c..9651a8f 100644 --- a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReaderBuilder.java +++ b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReaderBuilder.java @@ -131,11 +131,8 @@ public class CarbonReaderBuilder { hadoopConf = FileFactory.getConfiguration(); } CarbonTable table; -if (filterExpression != null) { - table = CarbonTable.buildTable(tablePath, tableName, hadoopConf); -} else { - table = CarbonTable.buildDummyTable(tablePath); -} +// now always infer schema. TODO:Refactor in next version.
+table = CarbonTable.buildTable(tablePath, tableName, hadoopConf); final CarbonFileInputFormat format = new CarbonFileInputFormat(); final Job job = new Job(hadoopConf); format.setTableInfo(job.getConfiguration(), table.getTableInfo()); http://git-wip-us.apache.org/repos/asf/carbondata/blob/6ef4e46b/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java -- diff --git a/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java b/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java index 8d95456..ba8a49d 100644 --- a/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java +++ b/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java @@ -90,8 +90,6 @@ public class CarbonReaderTest extends TestCase { } Assert.assertEquals(i, 200); -reader.close(); - // Read again CarbonRead
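The simplified projectAllColumns logic in the diff relies on a naming convention: child columns of complex types carry dotted names, so keeping only names without a '.' selects exactly the top-level columns. A self-contained illustration with made-up column names:

```java
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class ProjectAllColumnsDemo {

  static String[] projectAllColumns(List<String> columnNames) {
    List<String> projectColumns = new ArrayList<>();
    for (String name : columnNames) {
      // Child columns of complex types look like "address.city"; skip them.
      if (!name.contains(".")) {
        projectColumns.add(name);
      }
    }
    return projectColumns.toArray(new String[projectColumns.size()]);
  }

  public static void main(String[] args) {
    List<String> columns = Arrays.asList("name", "address", "address.city", "address.zip");
    // Prints [name, address]
    System.out.println(Arrays.toString(projectAllColumns(columns)));
  }
}
```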
[42/45] carbondata git commit: [Documentation] Readme updated with latest topics and new TOC
[Documentation] Readme updated with latest topics and new TOC Readme updated with the new structure Formatting issue fixed Review comments fixed This closes #2788 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/ca30ad97 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/ca30ad97 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/ca30ad97 Branch: refs/heads/branch-1.5 Commit: ca30ad97da020daceb49669fba454a4346241935 Parents: d392717 Author: sgururajshetty Authored: Fri Sep 28 19:13:08 2018 +0530 Committer: kunal642 Committed: Fri Oct 5 15:00:13 2018 +0530 -- README.md | 33 ++-- docs/carbon-as-spark-datasource-guide.md | 29 ++-- docs/configuration-parameters.md | 158 ++-- docs/datamap-developer-guide.md| 4 +- docs/datamap/bloomfilter-datamap-guide.md | 6 +- docs/datamap/datamap-management.md | 6 +- docs/datamap/lucene-datamap-guide.md | 4 +- docs/datamap/preaggregate-datamap-guide.md | 2 +- docs/ddl-of-carbondata.md | 97 +++- docs/dml-of-carbondata.md | 6 +- docs/documentation.md | 2 +- docs/faq.md| 6 +- docs/file-structure-of-carbondata.md | 2 +- docs/how-to-contribute-to-apache-carbondata.md | 4 +- docs/introduction.md | 20 +-- docs/language-manual.md| 2 + docs/performance-tuning.md | 10 +- docs/quick-start-guide.md | 6 +- docs/s3-guide.md | 2 +- docs/streaming-guide.md| 6 +- docs/usecases.md | 32 ++-- 21 files changed, 229 insertions(+), 208 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/ca30ad97/README.md -- diff --git a/README.md b/README.md index ba2cbf7..87bb71e 100644 --- a/README.md +++ b/README.md @@ -45,23 +45,26 @@ CarbonData file format is a columnar store in HDFS, it has many features that a CarbonData is built using Apache Maven, to [build CarbonData](https://github.com/apache/carbondata/blob/master/build) ## Online Documentation +* [What is CarbonData](https://github.com/apache/carbondata/blob/master/docs/introduction.md) * [Quick Start](https://github.com/apache/carbondata/blob/master/docs/quick-start-guide.md) -* [CarbonData File Structure](https://github.com/apache/carbondata/blob/master/docs/file-structure-of-carbondata.md) -* [Data Types](https://github.com/apache/carbondata/blob/master/docs/supported-data-types-in-carbondata.md) -* [Data Management on CarbonData](https://github.com/apache/carbondata/blob/master/docs/language-manual.md) -* [Configuring Carbondata](https://github.com/apache/carbondata/blob/master/docs/configuration-parameters.md) -* [Streaming Ingestion](https://github.com/apache/carbondata/blob/master/docs/streaming-guide.md) -* [SDK Guide](https://github.com/apache/carbondata/blob/master/docs/sdk-guide.md) -* [S3 Guide](https://github.com/apache/carbondata/blob/master/docs/s3-guide.md) -* [DataMap Developer Guide](https://github.com/apache/carbondata/blob/master/docs/datamap-developer-guide.md) -* [CarbonData DataMap Management](https://github.com/apache/carbondata/blob/master/docs/datamap/datamap-management.md) -* [CarbonData BloomFilter DataMap](https://github.com/apache/carbondata/blob/master/docs/datamap/bloomfilter-datamap-guide.md) -* [CarbonData Lucene DataMap](https://github.com/apache/carbondata/blob/master/docs/datamap/lucene-datamap-guide.md) -* [CarbonData Pre-aggregate DataMap](https://github.com/apache/carbondata/blob/master/docs/datamap/preaggregate-datamap-guide.md) -* [CarbonData Timeseries DataMap](https://github.com/apache/carbondata/blob/master/docs/datamap/timeseries-datamap-guide.md) -* [Performance 
Tuning](https://github.com/apache/carbondata/blob/master/docs/performance-tuning.md) -* [FAQ](https://github.com/apache/carbondata/blob/master/docs/faq.md) * [Use Cases](https://github.com/apache/carbondata/blob/master/docs/usecases.md) +* [Language Reference](https://github.com/apache/carbondata/blob/master/docs/language-manual.md) + * [CarbonData Data Definition Language](https://github.com/apache/carbondata/blob/master/docs/ddl-of-carbondata.md) + * [CarbonData Data Manipulation Language](https://github.com/apache/carbondata/blob/master/docs/dml-of-carbondata.md) + * [CarbonData Streaming Ingestion](https://github.com/apache/carbondata/blob/master/docs/streaming-guide.md) + * [Configuring
[13/45] carbondata git commit: [CARBONDATA-2970]lock object creation fix for viewFS
[CARBONDATA-2970]lock object creation fix for viewFS Problem: When the default fs is set to ViewFS, drop table and data load fail with exceptions saying they failed to get locks such as meta.lock and tablestatus.lock. This happens because, when getting the lock-type object, we were not checking for ViewFS and were returning a local file system lock, which then fails while acquiring. Solution: Also check for ViewFS when trying to get the lock object. This closes #2762 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/1b4109d5 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/1b4109d5 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/1b4109d5 Branch: refs/heads/branch-1.5 Commit: 1b4109d5b2badc0c10d5522502bd799c6325263c Parents: 5d17ff4 Author: akashrn5 Authored: Tue Sep 25 18:59:04 2018 +0530 Committer: kumarvishal09 Committed: Thu Sep 27 16:46:11 2018 +0530 -- .../java/org/apache/carbondata/core/locks/CarbonLockFactory.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/1b4109d5/core/src/main/java/org/apache/carbondata/core/locks/CarbonLockFactory.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/locks/CarbonLockFactory.java b/core/src/main/java/org/apache/carbondata/core/locks/CarbonLockFactory.java index 91677a6..79bad6c 100644 --- a/core/src/main/java/org/apache/carbondata/core/locks/CarbonLockFactory.java +++ b/core/src/main/java/org/apache/carbondata/core/locks/CarbonLockFactory.java @@ -71,7 +71,8 @@ public class CarbonLockFactory { lockTypeConfigured = CarbonCommonConstants.CARBON_LOCK_TYPE_S3; return new S3FileLock(absoluteLockPath, lockFile); -} else if (absoluteLockPath.startsWith(CarbonCommonConstants.HDFSURL_PREFIX)) { +} else if (absoluteLockPath.startsWith(CarbonCommonConstants.HDFSURL_PREFIX) || absoluteLockPath +.startsWith(CarbonCommonConstants.VIEWFSURL_PREFIX)) { lockTypeConfigured = CarbonCommonConstants.CARBON_LOCK_TYPE_HDFS; return new HdfsFileLock(absoluteLockPath, lockFile); } else {
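The dispatch this one-line fix extends can be sketched as a scheme check (the constants and lock classes below are simplified stand-ins for CarbonLockFactory's): a viewfs:// path must resolve to the HDFS file lock rather than falling through to the local one:

```java
public class LockSchemeDemo {

  static String lockTypeFor(String absoluteLockPath) {
    if (absoluteLockPath.startsWith("s3a://")) {
      return "S3LOCK";
    } else if (absoluteLockPath.startsWith("hdfs://")
        || absoluteLockPath.startsWith("viewfs://")) { // the added ViewFS check
      return "HDFSLOCK";
    } else {
      return "LOCALLOCK"; // previously viewfs:// paths fell through to here
    }
  }

  public static void main(String[] args) {
    // Prints HDFSLOCK instead of falling back to a local-file lock.
    System.out.println(lockTypeFor("viewfs://cluster/warehouse/t/meta.lock"));
  }
}
```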
[29/45] carbondata git commit: [CARBONDATA-2984][Streaming] Fix NPE when there is no data in the task of a batch
[CARBONDATA-2984][Streaming] Fix NPE when there is no data in the task of a batch A streaming batch may contain no data, in which case no blocklet needs to be appended to the streaming file. The min/max index of the streaming file therefore does not need to be updated; the min/max index of the old file is used as-is. This closes #2782 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/fa9c8323 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/fa9c8323 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/fa9c8323 Branch: refs/heads/branch-1.5 Commit: fa9c8323c11c083452d75886cbbdad1f23d6dfb7 Parents: 0b16816 Author: QiangCai Authored: Fri Sep 28 14:48:39 2018 +0800 Committer: ravipesala Committed: Wed Oct 3 20:13:50 2018 +0530 -- .../TestStreamingTableOperation.scala | 49 +++- .../streaming/CarbonStreamRecordWriter.java | 5 +- .../streaming/segment/StreamSegment.java| 15 -- 3 files changed, 61 insertions(+), 8 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/fa9c8323/integration/spark2/src/test/scala/org/apache/spark/carbondata/TestStreamingTableOperation.scala -- diff --git a/integration/spark2/src/test/scala/org/apache/spark/carbondata/TestStreamingTableOperation.scala b/integration/spark2/src/test/scala/org/apache/spark/carbondata/TestStreamingTableOperation.scala index 43c1e5a..607c429 100644 --- a/integration/spark2/src/test/scala/org/apache/spark/carbondata/TestStreamingTableOperation.scala +++ b/integration/spark2/src/test/scala/org/apache/spark/carbondata/TestStreamingTableOperation.scala @@ -37,6 +37,7 @@ import org.scalatest.BeforeAndAfterAll import org.apache.carbondata.common.exceptions.NoSuchStreamException import org.apache.carbondata.common.exceptions.sql.MalformedCarbonCommandException import org.apache.carbondata.core.constants.CarbonCommonConstants +import org.apache.carbondata.core.datastore.impl.FileFactory import org.apache.carbondata.core.metadata.schema.datamap.DataMapClassProvider.TIMESERIES import org.apache.carbondata.core.metadata.schema.table.CarbonTable import org.apache.carbondata.core.statusmanager.{FileFormat, SegmentStatus} @@ -125,6 +126,8 @@ class TestStreamingTableOperation extends QueryTest with BeforeAndAfterAll { createTable(tableName = "agg_table", streaming = true, withBatchLoad = false) +createTable(tableName = "stream_table_empty", streaming = true, withBatchLoad = false) + var csvDataDir = integrationPath + "/spark2/target/csvdatanew" generateCSVDataFile(spark, idStart = 10, rowNums = 5, csvDataDir) generateCSVDataFile(spark, idStart = 10, rowNums = 5, csvDataDir, SaveMode.Append) @@ -213,6 +216,7 @@ class TestStreamingTableOperation extends QueryTest with BeforeAndAfterAll { sql("drop table if exists streaming.stream_table_reopen") sql("drop table if exists streaming.stream_table_drop") sql("drop table if exists streaming.agg_table_block") +sql("drop table if exists streaming.stream_table_empty") } // normal table not support streaming ingest @@ -226,7 +230,7 @@ class TestStreamingTableOperation extends QueryTest with BeforeAndAfterAll { .asInstanceOf[CarbonRelation].metaData.carbonTable var server: ServerSocket = null try { - server = getServerSocket + server = getServerSocket() val thread1 = createWriteSocketThread(server, 2, 10, 1) thread1.start() // use thread pool to catch the exception of sink thread @@ -2253,6 +2257,46 @@ class TestStreamingTableOperation extends QueryTest with BeforeAndAfterAll { sql("DROP TABLE
IF EXISTS dim") } + // test empty batch + test("test empty batch") { +executeStreamingIngest( + tableName = "stream_table_empty", + batchNums = 1, + rowNumsEachBatch = 10, + intervalOfSource = 1, + intervalOfIngest = 3, + continueSeconds = 10, + generateBadRecords = false, + badRecordAction = "force", + autoHandoff = false +) +var result = sql("select count(*) from streaming.stream_table_empty").collect() +assert(result(0).getLong(0) == 10) + +// clean checkpointDir and logDir +val carbonTable = CarbonEnv.getCarbonTable(Option("streaming"), "stream_table_empty")(spark) +FileFactory + .deleteAllFilesOfDir(new File(CarbonTablePath.getStreamingLogDir(carbonTable.getTablePath))) +FileFactory + .deleteAllFilesOfDir(new File(CarbonTablePath +.getStreamingCheckpointDir(carbonTable.getTa
[06/45] carbondata git commit: [CARBONDATA-2966]Update Documentation For Avro DataType conversion
[CARBONDATA-2966]Update Documentation For Avro DataType conversion Updated document for the following features: 1. Avro DataType conversion to carbon 2. Remove min, max for varchar columns 3. LRU enhancements for driver cache This closes #2756 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/b3a5e3a8 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/b3a5e3a8 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/b3a5e3a8 Branch: refs/heads/branch-1.5 Commit: b3a5e3a8bb4b051779f91bca071336703742296c Parents: d84cd81 Author: Indhumathi27 Authored: Mon Sep 24 23:34:04 2018 +0530 Committer: kunal642 Committed: Wed Sep 26 16:16:02 2018 +0530 -- docs/configuration-parameters.md | 6 ++- docs/faq.md| 16 +++ docs/sdk-guide.md | 55 + docs/supported-data-types-in-carbondata.md | 1 + 4 files changed, 58 insertions(+), 20 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/b3a5e3a8/docs/configuration-parameters.md -- diff --git a/docs/configuration-parameters.md b/docs/configuration-parameters.md index c6b0fcb..7edae47 100644 --- a/docs/configuration-parameters.md +++ b/docs/configuration-parameters.md @@ -42,6 +42,7 @@ This section provides the details of all the configurations required for the Car | carbon.lock.type | LOCALLOCK | This configuration specifies the type of lock to be acquired during concurrent operations on table. There are following types of lock implementation: - LOCALLOCK: Lock is created on local file system as file. This lock is useful when only one spark driver (thrift server) runs on a machine and no other CarbonData spark application is launched concurrently. - HDFSLOCK: Lock is created on HDFS file system as file. This lock is useful when multiple CarbonData spark applications are launched and no ZooKeeper is running on cluster and HDFS supports file based locking. | | carbon.lock.path | TABLEPATH | This configuration specifies the path where lock files have to be created. Recommended to configure zookeeper lock type or configure HDFS lock path(to this property) in case of S3 file system as locking is not feasible on S3. | | carbon.unsafe.working.memory.in.mb | 512 | CarbonData supports storing data in off-heap memory for certain operations during data loading and query.This helps to avoid the Java GC and thereby improve the overall performance.The Minimum value recommeded is 512MB.Any value below this is reset to default value of 512MB.**NOTE:** The below formulas explain how to arrive at the off-heap size required.Memory Required For Data Loading:(*carbon.number.of.cores.while.loading*) * (Number of tables to load in parallel) * (*offheap.sort.chunk.size.inmb* + *carbon.blockletgroup.size.in.mb* + *carbon.blockletgroup.size.in.mb*/3.5 ). Memory required for Query:SPARK_EXECUTOR_INSTANCES * (*carbon.blockletgroup.size.in.mb* + *carbon.blockletgroup.size.in.mb* * 3.5) * spark.executor.cores | +| carbon.unsafe.driver.working.memory.in.mb | 60% of JVM Heap Memory | CarbonData supports storing data in unsafe on-heap memory in driver for certain operations like insert into, query for loading datamap cache. The Minimum value recommended is 512MB. | | carbon.update.sync.folder | /tmp/carbondata | CarbonData maintains last modification time entries in modifiedTime.mdt to determine the schema changes and reload only when necessary.This configuration specifies the path where the file needs to be written. 
| | carbon.invisible.segments.preserve.count | 200 | CarbonData maintains each data load entry in tablestatus file. The entries from this file are not deleted for those segments that are compacted or dropped, but are made invisible.If the number of data loads are very high, the size and number of entries in tablestatus file can become too many causing unnecessary reading of all data.This configuration specifies the number of segment entries to be maintained afte they are compacted or dropped.Beyond this, the entries are moved to a separate history tablestatus file.**NOTE:** The entries in tablestatus file help to identify the operations performed on CarbonData table and is also used for checkpointing during various data manupulation operations.This is similar to AUDIT file maintaining all the operations and its status.Hence the entries are never deleted but moved to a separate history file. | | carbon.lock.retries | 3 | CarbonData ensures consistency of operations by blocking certain operations from running in parallel.In order to block the operations from running in parallel, lock is obtained on the table.This configuration
[07/45] carbondata git commit: [HOTFIX] fix compile error after merging PR-2749
[HOTFIX] fix compile error after merging PR-2749 fix compile error after merging PR-2749 This closes #2763 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/a9ddfbd7 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/a9ddfbd7 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/a9ddfbd7 Branch: refs/heads/branch-1.5 Commit: a9ddfbd7bdc9dc2e4c68b9bc53878ba257c80cd3 Parents: b3a5e3a Author: Zhang Zhichao <441586...@qq.com> Authored: Wed Sep 26 01:00:11 2018 +0800 Committer: Jacky Li Committed: Wed Sep 26 18:48:50 2018 +0800 -- .../org/apache/carbondata/tool/CarbonCliTest.java | 16 1 file changed, 4 insertions(+), 12 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/a9ddfbd7/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java -- diff --git a/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java b/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java index 4dc34c4..fcd46c8 100644 --- a/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java +++ b/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java @@ -45,10 +45,8 @@ public class CarbonCliTest { fields[0] = new Field("name", DataTypes.STRING); fields[1] = new Field("age", DataTypes.INT); -TestUtil.writeFilesAndVerify(500, new Schema(fields), path, new String[]{"name"}, -true, 3, 8, true); -TestUtil.writeFilesAndVerify(500, new Schema(fields), path, new String[]{"name"}, -true, 3, 8, true); +TestUtil.writeFilesAndVerify(500, new Schema(fields), path, new String[]{"name"}, 3, 8); +TestUtil.writeFilesAndVerify(500, new Schema(fields), path, new String[]{"name"}, 3, 8); } @Test @@ -102,10 +100,7 @@ public class CarbonCliTest { Assert.assertTrue( output.contains( "## Table Properties\n" - + "Property Name Property Value \n" - + "'table_blocksize' '8' \n" - + "'table_blocklet_size' '3' \n" - + "'local_dictionary_enable' 'false'")); + + "schema file not found")); String[] args4 = {"-cmd", "summary", "-p", path, "-b"}; out = new ByteArrayOutputStream(); @@ -163,10 +158,7 @@ public class CarbonCliTest { Assert.assertTrue( output.contains( "## Table Properties\n" - + "Property Name Property Value \n" - + "'table_blocksize' '8' \n" - + "'table_blocklet_size' '3' \n" - + "'local_dictionary_enable' 'false'")); + + "schema file not found")); Assert.assertTrue( output.contains(
[40/45] carbondata git commit: [Documentation] Readme updated with latest topics and new TOC
http://git-wip-us.apache.org/repos/asf/carbondata/blob/ca30ad97/docs/datamap-developer-guide.md -- diff --git a/docs/datamap-developer-guide.md b/docs/datamap-developer-guide.md index 6bac9b5..60f93df 100644 --- a/docs/datamap-developer-guide.md +++ b/docs/datamap-developer-guide.md @@ -6,7 +6,7 @@ Currently, there are two 2 types of DataMap supported: 1. IndexDataMap: DataMap that leverages index to accelerate filter query 2. MVDataMap: DataMap that leverages Materialized View to accelerate olap style query, like SPJG query (select, predicate, join, groupby) -### DataMap provider +### DataMap Provider When user issues `CREATE DATAMAP dm ON TABLE main USING 'provider'`, the corresponding DataMapProvider implementation will be created and initialized. Currently, the provider string can be: 1. preaggregate: A type of MVDataMap that do pre-aggregate of single table @@ -15,5 +15,5 @@ Currently, the provider string can be: When user issues `DROP DATAMAP dm ON TABLE main`, the corresponding DataMapProvider interface will be called. -Details about [DataMap Management](./datamap/datamap-management.md#datamap-management) and supported [DSL](./datamap/datamap-management.md#overview) are documented [here](./datamap/datamap-management.md). +Click for more details about [DataMap Management](./datamap/datamap-management.md#datamap-management) and supported [DSL](./datamap/datamap-management.md#overview). http://git-wip-us.apache.org/repos/asf/carbondata/blob/ca30ad97/docs/datamap/bloomfilter-datamap-guide.md -- diff --git a/docs/datamap/bloomfilter-datamap-guide.md b/docs/datamap/bloomfilter-datamap-guide.md index b2e7d60..fb244fe 100644 --- a/docs/datamap/bloomfilter-datamap-guide.md +++ b/docs/datamap/bloomfilter-datamap-guide.md @@ -15,7 +15,7 @@ limitations under the License. --> -# CarbonData BloomFilter DataMap (Alpha Feature) +# CarbonData BloomFilter DataMap * [DataMap Management](#datamap-management) * [BloomFilter Datamap Introduction](#bloomfilter-datamap-introduction) @@ -46,7 +46,7 @@ Showing all DataMaps on this table ``` Disable Datamap -> The datamap by default is enabled. To support tuning on query, we can disable a specific datamap during query to observe whether we can gain performance enhancement from it. This will only take effect current session. +> The datamap by default is enabled. To support tuning on query, we can disable a specific datamap during query to observe whether we can gain performance enhancement from it. This is effective only for current session. ``` // disable the datamap @@ -82,7 +82,7 @@ and we always query on `id` and `name` with precise value. since `id` is in the sort_columns and it is orderd, query on it will be fast because CarbonData can skip all the irrelative blocklets. But queries on `name` may be bad since the blocklet minmax may not help, -because in each blocklet the range of the value of `name` may be the same -- all from A*~z*. +because in each blocklet the range of the value of `name` may be the same -- all from A* to z*. In this case, user can create a BloomFilter datamap on column `name`. Moreover, user can also create a BloomFilter datamap on the sort_columns. This is useful if user has too many segments and the range of the value of sort_columns are almost the same. 
http://git-wip-us.apache.org/repos/asf/carbondata/blob/ca30ad97/docs/datamap/datamap-management.md -- diff --git a/docs/datamap/datamap-management.md b/docs/datamap/datamap-management.md index bf52c05..ad8718a 100644 --- a/docs/datamap/datamap-management.md +++ b/docs/datamap/datamap-management.md @@ -66,9 +66,9 @@ If user create MV datamap without specifying `WITH DEFERRED REBUILD`, carbondata ### Automatic Refresh When user creates a datamap on the main table without using `WITH DEFERRED REBUILD` syntax, the datamap will be managed by system automatically. -For every data load to the main table, system will immediately triger a load to the datamap automatically. These two data loading (to main table and datamap) is executed in a transactional manner, meaning that it will be either both success or neither success. +For every data load to the main table, system will immediately trigger a load to the datamap automatically. These two data loading (to main table and datamap) is executed in a transactional manner, meaning that it will be either both success or neither success. -The data loading to datamap is incremental based on Segment concept, avoiding a expesive total rebuild. +The data loading to datamap is incremental based on Segment concept, avoiding a expensive total rebuild. If user perform following command on the main table, system will return failure. (reject the operation) @@ -87,7 +87,7 @@ We do
[08/45] carbondata git commit: [CARBONDATA-2957][DOC] update doc for supporting compressor in table property
[CARBONDATA-2957][DOC] update doc for supporting compressor in table property add doc for supporting compressor in table property This closes #2744 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/3cd8b947 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/3cd8b947 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/3cd8b947 Branch: refs/heads/branch-1.5 Commit: 3cd8b947c9d671f5d409f3b6c589c65afc2545ee Parents: 3f99e9b Author: xuchuanyin Authored: Fri Sep 21 17:06:48 2018 +0800 Committer: kunal642 Committed: Wed Sep 26 18:13:57 2018 +0530 -- docs/ddl-of-carbondata.md | 26 ++ 1 file changed, 26 insertions(+) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/3cd8b947/docs/ddl-of-carbondata.md -- diff --git a/docs/ddl-of-carbondata.md b/docs/ddl-of-carbondata.md index 5eeba86..7cda9cd 100644 --- a/docs/ddl-of-carbondata.md +++ b/docs/ddl-of-carbondata.md @@ -32,6 +32,7 @@ CarbonData DDL statements are documented here,which includes: * [Caching Level](#caching-at-block-or-blocklet-level) * [Hive/Parquet folder Structure](#support-flat-folder-same-as-hiveparquet) * [Extra Long String columns](#string-longer-than-32000-characters) + * [Compression for Table](#compression-for-table) * [CREATE TABLE AS SELECT](#create-table-as-select) * [CREATE EXTERNAL TABLE](#create-external-table) * [External Table on Transactional table location](#create-external-table-on-managed-table-data-location) @@ -424,6 +425,31 @@ CarbonData DDL statements are documented here,which includes: **NOTE:** The LONG_STRING_COLUMNS can only be string/char/varchar columns and cannot be dictionary_include/sort_columns/complex columns. + - # Compression for table + + Data compression is also supported by CarbonData. + By default, Snappy is used to compress the data. CarbonData also support ZSTD compressor. + User can specify the compressor in the table property: + + ``` + TBLPROPERTIES('carbon.column.compressor'='snappy') + ``` + or + ``` + TBLPROPERTIES('carbon.column.compressor'='zstd') + ``` + If the compressor is configured, all the data loading and compaction will use that compressor. + If the compressor is not configured, the data loading and compaction will use the compressor from current system property. + In this scenario, the compressor for each load may differ if the system property is changed each time. This is helpful if you want to change the compressor for a table. + The corresponding system property is configured in carbon.properties file as below: + ``` + carbon.column.compressor=snappy + ``` + or + ``` + carbon.column.compressor=zstd + ``` + ## CREATE TABLE AS SELECT This function allows user to create a Carbon table from any of the Parquet/Hive/Carbon table. This is beneficial when the user wants to create Carbon table from any other Parquet/Hive table and use the Carbon query engine to query and achieve better query results for cases where Carbon is faster than other file formats. Also this feature can be used for backing up the data.
[11/45] carbondata git commit: [HOTFIX] Fix NPE in LRU cache when entry from the same table is getting evicted to load another entry from same table
[HOTFIX] Fix NPE in LRU cache when entry from the same table is getting evicted to load another entry from same table Problem When driver LRU cache size is configured to a small value then on running concurrent queries sometimes while loading the block dataMap in LRU cache one of the dataMap entries from the same table is getting deleted because of shortage of space. Due to this in the flow after loading the dataMap cache NPE is thrown. This is because when an cacheable entry is removed from LRU cache then invalidate is called on that cacheable entry to clear the unsafe memory used by that entry. Invalidate method makes the references null and clears the unsafe memory which leads to NPE when accessed again. Solution Currently dataMap cache uses unsafe offheap memory for datamap caching. To avoid this the code is modified to use unsafe with onheap so that JVM itself takes care of clearing the memory when required. We do not require to explicitly set the references to null. This closes #2759 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/2a4f5300 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/2a4f5300 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/2a4f5300 Branch: refs/heads/branch-1.5 Commit: 2a4f53001058346843e0248c60fee2943087efc9 Parents: 5c0da31 Author: manishgupta88 Authored: Tue Sep 25 19:21:08 2018 +0530 Committer: ravipesala Committed: Thu Sep 27 12:08:28 2018 +0530 -- .../indexstore/BlockletDataMapIndexWrapper.java | 1 - .../core/indexstore/SafeMemoryDMStore.java | 1 - .../core/indexstore/UnsafeMemoryDMStore.java | 16 +--- .../indexstore/blockletindex/BlockDataMap.java | 2 -- .../core/memory/HeapMemoryAllocator.java | 8 ++-- .../core/memory/UnsafeMemoryManager.java | 19 +++ .../util/AbstractDataFileFooterConverter.java| 4 ++-- 7 files changed, 32 insertions(+), 19 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/2a4f5300/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDataMapIndexWrapper.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDataMapIndexWrapper.java b/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDataMapIndexWrapper.java index 7b8a13b..33d69aa 100644 --- a/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDataMapIndexWrapper.java +++ b/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDataMapIndexWrapper.java @@ -70,7 +70,6 @@ public class BlockletDataMapIndexWrapper implements Cacheable, Serializable { for (DataMap dataMap : dataMaps) { dataMap.clear(); } -dataMaps = null; } public List getDataMaps() { http://git-wip-us.apache.org/repos/asf/carbondata/blob/2a4f5300/core/src/main/java/org/apache/carbondata/core/indexstore/SafeMemoryDMStore.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/SafeMemoryDMStore.java b/core/src/main/java/org/apache/carbondata/core/indexstore/SafeMemoryDMStore.java index 0b3d4d8..042790f 100644 --- a/core/src/main/java/org/apache/carbondata/core/indexstore/SafeMemoryDMStore.java +++ b/core/src/main/java/org/apache/carbondata/core/indexstore/SafeMemoryDMStore.java @@ -62,7 +62,6 @@ public class SafeMemoryDMStore extends AbstractMemoryDMStore { if (!isMemoryFreed) { if (null != dataMapRows) { dataMapRows.clear(); -dataMapRows = null; } isMemoryFreed = true; } 
http://git-wip-us.apache.org/repos/asf/carbondata/blob/2a4f5300/core/src/main/java/org/apache/carbondata/core/indexstore/UnsafeMemoryDMStore.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/UnsafeMemoryDMStore.java b/core/src/main/java/org/apache/carbondata/core/indexstore/UnsafeMemoryDMStore.java index 3e8ce12..196559a 100644 --- a/core/src/main/java/org/apache/carbondata/core/indexstore/UnsafeMemoryDMStore.java +++ b/core/src/main/java/org/apache/carbondata/core/indexstore/UnsafeMemoryDMStore.java @@ -19,6 +19,7 @@ package org.apache.carbondata.core.indexstore; import org.apache.carbondata.core.indexstore.row.DataMapRow; import org.apache.carbondata.core.indexstore.row.UnsafeDataMapRow; import org.apache.carbondata.core.indexstore.schema.CarbonRowSchema; +import org.apache.carbondata.core.memory.MemoryAllocator; import org.apache.carbondata.core.memory.MemoryBlock; import org.apache.carbondata.core.memory.MemoryException; import org.apache.carbondata.core.memory.UnsafeMemoryManager
[34/45] carbondata git commit: [CARBONDATA-2975] DefaultValue choosing and removeNullValues on rangefilters is incorrect
[CARBONDATA-2975] DefaultValue choosing and removeNullValues on rangefilters is incorrect This closes #2770 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/6aa2a90b Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/6aa2a90b Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/6aa2a90b Branch: refs/heads/branch-1.5 Commit: 6aa2a90bf113132880694e305a264cdccc4c5693 Parents: 18fbdfc Author: dhatchayani Authored: Wed Sep 26 20:49:06 2018 +0530 Committer: ravipesala Committed: Thu Oct 4 17:22:14 2018 +0530 -- .../apache/carbondata/core/scan/filter/FilterUtil.java | 11 ++- .../filter/executer/RangeValueFilterExecuterImpl.java| 2 +- .../executer/RowLevelRangeGrtThanFiterExecuterImpl.java | 10 -- .../RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java | 10 -- .../RowLevelRangeLessThanEqualFilterExecuterImpl.java| 11 +++ .../RowLevelRangeLessThanFilterExecuterImpl.java | 11 +++ .../spark/testsuite/sortcolumns/TestSortColumns.scala| 6 ++ 7 files changed, 43 insertions(+), 18 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/6aa2a90b/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java index b4354d2..fe92c42 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java @@ -1947,11 +1947,12 @@ public final class FilterUtil { public static void removeNullValues(DimensionColumnPage dimensionColumnPage, BitSet bitSet, byte[] defaultValue) { if (!bitSet.isEmpty()) { - if (null != dimensionColumnPage.getNullBits() && !dimensionColumnPage.getNullBits().isEmpty() - && !dimensionColumnPage.isExplicitSorted() && !dimensionColumnPage.isAdaptiveEncoded()) { -for (int i = bitSet.nextSetBit(0); i >= 0; i = bitSet.nextSetBit(i + 1)) { - if (dimensionColumnPage.getNullBits().get(i)) { -bitSet.flip(i); + if (null != dimensionColumnPage.getNullBits()) { +if (!dimensionColumnPage.getNullBits().isEmpty()) { + for (int i = bitSet.nextSetBit(0); i >= 0; i = bitSet.nextSetBit(i + 1)) { +if (dimensionColumnPage.getNullBits().get(i)) { + bitSet.flip(i); +} } } } else { http://git-wip-us.apache.org/repos/asf/carbondata/blob/6aa2a90b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java index b9729db..886a13b 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java @@ -632,7 +632,7 @@ public class RangeValueFilterExecuterImpl implements FilterExecuter { } else { if (dimColEvaluatorInfo.getDimension().getDataType() == DataTypes.STRING) { defaultValue = CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY; -} else { +} else if (!dimensionColumnPage.isAdaptiveEncoded()) { defaultValue = CarbonCommonConstants.EMPTY_BYTE_ARRAY; } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/6aa2a90b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java -- diff 
--git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java index 63a5976..c6835f8 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java @@ -28,6 +28,7 @@ import org.apache.carbondata.core.datastore.chunk.impl.MeasureRawColumnChunk; import org.apache.carbondata.core.datastore.page.ColumnPage; import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier; import org.apache.carbondata.c
[24/45] carbondata git commit: [CARBONDATA-2952] Provide CarbonReader C++ interface for SDK
[CARBONDATA-2952] Provide CarbonReader C++ interface for SDK 1.init carbonreader,config data path and tablename 2.config ak sk endpoing for S3 3.configure projection 4.build carbon reader 5.hasNext 6.readNextRow 7.close This closes #2738 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/e9a198ab Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/e9a198ab Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/e9a198ab Branch: refs/heads/branch-1.5 Commit: e9a198ab65c7bc9642e8a3186072f39aa2e5f0f6 Parents: d8a51c9 Author: xubo245 Authored: Thu Sep 20 18:35:34 2018 +0800 Committer: kunal642 Committed: Wed Oct 3 17:53:35 2018 +0530 -- .gitignore | 1 + .../core/constants/CarbonCommonConstants.java | 4 + docs/CSDK-guide.md | 197 ++ examples/spark2/pom.xml | 2 +- pom.xml | 2 + store/CSDK/CMakeLists.txt | 17 ++ store/CSDK/CarbonReader.cpp | 103 ++ store/CSDK/CarbonReader.h | 105 ++ store/CSDK/main.cpp | 200 +++ store/sdk/pom.xml | 5 + .../carbondata/sdk/file/CarbonReader.java | 30 +++ .../sdk/file/CarbonReaderBuilder.java | 16 ++ 12 files changed, 681 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/e9a198ab/.gitignore -- diff --git a/.gitignore b/.gitignore index 5d66a40..00e4934 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,7 @@ .settings .cache target/ +store/CSDK/cmake-build-debug/* .project .classpath metastore_db/ http://git-wip-us.apache.org/repos/asf/carbondata/blob/e9a198ab/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java index faad0dc..46139c9 100644 --- a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java +++ b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java @@ -507,6 +507,10 @@ public final class CarbonCommonConstants { */ public static final String FILE_SEPARATOR = "/"; /** + * ARRAY separator + */ + public static final String ARRAY_SEPARATOR = "\001"; + /** * MAX_QUERY_EXECUTION_TIME */ @CarbonProperty http://git-wip-us.apache.org/repos/asf/carbondata/blob/e9a198ab/docs/CSDK-guide.md -- diff --git a/docs/CSDK-guide.md b/docs/CSDK-guide.md new file mode 100644 index 000..c4f4a31 --- /dev/null +++ b/docs/CSDK-guide.md @@ -0,0 +1,197 @@ + + +# CSDK Guide + +CarbonData CSDK provides C++ interface to write and read carbon file. +CSDK use JNI to invoke java SDK in C++ code. + + +# CSDK Reader +This CSDK reader reads CarbonData file and carbonindex file at a given path. +External client can make use of this reader to read CarbonData files in C++ +code and without CarbonSession. + + +In the carbon jars package, there exist a carbondata-sdk.jar, +including SDK reader for CSDK. +## Quick example +``` +// 1. 
init JVM +JavaVM *jvm; +JNIEnv *initJVM() { +JNIEnv *env; +JavaVMInitArgs vm_args; +int parNum = 3; +int res; +JavaVMOption options[parNum]; + +options[0].optionString = "-Djava.compiler=NONE"; +options[1].optionString = "-Djava.class.path=../../sdk/target/carbondata-sdk.jar"; +options[2].optionString = "-verbose:jni"; +vm_args.version = JNI_VERSION_1_8; +vm_args.nOptions = parNum; +vm_args.options = options; +vm_args.ignoreUnrecognized = JNI_FALSE; + +res = JNI_CreateJavaVM(, (void **) , _args); +if (res < 0) { +fprintf(stderr, "\nCan't create Java VM\n"); +exit(1); +} + +return env; +} + +// 2. create carbon reader and read data +// 2.1 read data from local disk +/** + * test read data from local disk, without projection + * + * @param env jni env + * @return + */ +bool readFromLocalWithoutProjection(JNIEnv *env) { + +CarbonReader carbonReaderClass; +carbonReaderClass.builder(env, "../resources/carbondata", "test"); +carbonReaderClass.build(); + +while (carbonReaderClass.hasNext()) { +jobjectArray row = carbonReaderClass.readNextRow(); +jsize length = env->GetArrayLength(row); +int j = 0; +for (j = 0; j < length; j++) { +
[45/45] carbondata git commit: [CARBONDATA-2990] Queries slow down after some time due to broadcast issue
[CARBONDATA-2990] Queries slow down after some time due to broadcast issue Problem It is observed that during consecutive run of queries after some time queries are slowing down. This is causing the degrade in query performance. No exception is thrown in driver and executor logs but as observed from the logs the time to broadcast hadoop conf is increasing after every query run. Analysis This is happening because in carbon SerializableConfiguration class is overriden from spark. Spark registers this class with Kryo serializer and hence the computation using the kryo is fast. The same benefit is not observed in carbondata becuase of overriding the class. Internal Spark sizeEstimator calculates the size of object and there are few extra objects in carbondata overriden class because of which the computation time is increasing. Solution Use the spark class instead of overriding the class in carbondata This closes #2803 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/3c7b3399 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/3c7b3399 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/3c7b3399 Branch: refs/heads/branch-1.5 Commit: 3c7b33992e06d81fb47d81bf8ccf7884f845b3ff Parents: 19097f2 Author: manishgupta88 Authored: Mon Oct 8 19:38:54 2018 +0530 Committer: ravipesala Committed: Tue Oct 9 13:38:51 2018 +0530 -- .../carbondata/spark/load/CsvRDDHelper.scala| 4 +-- .../load/DataLoadProcessBuilderOnSpark.scala| 6 ++-- .../load/DataLoadProcessorStepOnSpark.scala | 6 ++-- .../apache/carbondata/spark/rdd/CarbonRDD.scala | 4 +-- .../spark/rdd/NewCarbonDataLoadRDD.scala| 33 .../apache/spark/sql/util/SparkSQLUtil.scala| 21 - .../spark/rdd/CarbonDataRDDFactory.scala| 4 +-- .../spark/sql/CarbonDictionaryDecoder.scala | 8 ++--- .../management/CarbonLoadDataCommand.scala | 7 +++-- .../command/mutation/DeleteExecution.scala | 7 ++--- .../command/mutation/HorizontalCompaction.scala | 8 ++--- 11 files changed, 46 insertions(+), 62 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/3c7b3399/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/CsvRDDHelper.scala -- diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/CsvRDDHelper.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/CsvRDDHelper.scala index 8d6dd32..5511645 100644 --- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/CsvRDDHelper.scala +++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/CsvRDDHelper.scala @@ -34,6 +34,7 @@ import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.GenericInternalRow import org.apache.spark.sql.execution.datasources.{FilePartition, FileScanRDD, PartitionedFile} +import org.apache.spark.sql.util.SparkSQLUtil import org.apache.spark.sql.util.SparkSQLUtil.sessionState import org.apache.carbondata.common.logging.{LogService, LogServiceFactory} @@ -41,7 +42,6 @@ import org.apache.carbondata.core.datastore.impl.FileFactory import org.apache.carbondata.core.util.ThreadLocalSessionInfo import org.apache.carbondata.processing.loading.csvinput.CSVInputFormat import org.apache.carbondata.processing.loading.model.CarbonLoadModel -import org.apache.carbondata.spark.rdd.SerializableConfiguration import org.apache.carbondata.spark.util.CommonUtil object CsvRDDHelper { @@ -110,7 +110,7 @@ object 
CsvRDDHelper { closePartition() // 2. read function -val serializableConfiguration = new SerializableConfiguration(hadoopConf) +val serializableConfiguration = SparkSQLUtil.getSerializableConfigurableInstance(hadoopConf) val readFunction = new (PartitionedFile => Iterator[InternalRow]) with Serializable { override def apply(file: PartitionedFile): Iterator[InternalRow] = { new Iterator[InternalRow] { http://git-wip-us.apache.org/repos/asf/carbondata/blob/3c7b3399/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/DataLoadProcessBuilderOnSpark.scala -- diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/DataLoadProcessBuilderOnSpark.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/DataLoadProcessBuilderOnSpark.scala index 2e74a94..923676c 100644 --- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/DataLoadProcessBuilderOnSpark.scala +++ b/integration/spark-com
[05/45] carbondata git commit: [DOC] Add spark carbon file format documentation
[DOC] Add spark carbon file format documentation Add spark carbon file format documentation This closes #2757 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/d84cd817 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/d84cd817 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/d84cd817 Branch: refs/heads/branch-1.5 Commit: d84cd817a8ed90e79c12dcee7334b6e52d030982 Parents: 13ecc9e Author: ravipesala Authored: Tue Sep 25 16:19:16 2018 +0530 Committer: kunal642 Committed: Wed Sep 26 16:11:55 2018 +0530 -- docs/carbon-as-spark-datasource-guide.md | 100 ++ 1 file changed, 100 insertions(+) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/d84cd817/docs/carbon-as-spark-datasource-guide.md -- diff --git a/docs/carbon-as-spark-datasource-guide.md b/docs/carbon-as-spark-datasource-guide.md new file mode 100644 index 000..1d286cf --- /dev/null +++ b/docs/carbon-as-spark-datasource-guide.md @@ -0,0 +1,100 @@ + + +# Carbon as Spark's datasource guide + +Carbon fileformat can be integrated to Spark using datasource to read and write data without using CarbonSession. + + +# Create Table with DDL + +Carbon table can be created with spark's datasource DDL syntax as follows. + +``` + CREATE [TEMPORARY] TABLE [IF NOT EXISTS] [db_name.]table_name + [(col_name1 col_type1 [COMMENT col_comment1], ...)] + USING carbon + [OPTIONS (key1=val1, key2=val2, ...)] + [PARTITIONED BY (col_name1, col_name2, ...)] + [CLUSTERED BY (col_name3, col_name4, ...) INTO num_buckets BUCKETS] + [LOCATION path] + [COMMENT table_comment] + [TBLPROPERTIES (key1=val1, key2=val2, ...)] + [AS select_statement] +``` + +## Supported OPTIONS + +| Property | Default Value | Description | +|---|--|| +| table_blocksize | 1024 | Size of blocks to write onto hdfs | +| table_blocklet_size | 64 | Size of blocklet to write | +| local_dictionary_threshold | 1 | Cardinality upto which the local dictionary can be generated | +| local_dictionary_enable | false | Enable local dictionary generation | +| sort_columns | all dimensions are sorted | comma separated string columns which to include in sort and its order of sort | +| sort_scope | local_sort | Sort scope of the load.Options include no sort, local sort ,batch sort and global sort | +| long_string_columns | null | comma separated string columns which are more than 32k length | + +## Example + +``` + CREATE TABLE CARBON_TABLE (NAME STRING) USING CARBON OPTIONS(‘table_block_size’=’256’) +``` + +Note: User can only apply the features of what spark datasource like parquet supports. It cannot support the features of carbon session like IUD, compaction etc. + +# Using DataFrame + +Carbon format can be used in dataframe also using the following way. 
+ +Write carbon using dataframe +``` +df.write.format("carbon").save(path) +``` + +Read carbon using dataframe +``` +val df = spark.read.format("carbon").load(path) +``` + +## Example + +``` +import org.apache.spark.sql.SparkSession + +val spark = SparkSession + .builder() + .appName("Spark SQL basic example") + .config("spark.some.config.option", "some-value") + .getOrCreate() + +// For implicit conversions like converting RDDs to DataFrames +import spark.implicits._ +val df = spark.sparkContext.parallelize(1 to 10 * 10 * 1000) + .map(x => (r.nextInt(10), "name" + x % 8, "city" + x % 50, BigDecimal.apply(x % 60))) + .toDF("ID", "name", "city", "age") + +// Write to carbon format +df.write.format("carbon").save("/user/person_table") + +// Read carbon using dataframe +val dfread = spark.read.format("carbon").load("/user/person_table") +dfread.show() +``` + +Reference : [list of carbon properties](./configuration-parameters.md) +
carbondata git commit: [CARBONDATA-2990] Queries slow down after some time due to broadcast issue
Repository: carbondata Updated Branches: refs/heads/master 19097f272 -> 3c7b33992 [CARBONDATA-2990] Queries slow down after some time due to broadcast issue Problem It is observed that during consecutive run of queries after some time queries are slowing down. This is causing the degrade in query performance. No exception is thrown in driver and executor logs but as observed from the logs the time to broadcast hadoop conf is increasing after every query run. Analysis This is happening because in carbon SerializableConfiguration class is overriden from spark. Spark registers this class with Kryo serializer and hence the computation using the kryo is fast. The same benefit is not observed in carbondata becuase of overriding the class. Internal Spark sizeEstimator calculates the size of object and there are few extra objects in carbondata overriden class because of which the computation time is increasing. Solution Use the spark class instead of overriding the class in carbondata This closes #2803 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/3c7b3399 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/3c7b3399 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/3c7b3399 Branch: refs/heads/master Commit: 3c7b33992e06d81fb47d81bf8ccf7884f845b3ff Parents: 19097f2 Author: manishgupta88 Authored: Mon Oct 8 19:38:54 2018 +0530 Committer: ravipesala Committed: Tue Oct 9 13:38:51 2018 +0530 -- .../carbondata/spark/load/CsvRDDHelper.scala| 4 +-- .../load/DataLoadProcessBuilderOnSpark.scala| 6 ++-- .../load/DataLoadProcessorStepOnSpark.scala | 6 ++-- .../apache/carbondata/spark/rdd/CarbonRDD.scala | 4 +-- .../spark/rdd/NewCarbonDataLoadRDD.scala| 33 .../apache/spark/sql/util/SparkSQLUtil.scala| 21 - .../spark/rdd/CarbonDataRDDFactory.scala| 4 +-- .../spark/sql/CarbonDictionaryDecoder.scala | 8 ++--- .../management/CarbonLoadDataCommand.scala | 7 +++-- .../command/mutation/DeleteExecution.scala | 7 ++--- .../command/mutation/HorizontalCompaction.scala | 8 ++--- 11 files changed, 46 insertions(+), 62 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/3c7b3399/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/CsvRDDHelper.scala -- diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/CsvRDDHelper.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/CsvRDDHelper.scala index 8d6dd32..5511645 100644 --- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/CsvRDDHelper.scala +++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/CsvRDDHelper.scala @@ -34,6 +34,7 @@ import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.GenericInternalRow import org.apache.spark.sql.execution.datasources.{FilePartition, FileScanRDD, PartitionedFile} +import org.apache.spark.sql.util.SparkSQLUtil import org.apache.spark.sql.util.SparkSQLUtil.sessionState import org.apache.carbondata.common.logging.{LogService, LogServiceFactory} @@ -41,7 +42,6 @@ import org.apache.carbondata.core.datastore.impl.FileFactory import org.apache.carbondata.core.util.ThreadLocalSessionInfo import org.apache.carbondata.processing.loading.csvinput.CSVInputFormat import org.apache.carbondata.processing.loading.model.CarbonLoadModel -import org.apache.carbondata.spark.rdd.SerializableConfiguration import 
org.apache.carbondata.spark.util.CommonUtil object CsvRDDHelper { @@ -110,7 +110,7 @@ object CsvRDDHelper { closePartition() // 2. read function -val serializableConfiguration = new SerializableConfiguration(hadoopConf) +val serializableConfiguration = SparkSQLUtil.getSerializableConfigurableInstance(hadoopConf) val readFunction = new (PartitionedFile => Iterator[InternalRow]) with Serializable { override def apply(file: PartitionedFile): Iterator[InternalRow] = { new Iterator[InternalRow] { http://git-wip-us.apache.org/repos/asf/carbondata/blob/3c7b3399/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/DataLoadProcessBuilderOnSpark.scala -- diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/DataLoadProcessBuilderOnSpark.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/DataLoadProcessBuilderOnSpark.scala index 2e74a94..923676c 100644 --- a/integration/spark-common/src/main/scala/org/
carbondata git commit: [CARBONDATA-2994] unify badrecordpath property name for create and load
Repository: carbondata Updated Branches: refs/heads/master fa0882569 -> 19097f272 [CARBONDATA-2994] unify badrecordpath property name for create and load Problem: Currently bad records path can be specified in create and load. In create the property name is bad_records_path and load is bad_record_path. This can cause confusion for the user. Solution: Use bad_record_path as the property for create so that both load and create use the same name. This closes #2799 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/19097f27 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/19097f27 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/19097f27 Branch: refs/heads/master Commit: 19097f272fe3227c71c86338bb8bf788e87cd4aa Parents: fa08825 Author: kunal642 Authored: Fri Oct 5 14:57:26 2018 +0530 Committer: ravipesala Committed: Mon Oct 8 14:29:59 2018 +0530 -- docs/ddl-of-carbondata.md| 11 +++ docs/dml-of-carbondata.md| 8 .../carbondata/hadoop/api/CarbonTableOutputFormat.java | 2 +- .../StandardPartitionBadRecordLoggerTest.scala | 2 +- .../org/apache/carbondata/spark/StreamingOption.scala| 2 +- .../sql/execution/command/carbonTableSchemaCommon.scala | 4 ++-- .../command/table/CarbonDescribeFormattedCommand.scala | 7 +++ .../spark/carbondata/BadRecordPathLoadOptionTest.scala | 4 ++-- .../spark/carbondata/TestStreamingTableOperation.scala | 8 .../carbondata/processing/util/CarbonBadRecordUtil.java | 2 +- 10 files changed, 30 insertions(+), 20 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/19097f27/docs/ddl-of-carbondata.md -- diff --git a/docs/ddl-of-carbondata.md b/docs/ddl-of-carbondata.md index 22d754a..c1a891d 100644 --- a/docs/ddl-of-carbondata.md +++ b/docs/ddl-of-carbondata.md @@ -33,6 +33,7 @@ CarbonData DDL statements are documented here,which includes: * [Hive/Parquet folder Structure](#support-flat-folder-same-as-hiveparquet) * [Extra Long String columns](#string-longer-than-32000-characters) * [Compression for Table](#compression-for-table) + * [Bad Records Path](#bad-records-path) * [CREATE TABLE AS SELECT](#create-table-as-select) * [CREATE EXTERNAL TABLE](#create-external-table) * [External Table on Transactional table location](#create-external-table-on-managed-table-data-location) @@ -454,6 +455,16 @@ CarbonData DDL statements are documented here,which includes: ``` carbon.column.compressor=zstd ``` + + - # Bad Records Path + This property is used to specify the location where bad records would be written. + As the table path remains the same after rename therefore the user can use this property to + specify bad records path for the table at the time of creation, so that the same path can + be later viewed in table description for reference. + + ``` + TBLPROPERTIES('BAD_RECORD_PATH'='/opt/badrecords'') + ``` ## CREATE TABLE AS SELECT This function allows user to create a Carbon table from any of the Parquet/Hive/Carbon table. This is beneficial when the user wants to create Carbon table from any other Parquet/Hive table and use the Carbon query engine to query and achieve better query results for cases where Carbon is faster than other file formats. Also this feature can be used for backing up the data. 
http://git-wip-us.apache.org/repos/asf/carbondata/blob/19097f27/docs/dml-of-carbondata.md -- diff --git a/docs/dml-of-carbondata.md b/docs/dml-of-carbondata.md index db7c118..393ebd3 100644 --- a/docs/dml-of-carbondata.md +++ b/docs/dml-of-carbondata.md @@ -240,14 +240,6 @@ CarbonData DML statements are documented here,which includes: * Since Bad Records Path can be specified in create, load and carbon properties. Therefore, value specified in load will have the highest priority, and value specified in carbon properties will have the least priority. - **Bad Records Path:** - This property is used to specify the location where bad records would be written. - - - ``` - TBLPROPERTIES('BAD_RECORDS_PATH'='/opt/badrecords'') - ``` - Example: ``` http://git-wip-us.apache.org/repos/asf/carbondata/blob/19097f27/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableOutputFormat.java -- diff --git a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableOutputFormat.java b/hadoop/src/main/java/org/apache/carbondata/hadoop/
carbondata git commit: [CARBONDATA-2993] fix random NPE while concurrent loading
Repository: carbondata Updated Branches: refs/heads/master ca30ad97d -> fa0882569 [CARBONDATA-2993] fix random NPE while concurrent loading This closes #2797 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/fa088256 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/fa088256 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/fa088256 Branch: refs/heads/master Commit: fa0882569872d3280807a5a57f36c4c43f48cc99 Parents: ca30ad9 Author: kunal642 Authored: Fri Oct 5 10:13:05 2018 +0530 Committer: ravipesala Committed: Fri Oct 5 15:31:33 2018 +0530 -- .../scala/org/apache/carbondata/spark/rdd/CarbonRDD.scala | 9 + .../org/apache/carbondata/sdk/file/AvroCarbonWriter.java| 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/fa088256/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonRDD.scala -- diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonRDD.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonRDD.scala index 87d8f50..3a02f85 100644 --- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonRDD.scala +++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonRDD.scala @@ -47,8 +47,10 @@ abstract class CarbonRDD[T: ClassTag]( info } + @transient val hadoopConf = SparkSQLUtil.sessionState(ss).newHadoopConf() + val config: Broadcast[SerializableConfiguration] = sparkContext -.broadcast(new SerializableConfiguration(SparkSQLUtil.sessionState(ss).newHadoopConf())) +.broadcast(new SerializableConfiguration(hadoopConf)) /** Construct an RDD with just a one-to-one dependency on one parent */ def this(@transient sparkSession: SparkSession, @transient oneParent: RDD[_]) = @@ -57,7 +59,7 @@ abstract class CarbonRDD[T: ClassTag]( protected def internalGetPartitions: Array[Partition] override def getPartitions: Array[Partition] = { -ThreadLocalSessionInfo.setConfigurationToCurrentThread(config.value.value) +ThreadLocalSessionInfo.setConfigurationToCurrentThread(hadoopConf) internalGetPartitions } @@ -66,8 +68,7 @@ abstract class CarbonRDD[T: ClassTag]( final def compute(split: Partition, context: TaskContext): Iterator[T] = { TaskContext.get.addTaskCompletionListener(_ => ThreadLocalSessionInfo.unsetAll()) -carbonSessionInfo.getNonSerializableExtraInfo.put("carbonConf", config - .value.value) +carbonSessionInfo.getNonSerializableExtraInfo.put("carbonConf", getConf) ThreadLocalSessionInfo.setCarbonSessionInfo(carbonSessionInfo) TaskMetricsMap.threadLocal.set(Thread.currentThread().getId) val carbonTaskInfo = new CarbonTaskInfo http://git-wip-us.apache.org/repos/asf/carbondata/blob/fa088256/store/sdk/src/main/java/org/apache/carbondata/sdk/file/AvroCarbonWriter.java -- diff --git a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/AvroCarbonWriter.java b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/AvroCarbonWriter.java index d19a96d..e4a65c0 100644 --- a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/AvroCarbonWriter.java +++ b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/AvroCarbonWriter.java @@ -520,7 +520,7 @@ public class AvroCarbonWriter extends CarbonWriter { // recursively get the sub fields ArrayList arraySubField = new ArrayList<>(); // array will have only one sub field. 
-StructField structField = prepareSubFields("val", childSchema.getElementType()); +StructField structField = prepareSubFields(fieldName, childSchema.getElementType()); if (structField != null) { arraySubField.add(structField); return new Field(fieldName, "array", arraySubField);
carbondata git commit: [CARBONDATA-2991]NegativeArraySizeException during query execution
Repository: carbondata Updated Branches: refs/heads/master 30adaa8c1 -> d3927172f [CARBONDATA-2991]NegativeArraySizeException during query execution Issue :- During Query Execution sometime NegativeArraySizeException Exception in Some Tasks . And sometime Executor is lost (JVM crash) Root Cause :- It is because existing memoryblock is removed while it was in-use. This happened because duplicate taskid generated. Sometime freed same memory addresses are assigned to another task which will initialize memory block to0 and this cause NegativeSizeArrayException whereas sometime freed memory will not be used any task of executor process but running task will try to access it and as that address is not part of process so JVM crash will happen. Solution :- Change taskID generation to UUID based instead of System.nanoTime() This closes #2796 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/d3927172 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/d3927172 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/d3927172 Branch: refs/heads/master Commit: d3927172f19b2251e77b840e53d8678cba2a38bd Parents: 30adaa8 Author: BJangir Authored: Wed Oct 3 23:05:42 2018 +0530 Committer: ravipesala Committed: Thu Oct 4 21:11:09 2018 +0530 -- .../unsafe/UnsafeAbstractDimensionDataChunkStore.java | 2 +- .../core/datastore/page/UnsafeFixLengthColumnPage.java | 2 +- .../core/datastore/page/VarLengthColumnPageBase.java | 2 +- .../core/indexstore/AbstractMemoryDMStore.java | 2 +- .../carbondata/core/memory/IntPointerBuffer.java | 4 ++-- .../carbondata/core/memory/UnsafeMemoryManager.java| 12 ++-- .../core/memory/UnsafeSortMemoryManager.java | 13 +++-- .../apache/carbondata/core/util/CarbonTaskInfo.java| 6 +++--- .../org/apache/carbondata/core/util/CarbonUtil.java| 10 ++ .../carbondata/core/util/ThreadLocalTaskInfo.java | 3 ++- .../org/apache/carbondata/spark/rdd/CarbonRDD.scala| 2 +- .../org/apache/carbondata/spark/util/CommonUtil.scala | 2 +- .../loading/sort/unsafe/UnsafeCarbonRowPage.java | 4 ++-- .../loading/sort/unsafe/UnsafeSortDataRows.java| 2 +- .../org/apache/carbondata/sdk/file/CarbonReader.java | 3 ++- .../carbondata/store/worker/SearchRequestHandler.java | 9 +++-- 16 files changed, 44 insertions(+), 34 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/d3927172/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeAbstractDimensionDataChunkStore.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeAbstractDimensionDataChunkStore.java b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeAbstractDimensionDataChunkStore.java index 940ca1a..89bce2d 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeAbstractDimensionDataChunkStore.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeAbstractDimensionDataChunkStore.java @@ -62,7 +62,7 @@ public abstract class UnsafeAbstractDimensionDataChunkStore implements Dimension */ protected boolean isMemoryOccupied; - private final long taskId = ThreadLocalTaskInfo.getCarbonTaskInfo().getTaskId(); + private final String taskId = ThreadLocalTaskInfo.getCarbonTaskInfo().getTaskId(); /** * Constructor 
http://git-wip-us.apache.org/repos/asf/carbondata/blob/d3927172/core/src/main/java/org/apache/carbondata/core/datastore/page/UnsafeFixLengthColumnPage.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/UnsafeFixLengthColumnPage.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/UnsafeFixLengthColumnPage.java index 9e0eb8d..7df29df 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/UnsafeFixLengthColumnPage.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/UnsafeFixLengthColumnPage.java @@ -51,7 +51,7 @@ public class UnsafeFixLengthColumnPage extends ColumnPage { // size of the allocated memory, in bytes private int capacity; - private final long taskId = ThreadLocalTaskInfo.getCarbonTaskInfo().getTaskId(); + private final String taskId = ThreadLocalTaskInfo.getCarbonTaskInfo().getTaskId(); private static final int byteBits = DataTypes.BYTE.getSizeBits(); private static final int shortBits = DataTypes.SHORT.getSizeBits(); http://git-wip-us.apache.org/repos/asf/carbondata/blob/d3927172/core/src/main/java/org/apa
carbondata git commit: [CARBONDATA-2985]Fix issues in Table level compaction and TableProperties
Repository: carbondata Updated Branches: refs/heads/master 396c26f53 -> 30adaa8c1 [CARBONDATA-2985]Fix issues in Table level compaction and TableProperties Issue :- If 2nd Level compaction is 1 like 2,1 or 6,1 then only 1st time compaction is done subsequent compaction are ignored . ( like if 2,1 is given then only 0.1 is segment is created and other segments are ignore forever ) Table level compaction does not support ,0 as 2nd level compaction value but system level compaction supports same. Solution :- if 2nd level compaction value is 1 then user does not want 2nd level compaction at all which mean 2nd level compaction can be set to 0. remove check to support 2nd level compaction as 0 in table level. This closes #2794 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/30adaa8c Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/30adaa8c Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/30adaa8c Branch: refs/heads/master Commit: 30adaa8c15e430b94bd1448969f50cb2451e1746 Parents: 396c26f Author: BJangir Authored: Tue Oct 2 00:17:29 2018 +0530 Committer: ravipesala Committed: Thu Oct 4 18:58:49 2018 +0530 -- .../TableLevelCompactionOptionTest.scala| 84 .../carbondata/spark/util/CommonUtil.scala | 2 +- .../processing/merger/CarbonDataMergerUtil.java | 7 ++ 3 files changed, 92 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/30adaa8c/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datacompaction/TableLevelCompactionOptionTest.scala -- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datacompaction/TableLevelCompactionOptionTest.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datacompaction/TableLevelCompactionOptionTest.scala index 458d656..7b138f7 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datacompaction/TableLevelCompactionOptionTest.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datacompaction/TableLevelCompactionOptionTest.scala @@ -271,4 +271,88 @@ class TableLevelCompactionOptionTest extends QueryTest assert(!segmentSequenceIds.contains("0.1")) assert(!segmentSequenceIds.contains("3.1")) } + + test("AUTO MERGE TRUE:Verify 2nd Level compaction equals to 1"){ +sql("DROP TABLE IF EXISTS tablecompaction_table") +sql( + """ +|create table tablecompaction_table( +|name string,age int) stored by 'carbondata' + |tblproperties('AUTO_LOAD_MERGE'='true','COMPACTION_LEVEL_THRESHOLD'='2,1') + """.stripMargin) + +for(i <-0 until 4){ + sql("insert into tablecompaction_table select 'a',12") +} +var segments = sql("SHOW SEGMENTS FOR TABLE tablecompaction_table") +var segmentSequenceIds = segments.collect().map { each => (each.toSeq) (0) } +assert(segmentSequenceIds.size==6) +assert(segmentSequenceIds.contains("0.1")) +assert(segmentSequenceIds.contains("2.1")) + } + + test("AUTO MERGE FALSE:Verify 2nd Level compaction equals to 1"){ +sql("DROP TABLE IF EXISTS tablecompaction_table") +sql( + """ +|create table tablecompaction_table( +|name string,age int) stored by 'carbondata' +|tblproperties('COMPACTION_LEVEL_THRESHOLD'='2,1') + """.stripMargin) + +for(i <-0 until 4){ + sql("insert into tablecompaction_table select 'a',12") +} +sql("alter table tablecompaction_table compact 'minor' ") +var segments = sql("SHOW SEGMENTS FOR TABLE 
tablecompaction_table") +var segmentSequenceIds = segments.collect().map { each => (each.toSeq) (0) } +assert(segmentSequenceIds.size==6) +assert(segmentSequenceIds.contains("0.1")) +assert(segmentSequenceIds.contains("2.1")) + } + + // 2nd Level compaction value = 0 is supported by system level(like 6,0) + // same need to support for table level also + test("Verify 2nd Level compaction equals to 0"){ +sql("DROP TABLE IF EXISTS tablecompaction_table") +sql( + """ +|create table tablecompaction_table( +|name string,age int) stored by 'carbondata' + |tblproperties('AUTO_LOAD_MERGE'='true','COMPACTION_LEVEL_THRESHOLD'='2,0') + """.stripMargin) + +for(i <-0 until 4)
carbondata git commit: [CARBONDATA-2983][BloomDataMap] Change bloom query model to process multiple filter values
Repository: carbondata Updated Branches: refs/heads/master 3edea12a8 -> 396c26f53 [CARBONDATA-2983][BloomDataMap] Change bloom query model to process multiple filter values This PR optimizes pruning for InExpression. For an expression like colA in (1, 2, 3), previously three bloom query models were created and, for each query model, all the bloom filters were iterated; now only one bloom query model needs to be generated. This PR also intersects the pruned results generated by each expression, which is important when multiple index columns are used in one query. This closes #2781 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/396c26f5 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/396c26f5 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/396c26f5 Branch: refs/heads/master Commit: 396c26f531192e3da8a233ca14024dd8899da52a Parents: 3edea12 Author: Manhua Authored: Fri Sep 28 11:39:39 2018 +0800 Committer: ravipesala Committed: Thu Oct 4 18:13:12 2018 +0530 -- .../datamap/bloom/BloomCoarseGrainDataMap.java | 98 1 file changed, 59 insertions(+), 39 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/396c26f5/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java -- diff --git a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java index ee71142..a5376be 100644 --- a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java +++ b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java @@ -21,15 +21,7 @@ import java.io.IOException; import java.io.UnsupportedEncodingException; import java.text.DateFormat; import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Date; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.TimeZone; +import java.util.*; import java.util.concurrent.ConcurrentHashMap; import org.apache.carbondata.common.annotations.InterfaceAudience; @@ -47,7 +39,6 @@ import org.apache.carbondata.core.indexstore.Blocklet; import org.apache.carbondata.core.indexstore.PartitionSpec; import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier; import org.apache.carbondata.core.metadata.CarbonMetadata; -import org.apache.carbondata.core.metadata.datatype.DataType; import org.apache.carbondata.core.metadata.datatype.DataTypes; import org.apache.carbondata.core.metadata.encoder.Encoding; import org.apache.carbondata.core.metadata.schema.table.CarbonTable; @@ -171,7 +162,7 @@ public class BloomCoarseGrainDataMap extends CoarseGrainDataMap { @Override public List prune(FilterResolverIntf filterExp, SegmentProperties segmentProperties, List partitions) throws IOException { -Set hitBlocklets = new HashSet<>(); +Set hitBlocklets = null; if (filterExp == null) { // null is different from empty here. Empty means after pruning, no blocklet need to scan. 
return null; @@ -185,6 +176,7 @@ public class BloomCoarseGrainDataMap extends CoarseGrainDataMap { throw new RuntimeException(e); } for (BloomQueryModel bloomQueryModel : bloomQueryModels) { + Set tempHitBlockletsResult = new HashSet<>(); LOGGER.debug("prune blocklet for query: " + bloomQueryModel); BloomCacheKeyValue.CacheKey cacheKey = new BloomCacheKeyValue.CacheKey( this.indexPath.toString(), bloomQueryModel.columnName); @@ -195,17 +187,32 @@ public class BloomCoarseGrainDataMap extends CoarseGrainDataMap { // skip shard which has been pruned in Main datamap continue; } -boolean scanRequired = bloomFilter.membershipTest(new Key(bloomQueryModel.filterValue)); +boolean scanRequired = false; +for (byte[] value: bloomQueryModel.filterValues) { + scanRequired = bloomFilter.membershipTest(new Key(value)); + if (scanRequired) { +// if any filter value hit this bloomfilter +// no need to check other filter values +break; + } +} if (scanRequired) { LOGGER.debug(String.format("BloomCoarseGrainDataMap: Need to scan -> blocklet#%s", String.valueOf(bloomFilter.getBlockletNo(; Blocklet blocklet = new Blocklet(bloomFilter.getShardName(), - St
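A condensed sketch of the pruning scheme described above, with simplified types (the real code uses Hadoop's BloomFilter and Key plus CarbonData's Blocklet): one query model now carries every value of the IN list, a blocklet must be scanned if any value might be present, and the per-column results are intersected.

  import java.util.HashSet;
  import java.util.List;
  import java.util.Set;

  class BloomPruneSketch {
    interface Bloom { boolean mightContain(byte[] value); }

    // one probe loop per query model instead of one query model per value
    static boolean scanRequired(Bloom bloom, List<byte[]> filterValues) {
      for (byte[] value : filterValues) {
        if (bloom.mightContain(value)) {
          return true; // any hit is enough, stop probing the other values
        }
      }
      return false;
    }

    // intersection across index columns: null means "no column pruned yet"
    static Set<String> intersect(Set<String> hitBlocklets, Set<String> columnHits) {
      if (hitBlocklets == null) {
        return new HashSet<>(columnHits);
      }
      hitBlocklets.retainAll(columnHits);
      return hitBlocklets;
    }
  }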
carbondata git commit: [CARBONDATA-2986] Table Properties are lost when multiple drivers create table concurrently
Repository: carbondata Updated Branches: refs/heads/master 11bd0ade9 -> 3edea12a8 [CARBONDATA-2986] Table Properties are lost when multiple drivers create table concurrently Issue: When multiple drivers create the same table concurrently, the table properties are lost. Root Cause: The schema file is getting overwritten from CarbonRelation#createTableIfNotExists because the table lookup failed; this happened because the .mdt file is updated concurrently and the current table is removed from the cache in org.apache.spark.sql.hive.CarbonFileMetastore#checkSchemasModifiedTimeAndReloadTable. Solution: Since the carbon table is already created and the schema file is already written, there is no need to do the lookup again. This closes #2785 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/3edea12a Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/3edea12a Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/3edea12a Branch: refs/heads/master Commit: 3edea12a83e70dddb1eca271bf5660f73de272f5 Parents: 11bd0ad Author: BJangir Authored: Fri Sep 28 17:17:30 2018 +0530 Committer: ravipesala Committed: Thu Oct 4 18:05:06 2018 +0530 -- .../scala/org/apache/spark/sql/CarbonSource.scala | 17 ++--- 1 file changed, 14 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/3edea12a/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonSource.scala -- diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonSource.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonSource.scala index 16cee96..cd1087d 100644 --- a/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonSource.scala +++ b/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonSource.scala @@ -57,6 +57,7 @@ class CarbonSource extends CreatableRelationProvider with RelationProvider with SchemaRelationProvider with StreamSinkProvider with DataSourceRegister { override def shortName(): String = "carbondata" + private val LOGGER = LogServiceFactory.getLogService(CarbonSource.getClass.getName) // will be called if hive supported create table command is provided override def createRelation(sqlContext: SQLContext, @@ -143,7 +144,7 @@ class CarbonSource extends CreatableRelationProvider with RelationProvider .exists(_.table.equalsIgnoreCase(tableName))) { getPathForTable(sqlContext.sparkSession, dbName, tableName, newParameters) } else { -createTableIfNotExists(sqlContext.sparkSession, newParameters, dataSchema) + createTableIfNotExists(sqlContext.sparkSession, dbName, tableName, newParameters, dataSchema) } CarbonDatasourceHadoopRelation(sqlContext.sparkSession, Array(path), updatedParams, @@ -160,6 +161,8 @@ class CarbonSource extends CreatableRelationProvider with RelationProvider private def createTableIfNotExists( sparkSession: SparkSession, + dbName: String, + tableName: String, parameters: Map[String, String], dataSchema: StructType): (String, Map[String, String]) = { @@ -167,10 +170,18 @@ class CarbonSource extends CreatableRelationProvider with RelationProvider val tableName: String = parameters.getOrElse("tableName", "").toLowerCase try { - val carbonTable = CarbonEnv.getCarbonTable(Some(dbName), tableName)(sparkSession) - (carbonTable.getTablePath, parameters) + if (!(parameters.contains("carbonSchemaPartsNo") +|| parameters.contains("carbonschemapartsno"))) { +val carbonTable = CarbonEnv.getCarbonTable(Some(dbName), tableName)(sparkSession) +(carbonTable.getTablePath, parameters) + } else { 
+(getPathForTable(sparkSession, dbName, tableName, parameters)) + } + } catch { case _: NoSuchTableException => +LOGGER.warn("Carbon Table [" +dbName +"] [" +tableName +"] is not found, " + + "Now existing Schema will be overwritten with default properties") val metaStore = CarbonEnv.getInstance(sparkSession).carbonMetastore val identifier = AbsoluteTableIdentifier.from( CarbonEnv.getTablePath(Some(dbName), tableName)(sparkSession),
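A minimal sketch of the control flow described above, with invented types (the parameter keys are the ones the patch checks): when the incoming options already carry the serialized schema parts, the schema file was just written by this driver, so the lookup that can race with .mdt refreshes is skipped and only the path is resolved.

  import java.util.Map;

  class CreateTableSketch {
    interface Metastore { String lookupTablePath(String db, String table); }

    static String resolvePath(Map<String, String> params, Metastore metastore,
        String db, String table, String knownPath) {
      if (params.containsKey("carbonSchemaPartsNo")
          || params.containsKey("carbonschemapartsno")) {
        // schema file already written in this call; a second lookup could
        // fail if a concurrent .mdt refresh evicted the table from cache
        return knownPath;
      }
      return metastore.lookupTablePath(db, table);
    }
  }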
carbondata git commit: [CARBONDATA-2992] Fixed Between Query Data Mismatch issue for timestamp data type
Repository: carbondata Updated Branches: refs/heads/master 6aa2a90bf -> 11bd0ade9 [CARBONDATA-2992] Fixed Between Query Data Mismatch issue for timestamp data type Problem: A BETWEEN query is giving wrong results. Root cause: For the timestamp type, when the filter is given in yyyy-MM-dd format instead of yyyy-MM-dd HH:mm:ss format a cast is added, and CastExpressionOptimization uses a SimpleDateFormat object to parse the filter value, which fails because the filter value does not match that pattern. Solution: Use Spark's DateTimeUtils.stringToTime method, as Spark already handles the above scenario. This closes #2787 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/11bd0ade Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/11bd0ade Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/11bd0ade Branch: refs/heads/master Commit: 11bd0ade93a3ac72b42068c3b57ed8bb1203ab47 Parents: 6aa2a90 Author: kumarvishal09 Authored: Fri Sep 28 18:33:29 2018 +0530 Committer: ravipesala Committed: Thu Oct 4 18:02:08 2018 +0530 -- .../src/test/resources/datedatafile.csv | 7 ++ .../src/test/resources/timestampdatafile.csv| 7 ++ ...imestampNoDictionaryColumnCastTestCase.scala | 80 .../execution/CastExpressionOptimization.scala | 67 +--- .../bloom/BloomCoarseGrainDataMapSuite.scala| 47 +--- 5 files changed, 152 insertions(+), 56 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/11bd0ade/integration/spark-common-test/src/test/resources/datedatafile.csv -- diff --git a/integration/spark-common-test/src/test/resources/datedatafile.csv b/integration/spark-common-test/src/test/resources/datedatafile.csv new file mode 100644 index 000..43a615d --- /dev/null +++ b/integration/spark-common-test/src/test/resources/datedatafile.csv @@ -0,0 +1,7 @@ +datetype1 +2018-09-11 +2018-09-12 +2018-09-13 +2018-09-14 +2018-09-15 +2018-09-16 http://git-wip-us.apache.org/repos/asf/carbondata/blob/11bd0ade/integration/spark-common-test/src/test/resources/timestampdatafile.csv -- diff --git a/integration/spark-common-test/src/test/resources/timestampdatafile.csv b/integration/spark-common-test/src/test/resources/timestampdatafile.csv new file mode 100644 index 000..473f330 --- /dev/null +++ b/integration/spark-common-test/src/test/resources/timestampdatafile.csv @@ -0,0 +1,7 @@ +timestamptype +2018-09-11 00:00:00 +2018-09-12 00:00:00 +2018-09-13 00:00:00 +2018-09-14 00:00:00 +2018-09-15 00:00:00 +2018-09-16 00:00:00 http://git-wip-us.apache.org/repos/asf/carbondata/blob/11bd0ade/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/TimestampNoDictionaryColumnCastTestCase.scala -- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/TimestampNoDictionaryColumnCastTestCase.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/TimestampNoDictionaryColumnCastTestCase.scala new file mode 100644 index 000..41c7005 --- /dev/null +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/directdictionary/TimestampNoDictionaryColumnCastTestCase.scala @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.spark.testsuite.directdictionary + + +import org.apache.spark.sql.Row +import org.apache.spark.sql.test.util.QueryTest +import org.scalatest.BeforeAndAfterAll + +import org.apache.carbondata.core.constants.CarbonCommonConstants +import org.apache.carbondata.core.util.CarbonProperties + +/** + * Test Class for detailed query on timestamp datatypes + */ +class TimestampNoDictionaryColumnCastTestCase extends QueryTest with BeforeAndAfter
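The parsing difference behind the bug can be reproduced with a small sketch (simplified: the actual fix delegates to Spark's DateTimeUtils.stringToTime instead of chaining SimpleDateFormat calls as below): a strict timestamp pattern rejects a date-only literal, so a date-only fallback is required.

  import java.sql.Timestamp;
  import java.text.ParseException;
  import java.text.SimpleDateFormat;

  class TimestampLiteralSketch {
    static Timestamp parse(String literal) throws ParseException {
      try {
        // "2018-09-14 00:00:00" parses; "2018-09-14" throws ParseException
        return new Timestamp(
            new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").parse(literal).getTime());
      } catch (ParseException e) {
        // date-only fallback, similar in spirit to what stringToTime accepts
        return new Timestamp(
            new SimpleDateFormat("yyyy-MM-dd").parse(literal).getTime());
      }
    }
  }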
carbondata git commit: [CARBONDATA-2975] DefaultValue choosing and removeNullValues on rangefilters is incorrect
Repository: carbondata Updated Branches: refs/heads/master 18fbdfc40 -> 6aa2a90bf [CARBONDATA-2975] DefaultValue choosing and removeNullValues on rangefilters is incorrect This closes #2770 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/6aa2a90b Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/6aa2a90b Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/6aa2a90b Branch: refs/heads/master Commit: 6aa2a90bf113132880694e305a264cdccc4c5693 Parents: 18fbdfc Author: dhatchayani Authored: Wed Sep 26 20:49:06 2018 +0530 Committer: ravipesala Committed: Thu Oct 4 17:22:14 2018 +0530 -- .../apache/carbondata/core/scan/filter/FilterUtil.java | 11 ++- .../filter/executer/RangeValueFilterExecuterImpl.java| 2 +- .../executer/RowLevelRangeGrtThanFiterExecuterImpl.java | 10 -- .../RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java | 10 -- .../RowLevelRangeLessThanEqualFilterExecuterImpl.java| 11 +++ .../RowLevelRangeLessThanFilterExecuterImpl.java | 11 +++ .../spark/testsuite/sortcolumns/TestSortColumns.scala| 6 ++ 7 files changed, 43 insertions(+), 18 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/6aa2a90b/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java index b4354d2..fe92c42 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java @@ -1947,11 +1947,12 @@ public final class FilterUtil { public static void removeNullValues(DimensionColumnPage dimensionColumnPage, BitSet bitSet, byte[] defaultValue) { if (!bitSet.isEmpty()) { - if (null != dimensionColumnPage.getNullBits() && !dimensionColumnPage.getNullBits().isEmpty() - && !dimensionColumnPage.isExplicitSorted() && !dimensionColumnPage.isAdaptiveEncoded()) { -for (int i = bitSet.nextSetBit(0); i >= 0; i = bitSet.nextSetBit(i + 1)) { - if (dimensionColumnPage.getNullBits().get(i)) { -bitSet.flip(i); + if (null != dimensionColumnPage.getNullBits()) { +if (!dimensionColumnPage.getNullBits().isEmpty()) { + for (int i = bitSet.nextSetBit(0); i >= 0; i = bitSet.nextSetBit(i + 1)) { +if (dimensionColumnPage.getNullBits().get(i)) { + bitSet.flip(i); +} } } } else { http://git-wip-us.apache.org/repos/asf/carbondata/blob/6aa2a90b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java index b9729db..886a13b 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java @@ -632,7 +632,7 @@ public class RangeValueFilterExecuterImpl implements FilterExecuter { } else { if (dimColEvaluatorInfo.getDimension().getDataType() == DataTypes.STRING) { defaultValue = CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY; -} else { +} else if (!dimensionColumnPage.isAdaptiveEncoded()) { defaultValue = CarbonCommonConstants.EMPTY_BYTE_ARRAY; } } 
http://git-wip-us.apache.org/repos/asf/carbondata/blob/6aa2a90b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java index 63a5976..c6835f8 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java @@ -28,6 +28,7 @@ import org.apache.carbondata.core.datastore.chunk.impl.MeasureRawColumnChunk; import org.apache.carbondata.core.d
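A simplified restatement of the corrected null handling (BitSet-only; the real FilterUtil additionally consults inverted-index and adaptive-encoding state, as the diff shows): rows whose null bit is set must be dropped from a range-filter result.

  import java.util.BitSet;

  class NullRowFilterSketch {
    static void removeNullValues(BitSet hitRows, BitSet nullBits) {
      if (hitRows.isEmpty() || nullBits == null || nullBits.isEmpty()) {
        return; // nothing selected, or no nulls recorded for this page
      }
      for (int i = hitRows.nextSetBit(0); i >= 0; i = hitRows.nextSetBit(i + 1)) {
        if (nullBits.get(i)) {
          hitRows.flip(i); // a null can never satisfy a range condition
        }
      }
    }
  }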
carbondata git commit: [HOTFIX] Changes to align printing of information in explain command based on enable.query.statistics flag
Repository: carbondata Updated Branches: refs/heads/master d8003a31c -> c3a870449 [HOTFIX] Changes to align printing of information in explain command based on enable.query.statistics flag Now user need to set the flag enable.query.statistics = true to print the pruning and dataMap related information in the explain command. This closes #2795 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/c3a87044 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/c3a87044 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/c3a87044 Branch: refs/heads/master Commit: c3a8704494d8d4cffbe8f35c43101ce248913965 Parents: d8003a3 Author: manishgupta88 Authored: Wed Oct 3 18:13:45 2018 +0530 Committer: ravipesala Committed: Wed Oct 3 20:23:12 2018 +0530 -- .../apache/carbondata/core/profiler/ExplainCollector.java | 9 - docs/datamap/datamap-management.md | 2 +- .../cluster/sdv/generated/BloomFilterDataMapTestCase.scala | 6 +- .../datamap/lucene/LuceneFineGrainDataMapSuite.scala| 5 + .../lucene/LuceneFineGrainDataMapWithSearchModeSuite.scala | 5 + .../preaggregate/TestPreAggregateTableSelection.scala | 7 +++ .../testsuite/createTable/TestRenameTableWithDataMap.scala | 8 .../spark/testsuite/datamap/CGDataMapTestCase.scala | 5 + .../spark/testsuite/datamap/FGDataMapTestCase.scala | 7 ++- .../src/main/scala/org/apache/spark/sql/CarbonSession.scala | 1 + .../sql/execution/command/table/CarbonExplainCommand.scala | 6 +- .../datamap/bloom/BloomCoarseGrainDataMapSuite.scala| 5 + 12 files changed, 61 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/c3a87044/core/src/main/java/org/apache/carbondata/core/profiler/ExplainCollector.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/profiler/ExplainCollector.java b/core/src/main/java/org/apache/carbondata/core/profiler/ExplainCollector.java index 755c56a..8513dac 100644 --- a/core/src/main/java/org/apache/carbondata/core/profiler/ExplainCollector.java +++ b/core/src/main/java/org/apache/carbondata/core/profiler/ExplainCollector.java @@ -26,7 +26,9 @@ import java.util.Objects; import java.util.concurrent.ConcurrentHashMap; import org.apache.carbondata.common.annotations.InterfaceAudience; +import org.apache.carbondata.core.constants.CarbonCommonConstants; import org.apache.carbondata.core.datamap.dev.expr.DataMapWrapperSimpleInfo; +import org.apache.carbondata.core.util.CarbonProperties; /** * An information collector used for EXPLAIN command, to print out @@ -52,7 +54,12 @@ public class ExplainCollector { } public static void setup() { -INSTANCE = new ExplainCollector(); +boolean isQueryStatisticsEnabled = Boolean.parseBoolean(CarbonProperties.getInstance() +.getProperty(CarbonCommonConstants.ENABLE_QUERY_STATISTICS, +CarbonCommonConstants.ENABLE_QUERY_STATISTICS_DEFAULT)); +if (isQueryStatisticsEnabled) { + INSTANCE = new ExplainCollector(); +} } public static void remove() { http://git-wip-us.apache.org/repos/asf/carbondata/blob/c3a87044/docs/datamap/datamap-management.md -- diff --git a/docs/datamap/datamap-management.md b/docs/datamap/datamap-management.md index eee03a7..bf52c05 100644 --- a/docs/datamap/datamap-management.md +++ b/docs/datamap/datamap-management.md @@ -122,7 +122,7 @@ There is a DataMapCatalog interface to retrieve schema of all datamap, it can be How can user know whether datamap is used in the query? 
-User can use EXPLAIN command to know, it will print out something like +User can set enable.query.statistics = true and use EXPLAIN command to know, it will print out something like ```text == CarbonData Profiler == http://git-wip-us.apache.org/repos/asf/carbondata/blob/c3a87044/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/BloomFilterDataMapTestCase.scala -- diff --git a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/BloomFilterDataMapTestCase.scala b/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/BloomFilterDataMapTestCase.scala index 8acbcd6..077e007 100644 --- a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/BloomFilterDataMapTestCase.scala +++ b/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/genera
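The gate itself is small; in a sketch, plain java.util.Properties stands in for CarbonProperties, and the default of "false" is an assumption consistent with the doc change above (the user must explicitly set the flag to true).

  import java.util.Properties;

  class ExplainGateSketch {
    static boolean queryStatisticsEnabled(Properties props) {
      // property name from the patch; default "false" is assumed here
      return Boolean.parseBoolean(
          props.getProperty("enable.query.statistics", "false"));
    }
    // setup() creates the collector only when this returns true, so
    // EXPLAIN prints pruning/DataMap details only on demand.
  }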
carbondata git commit: [CARBONDATA-2976] Support dumping column chunk metadata in CarbonCli
Repository: carbondata Updated Branches: refs/heads/master fa9c8323c -> d8003a31c [CARBONDATA-2976] Support dumping column chunk metadata in CarbonCli By using the -k option, CarbonCli will print all column chunk/page metadata for the specified column. For example, java CarbonCli -cmd summary -p <path to carbon table> -c name -k will output: ## Page Meta for column 'name' in file /Users/jacky/code/carbondata/tools/cli/CarbonCliTest/part-0-138391629343461_batchno0-0-null-138390048546321.carbondata Blocklet 0: Page 0 (offset 0, length 12049): DataChunk2(chunk_meta:ChunkCompressionMeta(compression_codec:DEPRECATED, total_uncompressed_size:0, total_compressed_size:0, compressor_name:snappy), rowMajor:false, data_page_length:12039, rowid_page_length:10, presence:PresenceMeta(represents_presence:false, present_bit_stream:00), sort_state:SORT_EXPLICIT, encoders:[INVERTED_INDEX], encoder_meta:[], min_max:BlockletMinMaxIndex(min_values:[72 6F 62 6F 74 30], max_values:[72 6F 62 6F 74 30], min_max_presence:[true]), numberOfRowsInpage:32000) Page 1 (offset 12049, length 12049): DataChunk2(chunk_meta:ChunkCompressionMeta(compression_codec:DEPRECATED, total_uncompressed_size:0, total_compressed_size:0, compressor_name:snappy), rowMajor:false, data_page_length:12039, rowid_page_length:10, presence:PresenceMeta(represents_presence:false, present_bit_stream:00), sort_state:SORT_EXPLICIT, encoders:[INVERTED_INDEX], encoder_meta:[], min_max:BlockletMinMaxIndex(min_values:[72 6F 62 6F 74 30], max_values:[72 6F 62 6F 74 30], min_max_presence:[true]), numberOfRowsInpage:32000) Page 2 (offset 24098, length 12049): DataChunk2(chunk_meta:ChunkCompressionMeta(compression_codec:DEPRECATED, total_uncompressed_size:0, total_compressed_size:0, compressor_name:snappy), rowMajor:false, data_page_length:12039, rowid_page_length:10, presence:PresenceMeta(represents_presence:false, present_bit_stream:00), sort_state:SORT_EXPLICIT, encoders:[INVERTED_INDEX], encoder_meta:[], min_max:BlockletMinMaxIndex(min_values:[72 6F 62 6F 74 30], max_values:[72 6F 62 6F 74 30], min_max_presence:[true]), numberOfRowsInpage:32000) This closes #2771 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/d8003a31 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/d8003a31 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/d8003a31 Branch: refs/heads/master Commit: d8003a31c602807f00d438d8be392992cb0955ac Parents: fa9c832 Author: Jacky Li Authored: Wed Sep 26 23:51:34 2018 +0800 Committer: ravipesala Committed: Wed Oct 3 20:17:04 2018 +0530 -- .../org/apache/carbondata/tool/CarbonCli.java | 2 + .../org/apache/carbondata/tool/DataFile.java| 8 +++- .../org/apache/carbondata/tool/DataSummary.java | 45 ++-- .../apache/carbondata/tool/CarbonCliTest.java | 13 ++ 4 files changed, 63 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/d8003a31/tools/cli/src/main/java/org/apache/carbondata/tool/CarbonCli.java -- diff --git a/tools/cli/src/main/java/org/apache/carbondata/tool/CarbonCli.java b/tools/cli/src/main/java/org/apache/carbondata/tool/CarbonCli.java index 5725f8e..f1baa92 100644 --- a/tools/cli/src/main/java/org/apache/carbondata/tool/CarbonCli.java +++ 
b/tools/cli/src/main/java/org/apache/carbondata/tool/CarbonCli.java @@ -65,6 +65,7 @@ public class CarbonCli { Option segment = new Option("m", "showSegment", false, "print segment information"); Option tblProperties = new Option("t", "tblProperties", false, "print table properties"); Option detail = new Option("b", "blocklet", false, "print blocklet size detail"); +Option columnMeta = new Option("k", "columnChunkMeta", false, "print column chunk meta"); Option columnName = OptionBuilder .withArgName("column name") .hasArg() @@ -82,6 +83,7 @@ public class CarbonCli { options.addOption(segment); options.addOption(tblProperties); options.addOption(detail); +options.addOption(columnMeta); options.addOption(columnName); return options; } http://git-wip-us.apache.org/repos/asf/carbondata/blob/d8003a31/tools/cli/src/main/java/org/apache/carbondata/tool/DataFile.java -- diff --git a/tools/cli/src/main/java/org/apache/carbondata/too
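For reference, a hedged usage sketch of the new flag invoked from Java rather than the shell; the table path is a placeholder and the argument order mirrors the corrected example above.

  public class CliColumnMetaExample {
    public static void main(String[] args) {
      // "-k" dumps the column chunk/page metadata for the "-c" column
      String[] cliArgs = {"-cmd", "summary", "-p", "/path/to/carbon/table", "-c", "name", "-k"};
      org.apache.carbondata.tool.CarbonCli.main(cliArgs);
    }
  }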
carbondata git commit: [CARBONDATA-2984][Streaming] Fix NPE when there is no data in the task of a batch
Repository: carbondata Updated Branches: refs/heads/master 0b16816da -> fa9c8323c [CARBONDATA-2984][Streaming] Fix NPE when there is no data in the task of a batch Fix NPE when there is no data in the task of a batch Streaming batch maybe has no data, so it doesn't require to append blocklet to streaming file. So it doesn't need to update min/max index of streaming file, just use min/max index of old file . This closes #2782 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/fa9c8323 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/fa9c8323 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/fa9c8323 Branch: refs/heads/master Commit: fa9c8323c11c083452d75886cbbdad1f23d6dfb7 Parents: 0b16816 Author: QiangCai Authored: Fri Sep 28 14:48:39 2018 +0800 Committer: ravipesala Committed: Wed Oct 3 20:13:50 2018 +0530 -- .../TestStreamingTableOperation.scala | 49 +++- .../streaming/CarbonStreamRecordWriter.java | 5 +- .../streaming/segment/StreamSegment.java| 15 -- 3 files changed, 61 insertions(+), 8 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/fa9c8323/integration/spark2/src/test/scala/org/apache/spark/carbondata/TestStreamingTableOperation.scala -- diff --git a/integration/spark2/src/test/scala/org/apache/spark/carbondata/TestStreamingTableOperation.scala b/integration/spark2/src/test/scala/org/apache/spark/carbondata/TestStreamingTableOperation.scala index 43c1e5a..607c429 100644 --- a/integration/spark2/src/test/scala/org/apache/spark/carbondata/TestStreamingTableOperation.scala +++ b/integration/spark2/src/test/scala/org/apache/spark/carbondata/TestStreamingTableOperation.scala @@ -37,6 +37,7 @@ import org.scalatest.BeforeAndAfterAll import org.apache.carbondata.common.exceptions.NoSuchStreamException import org.apache.carbondata.common.exceptions.sql.MalformedCarbonCommandException import org.apache.carbondata.core.constants.CarbonCommonConstants +import org.apache.carbondata.core.datastore.impl.FileFactory import org.apache.carbondata.core.metadata.schema.datamap.DataMapClassProvider.TIMESERIES import org.apache.carbondata.core.metadata.schema.table.CarbonTable import org.apache.carbondata.core.statusmanager.{FileFormat, SegmentStatus} @@ -125,6 +126,8 @@ class TestStreamingTableOperation extends QueryTest with BeforeAndAfterAll { createTable(tableName = "agg_table", streaming = true, withBatchLoad = false) +createTable(tableName = "stream_table_empty", streaming = true, withBatchLoad = false) + var csvDataDir = integrationPath + "/spark2/target/csvdatanew" generateCSVDataFile(spark, idStart = 10, rowNums = 5, csvDataDir) generateCSVDataFile(spark, idStart = 10, rowNums = 5, csvDataDir, SaveMode.Append) @@ -213,6 +216,7 @@ class TestStreamingTableOperation extends QueryTest with BeforeAndAfterAll { sql("drop table if exists streaming.stream_table_reopen") sql("drop table if exists streaming.stream_table_drop") sql("drop table if exists streaming.agg_table_block") +sql("drop table if exists streaming.stream_table_empty") } // normal table not support streaming ingest @@ -226,7 +230,7 @@ class TestStreamingTableOperation extends QueryTest with BeforeAndAfterAll { .asInstanceOf[CarbonRelation].metaData.carbonTable var server: ServerSocket = null try { - server = getServerSocket + server = getServerSocket() val thread1 = createWriteSocketThread(server, 2, 10, 1) thread1.start() // use thread pool to catch the exception of sink thread @@ -2253,6 +2257,46 @@ class 
TestStreamingTableOperation extends QueryTest with BeforeAndAfterAll { sql("DROP TABLE IF EXISTS dim") } + // test empty batch + test("test empty batch") { +executeStreamingIngest( + tableName = "stream_table_empty", + batchNums = 1, + rowNumsEachBatch = 10, + intervalOfSource = 1, + intervalOfIngest = 3, + continueSeconds = 10, + generateBadRecords = false, + badRecordAction = "force", + autoHandoff = false +) +var result = sql("select count(*) from streaming.stream_table_empty").collect() +assert(result(0).getLong(0) == 10) + +// clean checkpointDir and logDir +val carbonTable = CarbonEnv.getCarbonTable(Option("streaming"), "stream_table_empty")(spark) +FileFactory + .deleteAllFilesOfDir(new File(CarbonTablePath.getStreamingLogDir(carbonTable.getTablePath))) +FileFactory + .dele
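A minimal sketch of the guard described above, with invented types (the real change is in StreamSegment and CarbonStreamRecordWriter): when a task appended no blocklet, the existing file's min/max index is kept instead of dereferencing a batch index that was never built.

  class StreamIndexSketch {
    static class MinMax {
      byte[][] min;
      byte[][] max;
    }

    static MinMax mergeFileIndex(MinMax oldFileIndex, MinMax newBatchIndex) {
      if (newBatchIndex == null) {
        // empty batch: nothing was appended, the old index is still valid,
        // and dereferencing the missing batch index was the NPE
        return oldFileIndex;
      }
      // element-wise min/max union is omitted in this sketch
      return newBatchIndex;
    }
  }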
carbondata git commit: [CARBONDATA-2987] Data mismatch after compaction with measure sort columns
Repository: carbondata Updated Branches: refs/heads/master 6ef4e46b0 -> 0b16816da [CARBONDATA-2987] Data mismatch after compaction with measure sort columns problem: Data mismatch after compaction with measure sort columns root cause : In compaction flow (DictionaryBasedResultCollector), in ColumnPageWrapper inverted index mapping is not handled. Because of this row ID was wrong, row of no dictionary dimension columns gets data from other rows. Hence the data mismatch. solution: Handle inverted index mapping for DictionaryBasedResultCollector flow in ColumnPageWrapper This closes #2784 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/0b16816d Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/0b16816d Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/0b16816d Branch: refs/heads/master Commit: 0b16816da7b401318929bfe973dad4bf397e90d9 Parents: 6ef4e46 Author: ajantha-bhat Authored: Fri Sep 28 16:27:55 2018 +0530 Committer: ravipesala Committed: Wed Oct 3 20:09:13 2018 +0530 -- .../chunk/store/ColumnPageWrapper.java | 52 +++ .../compaction/nodictionary_compaction.csv | 3 + .../MajorCompactionWithMeasureSortColumns.scala | 97 3 files changed, 136 insertions(+), 16 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/0b16816d/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java index 098287e..627c75f 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java @@ -122,10 +122,19 @@ public class ColumnPageWrapper implements DimensionColumnPage { } @Override public byte[] getChunkData(int rowId) { -return getChunkData(rowId, false); +byte[] nullBitSet = getNullBitSet(rowId, columnPage.getColumnSpec().getColumnType()); +if (nullBitSet != null) { + // if this row is null, return default null represent in byte array + return nullBitSet; +} else { + if (isExplicitSorted()) { +rowId = getInvertedReverseIndex(rowId); + } + return getChunkDataInBytes(rowId); +} } - private byte[] getChunkData(int rowId, boolean isRowIdChanged) { + private byte[] getChunkDataInBytes(int rowId) { ColumnType columnType = columnPage.getColumnSpec().getColumnType(); DataType srcDataType = columnPage.getColumnSpec().getSchemaDataType(); DataType targetDataType = columnPage.getDataType(); @@ -134,15 +143,6 @@ public class ColumnPageWrapper implements DimensionColumnPage { .getDictionaryValue(CarbonUtil.getSurrogateInternal(columnPage.getBytes(rowId), 0, 3)); } else if ((columnType == ColumnType.COMPLEX_PRIMITIVE && isAdaptiveEncoded()) || ( columnType == ColumnType.PLAIN_VALUE && DataTypeUtil.isPrimitiveColumn(srcDataType))) { - if (!isRowIdChanged && columnPage.getNullBits().get(rowId) - && columnType == ColumnType.COMPLEX_PRIMITIVE) { -// if this row is null, return default null represent in byte array -return CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY; - } - if (!isRowIdChanged && columnPage.getNullBits().get(rowId)) { -// if this row is null, return default null represent in byte array -return CarbonCommonConstants.EMPTY_BYTE_ARRAY; - } if (srcDataType == DataTypes.FLOAT) { float floatData = columnPage.getFloat(rowId); return 
ByteUtil.toXorBytes(floatData); @@ -182,9 +182,6 @@ public class ColumnPageWrapper implements DimensionColumnPage { throw new RuntimeException("unsupported type: " + targetDataType); } } else if ((columnType == ColumnType.COMPLEX_PRIMITIVE && !isAdaptiveEncoded())) { - if (!isRowIdChanged && columnPage.getNullBits().get(rowId)) { -return CarbonCommonConstants.EMPTY_BYTE_ARRAY; - } if ((srcDataType == DataTypes.BYTE) || (srcDataType == DataTypes.BOOLEAN)) { byte[] out = new byte[1]; out[0] = (columnPage.getByte(rowId)); @@ -205,6 +202,18 @@ public class ColumnPageWrapper implements DimensionColumnPage { } } + private byte[] getNullBitSet(int rowId, ColumnType columnType) { +if (columnPage.getNullBits().get(rowId) && columnType == ColumnType.COMPLEX_PRIMITIVE) { + // i
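A simplified sketch of the row-id translation the fix adds (Page is an invented stand-in for ColumnPageWrapper's internals): null rows short-circuit first, and on explicitly sorted pages the logical row id must pass through the inverted reverse index before bytes are read.

  class InvertedIndexSketch {
    interface Page {
      byte[] nullRepresentation(int rowId); // null when the row is not null
      boolean isExplicitSorted();
      int invertedReverseIndex(int rowId);
      byte[] bytesAt(int physicalRowId);
    }

    static byte[] getChunkData(Page page, int rowId) {
      byte[] nullValue = page.nullRepresentation(rowId);
      if (nullValue != null) {
        return nullValue; // default null representation for the data type
      }
      int physicalRowId =
          page.isExplicitSorted() ? page.invertedReverseIndex(rowId) : rowId;
      // without this mapping, values pair up with the wrong rows
      return page.bytesAt(physicalRowId);
    }
  }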
carbondata git commit: [HOTFIX] Carbon reader supports opening another reader without closing the previous reader
Repository: carbondata Updated Branches: refs/heads/master 9ca985f0d -> 6ef4e46b0 [HOTFIX] Carbon reader supports opening another reader without closing the previous reader This closes #2790 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/6ef4e46b Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/6ef4e46b Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/6ef4e46b Branch: refs/heads/master Commit: 6ef4e46b0df2d3954788222943df09bf07fdb120 Parents: 9ca985f Author: ajantha-bhat Authored: Fri Sep 28 20:24:39 2018 +0530 Committer: ravipesala Committed: Wed Oct 3 20:06:05 2018 +0530 -- .../hadoop/api/CarbonInputFormat.java | 26 .../sdk/file/CarbonReaderBuilder.java | 7 ++ .../carbondata/sdk/file/CarbonReaderTest.java | 3 +-- 3 files changed, 8 insertions(+), 28 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/6ef4e46b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java -- diff --git a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java index db93cbd..ed82e13 100644 --- a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java +++ b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonInputFormat.java @@ -774,29 +774,13 @@ m filterExpression */ public String[] projectAllColumns(CarbonTable carbonTable) { List colList = carbonTable.getTableInfo().getFactTable().getListOfColumns(); -List projectColumn = new ArrayList<>(); -// childCount will recursively count the number of children for any parent +List projectColumns = new ArrayList<>(); // complex type and add just the parent column name while skipping the child columns. -int childDimCount = 0; -for (ColumnSchema cols : colList) { - if (cols.getSchemaOrdinal() != -1) { -if (childDimCount == 0) { - projectColumn.add(cols.getColumnName()); -} -if (childDimCount > 0) { - childDimCount--; -} -if (cols.getDataType().isComplexType()) { - childDimCount += cols.getNumberOfChild(); -} +for (ColumnSchema col : colList) { + if (!col.getColumnName().contains(".")) { +projectColumns.add(col.getColumnName()); } } -String[] projectionColumns = new String[projectColumn.size()]; -int i = 0; -for (String columnName : projectColumn) { - projectionColumns[i] = columnName; - i++; -} -return projectionColumns; +return projectColumns.toArray(new String[projectColumns.size()]); } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/6ef4e46b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReaderBuilder.java -- diff --git a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReaderBuilder.java b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReaderBuilder.java index 151d57c..9651a8f 100644 --- a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReaderBuilder.java +++ b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReaderBuilder.java @@ -131,11 +131,8 @@ public class CarbonReaderBuilder { hadoopConf = FileFactory.getConfiguration(); } CarbonTable table; -if (filterExpression != null) { - table = CarbonTable.buildTable(tablePath, tableName, hadoopConf); -} else { - table = CarbonTable.buildDummyTable(tablePath); -} +// now always infer schema. TODO:Refactor in next version. 
+table = CarbonTable.buildTable(tablePath, tableName, hadoopConf); final CarbonFileInputFormat format = new CarbonFileInputFormat(); final Job job = new Job(hadoopConf); format.setTableInfo(job.getConfiguration(), table.getTableInfo()); http://git-wip-us.apache.org/repos/asf/carbondata/blob/6ef4e46b/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java -- diff --git a/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java b/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java index 8d95456..ba8a49d 100644 --- a/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java +++ b/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java @@ -90,8 +90,6 @@ public class CarbonReaderTest extends TestCase { }
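The simplified projection logic from the patch reads well on its own; a sketch over plain strings (the real code iterates ColumnSchema objects): child columns of complex types carry dot-separated names, so keeping only names without a dot yields exactly the top-level columns.

  import java.util.ArrayList;
  import java.util.List;

  class ProjectionSketch {
    static String[] projectAllColumns(List<String> columnNames) {
      List<String> topLevel = new ArrayList<>();
      for (String name : columnNames) {
        // e.g. keep "structRecord", skip "structRecord.street"
        if (!name.contains(".")) {
          topLevel.add(name);
        }
      }
      return topLevel.toArray(new String[0]);
    }
  }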
carbondata git commit: Problem: Preaggregate select queries require table path of parent table to access dictionary files. Therefore in executor CarbonMetadata class was used to get parent table object
Repository: carbondata Updated Branches: refs/heads/master 682160fa1 -> 9ca985f0d Problem: Preaggregate select queries require table path of parent table to access dictionary files. Therefore in executor CarbonMetadata class was used to get parent table object. As CarbonMetadata class is only meant to be used in driver and is not filled with carbontable objects for select queries therefore the query was throwing NPE. Solution: Pass parent table path from driver to executor by adding a new variable in RelationIdentifier. This will not be written to thrift, instead will be used to carry tablePath property from driver to executor. This closes #2786 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/9ca985f0 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/9ca985f0 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/9ca985f0 Branch: refs/heads/master Commit: 9ca985f0d96380d96dae6fdab2d4ee014d5ac345 Parents: 682160f Author: kunal642 Authored: Fri Sep 28 17:41:32 2018 +0530 Committer: ravipesala Committed: Wed Oct 3 20:02:49 2018 +0530 -- .../core/metadata/schema/table/RelationIdentifier.java | 12 .../carbondata/core/scan/executor/util/QueryUtil.java | 10 ++ .../spark/sql/CarbonDatasourceHadoopRelation.scala | 12 3 files changed, 26 insertions(+), 8 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/9ca985f0/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/RelationIdentifier.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/RelationIdentifier.java b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/RelationIdentifier.java index 9a1dad1..0e8042d 100644 --- a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/RelationIdentifier.java +++ b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/RelationIdentifier.java @@ -32,6 +32,8 @@ public class RelationIdentifier implements Serializable, Writable { private String tableId; + private String tablePath = ""; + public RelationIdentifier(String databaseName, String tableName, String tableId) { this.databaseName = databaseName; this.tableName = tableName; @@ -50,16 +52,26 @@ public class RelationIdentifier implements Serializable, Writable { return tableId; } + public String getTablePath() { +return tablePath; + } + + public void setTablePath(String tablePath) { +this.tablePath = tablePath; + } + @Override public void write(DataOutput out) throws IOException { out.writeUTF(databaseName); out.writeUTF(tableName); out.writeUTF(tableId); +out.writeUTF(tablePath); } @Override public void readFields(DataInput in) throws IOException { this.databaseName = in.readUTF(); this.tableName = in.readUTF(); this.tableId = in.readUTF(); +this.tablePath = in.readUTF(); } @Override public boolean equals(Object o) { http://git-wip-us.apache.org/repos/asf/carbondata/blob/9ca985f0/core/src/main/java/org/apache/carbondata/core/scan/executor/util/QueryUtil.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/executor/util/QueryUtil.java b/core/src/main/java/org/apache/carbondata/core/scan/executor/util/QueryUtil.java index 9fb0857..7849d10 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/executor/util/QueryUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/executor/util/QueryUtil.java @@ -393,12 +393,6 @@ public class QueryUtil { public static AbsoluteTableIdentifier getTableIdentifierForColumn( 
CarbonDimension carbonDimension) { -RelationIdentifier parentRelationIdentifier = - carbonDimension.getColumnSchema().getParentColumnTableRelations().get(0) -.getRelationIdentifier(); -String parentTablePath = CarbonMetadata.getInstance() -.getCarbonTable(parentRelationIdentifier.getDatabaseName(), -parentRelationIdentifier.getTableName()).getTablePath(); RelationIdentifier relation = carbonDimension.getColumnSchema() .getParentColumnTableRelations() .get(0) @@ -406,8 +400,8 @@ public class QueryUtil { String parentTableName = relation.getTableName(); String parentDatabaseName = relation.getDatabaseName(); String parentTableId = relation.getTableId(); -return AbsoluteTableIdentifier.from(parentTablePath, parentDatabaseName, parentTableName, -parentTableId); +return AbsoluteTableIdentifier.from(relation.getTablePath(), parentDatabaseName, +pa
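The essence of the change is the Writable round-trip, condensed here from the patch above (field declarations added for self-containment): tablePath travels with the identifier from driver to executor, so the executor never needs the driver-only CarbonMetadata cache.

  import java.io.DataInput;
  import java.io.DataOutput;
  import java.io.IOException;

  class RelationIdentifierSketch {
    String databaseName, tableName, tableId, tablePath = "";

    void write(DataOutput out) throws IOException {
      out.writeUTF(databaseName);
      out.writeUTF(tableName);
      out.writeUTF(tableId);
      out.writeUTF(tablePath); // new field: the path known on the driver
    }

    void readFields(DataInput in) throws IOException {
      databaseName = in.readUTF();
      tableName = in.readUTF();
      tableId = in.readUTF();
      tablePath = in.readUTF(); // executor side: no metadata lookup needed
    }
  }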
carbondata git commit: [CARBONDATA-2979] select count fails when carbondata file is written through SDK and read through sparkfileformat for complex datatype map(struct->array->map)
Repository: carbondata Updated Branches: refs/heads/master e9a198ab6 -> 682160fa1 [CARBONDATA-2979] select count fails when carbondata file is written through SDK and read through sparkfileformat for complex datatype map(struct->array->map) Problem Select query failed issue for map type when data is loaded using avro SDK and external table using carbon file format is used to query the data Analysis When data is loaded through Avro SDK which has a schema of type struct, fieldName was hard coded to val because of which during query the schema written in the file footer and schema inferred for the external table had a mismatch which lead to failure. Solution Instead of hard coding the field value as val use the given field name in the schema This closes #2774 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/682160fa Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/682160fa Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/682160fa Branch: refs/heads/master Commit: 682160fa1bbde5f13c8a28e0114d3f18e5ffaf79 Parents: e9a198a Author: manishgupta88 Authored: Thu Sep 27 18:02:34 2018 +0530 Committer: ravipesala Committed: Wed Oct 3 19:57:50 2018 +0530 -- .../datasource/SparkCarbonDataSourceTest.scala | 63 +++- .../sql/carbondata/datasource/TestUtil.scala| 56 - .../carbondata/sdk/file/AvroCarbonWriter.java | 11 ++-- 3 files changed, 122 insertions(+), 8 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/682160fa/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala -- diff --git a/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala b/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala index 3be8cb3..37677d0 100644 --- a/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala +++ b/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala @@ -1117,11 +1117,11 @@ class SparkCarbonDataSourceTest extends FunSuite with BeforeAndAfterAll { } private def createParquetTable { - FileFactory.deleteAllCarbonFilesOfDir(FileFactory.getCarbonFile(s"$warehouse1/../warehouse2")) +val path = FileFactory.getUpdatedFilePath(s"$warehouse1/../warehouse2") +FileFactory.deleteAllCarbonFilesOfDir(FileFactory.getCarbonFile(s"$path")) spark.sql(s"create table par_table(male boolean, age int, height double, name string, address " + s"string," + - s"salary long, floatField float, bytefield byte) using parquet location " + - s"'$warehouse1/../warehouse2'") + s"salary long, floatField float, bytefield byte) using parquet location '$path'") (0 to 10).foreach { i => spark.sql(s"insert into par_table select 'true','$i', ${i.toDouble / 2}, 'name$i', " + s"'address$i', ${i*100}, $i.$i, '$i'") @@ -1181,6 +1181,63 @@ class SparkCarbonDataSourceTest extends FunSuite with BeforeAndAfterAll { } } + def buildStructSchemaWithNestedArrayOfMapTypeAsValue(writerPath: String, rows: Int): Unit = { +FileFactory.deleteAllFilesOfDir(new File(writerPath)) +val mySchema = + """ +|{ +| "name": "address", +| "type": "record", +| "fields": [ +|{ +| "name": "name", +| "type": "string" +|}, +|{ +| "name": "age", +| "type": "int" +|}, +|{ +| "name": "structRecord", +| "type": { +|"type": "record", +|"name": 
"my_address", +|"fields": [ +| { +|"name": "street", +|"type": "string" +| }, +| { +|"name": "houseDetails", +|"type": { +| "type": "array", +| "items": { +| "name
carbondata git commit: [HOTFIX] Fix NPE in LRU cache when entry from the same table is getting evicted to load another entry from same table
Repository: carbondata Updated Branches: refs/heads/master 5c0da31a5 -> 2a4f53001 [HOTFIX] Fix NPE in LRU cache when entry from the same table is getting evicted to load another entry from same table Problem When driver LRU cache size is configured to a small value then on running concurrent queries sometimes while loading the block dataMap in LRU cache one of the dataMap entries from the same table is getting deleted because of shortage of space. Due to this in the flow after loading the dataMap cache NPE is thrown. This is because when an cacheable entry is removed from LRU cache then invalidate is called on that cacheable entry to clear the unsafe memory used by that entry. Invalidate method makes the references null and clears the unsafe memory which leads to NPE when accessed again. Solution Currently dataMap cache uses unsafe offheap memory for datamap caching. To avoid this the code is modified to use unsafe with onheap so that JVM itself takes care of clearing the memory when required. We do not require to explicitly set the references to null. This closes #2759 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/2a4f5300 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/2a4f5300 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/2a4f5300 Branch: refs/heads/master Commit: 2a4f53001058346843e0248c60fee2943087efc9 Parents: 5c0da31 Author: manishgupta88 Authored: Tue Sep 25 19:21:08 2018 +0530 Committer: ravipesala Committed: Thu Sep 27 12:08:28 2018 +0530 -- .../indexstore/BlockletDataMapIndexWrapper.java | 1 - .../core/indexstore/SafeMemoryDMStore.java | 1 - .../core/indexstore/UnsafeMemoryDMStore.java | 16 +--- .../indexstore/blockletindex/BlockDataMap.java | 2 -- .../core/memory/HeapMemoryAllocator.java | 8 ++-- .../core/memory/UnsafeMemoryManager.java | 19 +++ .../util/AbstractDataFileFooterConverter.java| 4 ++-- 7 files changed, 32 insertions(+), 19 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/2a4f5300/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDataMapIndexWrapper.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDataMapIndexWrapper.java b/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDataMapIndexWrapper.java index 7b8a13b..33d69aa 100644 --- a/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDataMapIndexWrapper.java +++ b/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDataMapIndexWrapper.java @@ -70,7 +70,6 @@ public class BlockletDataMapIndexWrapper implements Cacheable, Serializable { for (DataMap dataMap : dataMaps) { dataMap.clear(); } -dataMaps = null; } public List getDataMaps() { http://git-wip-us.apache.org/repos/asf/carbondata/blob/2a4f5300/core/src/main/java/org/apache/carbondata/core/indexstore/SafeMemoryDMStore.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/SafeMemoryDMStore.java b/core/src/main/java/org/apache/carbondata/core/indexstore/SafeMemoryDMStore.java index 0b3d4d8..042790f 100644 --- a/core/src/main/java/org/apache/carbondata/core/indexstore/SafeMemoryDMStore.java +++ b/core/src/main/java/org/apache/carbondata/core/indexstore/SafeMemoryDMStore.java @@ -62,7 +62,6 @@ public class SafeMemoryDMStore extends AbstractMemoryDMStore { if (!isMemoryFreed) { if (null != dataMapRows) { dataMapRows.clear(); -dataMapRows = null; } isMemoryFreed = true; } 
http://git-wip-us.apache.org/repos/asf/carbondata/blob/2a4f5300/core/src/main/java/org/apache/carbondata/core/indexstore/UnsafeMemoryDMStore.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/UnsafeMemoryDMStore.java b/core/src/main/java/org/apache/carbondata/core/indexstore/UnsafeMemoryDMStore.java index 3e8ce12..196559a 100644 --- a/core/src/main/java/org/apache/carbondata/core/indexstore/UnsafeMemoryDMStore.java +++ b/core/src/main/java/org/apache/carbondata/core/indexstore/UnsafeMemoryDMStore.java @@ -19,6 +19,7 @@ package org.apache.carbondata.core.indexstore; import org.apache.carbondata.core.indexstore.row.DataMapRow; import org.apache.carbondata.core.indexstore.row.UnsafeDataMapRow; import org.apache.carbondata.core.indexstore.schema.CarbonRowSchema; +import org.apache.carbondata.core.memory.MemoryAllocator; import org.apache.carbondata.core.memory.MemoryBlock; imp
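A sketch of the allocation switch described above, with invented names: an unsafe-on-heap block is backed by a normal heap array, so evicting a cache entry only drops references; the JVM reclaims the memory, and no invalidate() has to null out fields that a racing reader may still hold.

  class OnHeapBlockSketch {
    static final class MemoryBlock {
      final long[] heapBacking; // on-heap: lifetime is managed by the GC
      MemoryBlock(int sizeInWords) { this.heapBacking = new long[sizeInWords]; }
    }

    static MemoryBlock allocate(int sizeInWords) {
      return new MemoryBlock(sizeInWords);
    }

    static void onEviction(MemoryBlock block) {
      // intentionally empty: no explicit free and no nulling of shared
      // references, so a concurrent reader sees stale-but-valid data
      // instead of a NullPointerException
    }
  }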
svn commit: r29698 - in /dev/carbondata/1.5.0-rc1: ./ apache-carbondata-1.5.0-source-release.zip apache-carbondata-1.5.0-source-release.zip.asc apache-carbondata-1.5.0-source-release.zip.md5 apache-ca
Author: ravipesala Date: Wed Sep 26 05:50:47 2018 New Revision: 29698 Log: Upload 1.5.0 rc1 Added: dev/carbondata/1.5.0-rc1/ dev/carbondata/1.5.0-rc1/apache-carbondata-1.5.0-source-release.zip (with props) dev/carbondata/1.5.0-rc1/apache-carbondata-1.5.0-source-release.zip.asc dev/carbondata/1.5.0-rc1/apache-carbondata-1.5.0-source-release.zip.md5 dev/carbondata/1.5.0-rc1/apache-carbondata-1.5.0-source-release.zip.sha512 Added: dev/carbondata/1.5.0-rc1/apache-carbondata-1.5.0-source-release.zip == Binary file - no diff available. Propchange: dev/carbondata/1.5.0-rc1/apache-carbondata-1.5.0-source-release.zip -- svn:mime-type = application/octet-stream Added: dev/carbondata/1.5.0-rc1/apache-carbondata-1.5.0-source-release.zip.asc == --- dev/carbondata/1.5.0-rc1/apache-carbondata-1.5.0-source-release.zip.asc (added) +++ dev/carbondata/1.5.0-rc1/apache-carbondata-1.5.0-source-release.zip.asc Wed Sep 26 05:50:47 2018 @@ -0,0 +1,16 @@ +-BEGIN PGP SIGNATURE- + +iQIzBAABCgAdFiEER3EpqJTxH7zLwCVHutcqeKexsu4FAluq7ZIACgkQutcqeKex +su7rBQ/+LgzXaiKmti0bL8uPqim2T3eAIkJFxWlTwZh1kQq1j23bbLp0jG0FuBIx +tn72zi3lY4OU5NshaDFKyC+fHlvjw9KbvuDDld2p97eejpovB9KMXpmEP+mH2xU4 +FAZSLsvaUUjdermbJo/GsxEiVVUdOk5yncdgH36KxZ4lnyNgZ+0rb+VTHR7GmA7y +PFCYKhp7UBacPMhWTN7Vv9FiY7dRwZAgU7YM18GseW8rIM5GZ9XGOdUw4qAo6jvP +KmW9RkVi7k8LLnIw3MrX1MnoWItlLLfbNO6X4i1R3izEnV/e5vsb6wzltmz/nRlq +Yb+9IEwpiuku902RP0X6W7ROU8dH7BDWs4aJZeskntrPXUTNuNIgOmhyD5BWniLr +TqfO+PQ0B6X2Fz0iZzbf82zwtPpAtYgRSfGPMQGpAKkhVzJA8klpv9fYAoVFEPBQ +BBu69R2Q7mc3c5rpskCUyIrblcFltySiKo0fKaYvRucOQYdyTz+bKw+xfbZxBQ0q +gzwXTh5SSy2dCbeUtwtZZTfxpREtaOnX1IljziRgMISbO+GtAZ0cK2uoUt+Kdupx +AQhJqC45UdpCeNU+eP2aQ7YfgEVS2bQHJQnxv8s8RHV0zRjS3Ehjk65QZ27q4gSZ +NpJagGJzJsAtJdrCPL4fMWaf2/y4uhwekWOF4g+qrKhYFYiDxdM= +=uLc5 +-END PGP SIGNATURE- Added: dev/carbondata/1.5.0-rc1/apache-carbondata-1.5.0-source-release.zip.md5 == --- dev/carbondata/1.5.0-rc1/apache-carbondata-1.5.0-source-release.zip.md5 (added) +++ dev/carbondata/1.5.0-rc1/apache-carbondata-1.5.0-source-release.zip.md5 Wed Sep 26 05:50:47 2018 @@ -0,0 +1 @@ +ef63caeade90232b8e9a9690d2a5c872 apache-carbondata-1.5.0-source-release.zip Added: dev/carbondata/1.5.0-rc1/apache-carbondata-1.5.0-source-release.zip.sha512 == --- dev/carbondata/1.5.0-rc1/apache-carbondata-1.5.0-source-release.zip.sha512 (added) +++ dev/carbondata/1.5.0-rc1/apache-carbondata-1.5.0-source-release.zip.sha512 Wed Sep 26 05:50:47 2018 @@ -0,0 +1 @@ +c7bd3f5871fa1a7400e0d615eded9a6924882f597b19947d78a5917bf2ba416b156dbe0a6721a746c69a017f281145bfbf69649036e23e31a50f344c843493ee apache-carbondata-1.5.0-source-release.zip
carbondata git commit: [maven-release-plugin] prepare for next development iteration
Repository: carbondata Updated Branches: refs/heads/branch-1.5 2157741f1 -> ef1068cad [maven-release-plugin] prepare for next development iteration Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/ef1068ca Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/ef1068ca Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/ef1068ca Branch: refs/heads/branch-1.5 Commit: ef1068cadf007cc0fe853a6f8c2ed7e86fb78b0a Parents: 2157741 Author: ravipesala Authored: Wed Sep 26 09:18:35 2018 +0530 Committer: ravipesala Committed: Wed Sep 26 09:18:35 2018 +0530 -- assembly/pom.xml | 2 +- common/pom.xml| 2 +- core/pom.xml | 2 +- datamap/bloom/pom.xml | 2 +- datamap/examples/pom.xml | 2 +- datamap/lucene/pom.xml| 2 +- examples/spark2/pom.xml | 2 +- format/pom.xml| 2 +- hadoop/pom.xml| 2 +- integration/hive/pom.xml | 2 +- integration/presto/pom.xml| 2 +- integration/spark-common-test/pom.xml | 2 +- integration/spark-common/pom.xml | 2 +- integration/spark-datasource/pom.xml | 2 +- integration/spark2/pom.xml| 2 +- pom.xml | 4 ++-- processing/pom.xml| 2 +- store/sdk/pom.xml | 2 +- store/search/pom.xml | 2 +- streaming/pom.xml | 2 +- 20 files changed, 21 insertions(+), 21 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/ef1068ca/assembly/pom.xml -- diff --git a/assembly/pom.xml b/assembly/pom.xml index bc91147..5480419 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -22,7 +22,7 @@ org.apache.carbondata carbondata-parent -1.5.0 +1.5.1-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/carbondata/blob/ef1068ca/common/pom.xml -- diff --git a/common/pom.xml b/common/pom.xml index 0bd6a8d..fabffa0 100644 --- a/common/pom.xml +++ b/common/pom.xml @@ -22,7 +22,7 @@ org.apache.carbondata carbondata-parent -1.5.0 +1.5.1-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/carbondata/blob/ef1068ca/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index c8c2889..084dfc5 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -22,7 +22,7 @@ org.apache.carbondata carbondata-parent -1.5.0 +1.5.1-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/carbondata/blob/ef1068ca/datamap/bloom/pom.xml -- diff --git a/datamap/bloom/pom.xml b/datamap/bloom/pom.xml index 2c8bdc8..23b5dc1 100644 --- a/datamap/bloom/pom.xml +++ b/datamap/bloom/pom.xml @@ -4,7 +4,7 @@ org.apache.carbondata carbondata-parent -1.5.0 +1.5.1-SNAPSHOT ../../pom.xml http://git-wip-us.apache.org/repos/asf/carbondata/blob/ef1068ca/datamap/examples/pom.xml -- diff --git a/datamap/examples/pom.xml b/datamap/examples/pom.xml index 5450d87..c063f71 100644 --- a/datamap/examples/pom.xml +++ b/datamap/examples/pom.xml @@ -22,7 +22,7 @@ org.apache.carbondata carbondata-parent -1.5.0 +1.5.1-SNAPSHOT ../../pom.xml http://git-wip-us.apache.org/repos/asf/carbondata/blob/ef1068ca/datamap/lucene/pom.xml -- diff --git a/datamap/lucene/pom.xml b/datamap/lucene/pom.xml index a663140..e2a94f2 100644 --- a/datamap/lucene/pom.xml +++ b/datamap/lucene/pom.xml @@ -4,7 +4,7 @@ org.apache.carbondata carbondata-parent -1.5.0 +1.5.1-SNAPSHOT ../../pom.xml http://git-wip-us.apache.org/repos/asf/carbondata/blob/ef1068ca/examples/spark2/pom.xml -- diff --git a/examples/spark2/pom.xml b/examples/spark2/pom.xml index 85631d0..584f0cb 100644 --- a/examples/spark2/pom.xml +++ b/examples/spark2/pom.xml @@ -22,7 +22,7 @@ org.apache.carbondata carbondata-parent -1.5.0 +1.5.1-SNAPSHOT ../../pom.xml 
http://git-wip-us.apache.org/repos/asf/carbondata/blob/ef1068ca/format/pom.xml -- diff --git a/format/pom.xml b/format/pom.xml index fd3a1d1..4d9627d 100644 --- a/format/pom.xml +++ b/format/pom.xml @@ -22,7 +2
[carbondata] Git Push Summary
Repository: carbondata Updated Tags: refs/tags/apache-carbondata-1.5.0-rc1 [created] 04f399158
carbondata git commit: [maven-release-plugin] prepare release apache-carbondata-1.5.0-rc1
Repository: carbondata Updated Branches: refs/heads/branch-1.5 f2398948c -> 2157741f1 [maven-release-plugin] prepare release apache-carbondata-1.5.0-rc1 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/2157741f Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/2157741f Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/2157741f Branch: refs/heads/branch-1.5 Commit: 2157741f1d8cf3f0418ab37e1755a8d4167141a5 Parents: f239894 Author: ravipesala Authored: Wed Sep 26 09:15:06 2018 +0530 Committer: ravipesala Committed: Wed Sep 26 09:15:06 2018 +0530 -- assembly/pom.xml | 2 +- common/pom.xml| 2 +- core/pom.xml | 2 +- datamap/bloom/pom.xml | 6 ++ datamap/examples/pom.xml | 6 ++ datamap/lucene/pom.xml| 6 ++ examples/spark2/pom.xml | 2 +- format/pom.xml| 2 +- hadoop/pom.xml| 2 +- integration/hive/pom.xml | 2 +- integration/presto/pom.xml| 2 +- integration/spark-common-test/pom.xml | 14 +++--- integration/spark-common/pom.xml | 2 +- integration/spark-datasource/pom.xml | 2 +- integration/spark2/pom.xml| 2 +- pom.xml | 4 ++-- processing/pom.xml| 2 +- store/sdk/pom.xml | 6 ++ store/search/pom.xml | 6 ++ streaming/pom.xml | 6 ++ 20 files changed, 33 insertions(+), 45 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/2157741f/assembly/pom.xml -- diff --git a/assembly/pom.xml b/assembly/pom.xml index eee3e0a..bc91147 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -22,7 +22,7 @@ org.apache.carbondata carbondata-parent -1.5.0-SNAPSHOT +1.5.0 ../pom.xml http://git-wip-us.apache.org/repos/asf/carbondata/blob/2157741f/common/pom.xml -- diff --git a/common/pom.xml b/common/pom.xml index 1209388..0bd6a8d 100644 --- a/common/pom.xml +++ b/common/pom.xml @@ -22,7 +22,7 @@ org.apache.carbondata carbondata-parent -1.5.0-SNAPSHOT +1.5.0 ../pom.xml http://git-wip-us.apache.org/repos/asf/carbondata/blob/2157741f/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index a7d6f4d..c8c2889 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -22,7 +22,7 @@ org.apache.carbondata carbondata-parent -1.5.0-SNAPSHOT +1.5.0 ../pom.xml http://git-wip-us.apache.org/repos/asf/carbondata/blob/2157741f/datamap/bloom/pom.xml -- diff --git a/datamap/bloom/pom.xml b/datamap/bloom/pom.xml index d13eb4f..2c8bdc8 100644 --- a/datamap/bloom/pom.xml +++ b/datamap/bloom/pom.xml @@ -1,12 +1,10 @@ -http://maven.apache.org/POM/4.0.0; - xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance; - xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd;> +http://maven.apache.org/POM/4.0.0; xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance; xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd;> 4.0.0 org.apache.carbondata carbondata-parent -1.5.0-SNAPSHOT +1.5.0 ../../pom.xml http://git-wip-us.apache.org/repos/asf/carbondata/blob/2157741f/datamap/examples/pom.xml -- diff --git a/datamap/examples/pom.xml b/datamap/examples/pom.xml index be65529..5450d87 100644 --- a/datamap/examples/pom.xml +++ b/datamap/examples/pom.xml @@ -15,16 +15,14 @@ See the License for the specific language governing permissions and limitations under the License. 
--> -http://maven.apache.org/POM/4.0.0; - xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance; - xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd;> +http://maven.apache.org/POM/4.0.0; xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance; xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd;> 4.0.0 org.apache.carbondata carbondata-parent -1.5.0-SNAPSHOT +1.5.0 ../../pom.xml http://git-wip-us.apache.org
[2/2] carbondata git commit: [HOTFIX] Remove MV test from example module as release prepare fails with default profile.
[HOTFIX] Remove MV test from example module as release prepare fails with default profile. This closes #2764 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/f2398948 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/f2398948 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/f2398948 Branch: refs/heads/branch-1.5 Commit: f2398948c992d22174e5e41514785dfa91074d65 Parents: d7913b1 Author: ravipesala Authored: Wed Sep 26 07:46:03 2018 +0530 Committer: ravipesala Committed: Wed Sep 26 07:48:44 2018 +0530 -- .../scala/org/apache/carbondata/examplesCI/RunExamples.scala | 7 --- 1 file changed, 7 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/f2398948/examples/spark2/src/test/scala/org/apache/carbondata/examplesCI/RunExamples.scala -- diff --git a/examples/spark2/src/test/scala/org/apache/carbondata/examplesCI/RunExamples.scala b/examples/spark2/src/test/scala/org/apache/carbondata/examplesCI/RunExamples.scala index d3623ba..2b9b999 100644 --- a/examples/spark2/src/test/scala/org/apache/carbondata/examplesCI/RunExamples.scala +++ b/examples/spark2/src/test/scala/org/apache/carbondata/examplesCI/RunExamples.scala @@ -113,11 +113,4 @@ class RunExamples extends QueryTest with BeforeAndAfterAll { test("ExternalTableExample") { ExternalTableExample.exampleBody(spark) } - - test("MVDataMapExample") { -// MV only works for 2.2 and above -if (!spark.sparkContext.version.startsWith("2.1")) { - MVDataMapExample.exampleBody(spark) -} - } } \ No newline at end of file
[1/2] carbondata git commit: [HOTFIX] Fixed data loading failure
Repository: carbondata Updated Branches: refs/heads/branch-1.5 6a2a94d05 -> f2398948c [HOTFIX] Fixed data loading failure Problem: 1. Data loading fails with ArrayIndexOutOfBoundsException when not all columns are included in the sort columns; while filling the sort column details in CarbonDataProcessorUtil.getNoDictSortColMapping, the code does not check whether a column is actually present in the sort columns. 2. CI test cases sometimes fail with a negative array size exception; the root cause is still not clear from the code, and the failure could also occur in an actual customer deployment. Solution: 1. Add only those columns which are present in the sort columns. 2. For now, disable unsafe processing by changing its default value to false. This closes #2754 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/d7913b1f Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/d7913b1f Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/d7913b1f Branch: refs/heads/branch-1.5 Commit: d7913b1fdbfb9eb080f4f14f2ebff984448710f4 Parents: 6a2a94d Author: kumarvishal09 Authored: Mon Sep 24 19:34:59 2018 +0530 Committer: Jacky Li Committed: Wed Sep 26 00:08:00 2018 +0800 -- .../core/constants/CarbonCommonConstants.java | 2 +- ...feVariableLengthDimensionDataChunkStore.java | 2 +- .../load/DataLoadProcessBuilderOnSpark.scala| 3 +- .../sort/sortdata/NewRowComparator.java | 48 +++- .../processing/sort/sortdata/SortDataRows.java | 7 ++- .../util/CarbonDataProcessorUtil.java | 6 +-- 6 files changed, 26 insertions(+), 42 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/d7913b1f/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java index 21f1f34..faad0dc 100644 --- a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java +++ b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java @@ -1298,7 +1298,7 @@ public final class CarbonCommonConstants { /** * default property of unsafe processing */ - public static final String ENABLE_UNSAFE_IN_QUERY_EXECUTION_DEFAULTVALUE = "true"; + public static final String ENABLE_UNSAFE_IN_QUERY_EXECUTION_DEFAULTVALUE = "false"; /** * whether to prefetch data while loading.
http://git-wip-us.apache.org/repos/asf/carbondata/blob/d7913b1f/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeVariableLengthDimensionDataChunkStore.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeVariableLengthDimensionDataChunkStore.java b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeVariableLengthDimensionDataChunkStore.java index 196dc4c..8553506 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeVariableLengthDimensionDataChunkStore.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeVariableLengthDimensionDataChunkStore.java @@ -139,7 +139,7 @@ public abstract class SafeVariableLengthDimensionDataChunkStore length = dataOffsets[rowId + 1] - (currentDataOffset + getLengthSize()); } else { // for last record - length = (short) (this.data.length - currentDataOffset); + length = this.data.length - currentDataOffset; } DataType dt = vector.getType(); http://git-wip-us.apache.org/repos/asf/carbondata/blob/d7913b1f/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/DataLoadProcessBuilderOnSpark.scala -- diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/DataLoadProcessBuilderOnSpark.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/DataLoadProcessBuilderOnSpark.scala index e810829..2e74a94 100644 --- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/DataLoadProcessBuilderOnSpark.scala +++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/DataLoadProcessBuilderOnSpark.scala @@ -86,8 +86,7 @@ object DataLoadProcessBuilderOnSpark { val sortParameters = SortParameters.createSortParameters(configuration) val rowComparator: Comparator[Array[AnyRef]] = if (sortParameters.getNoDictionaryCount > 0) { -new
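To illustrate fix 1: a minimal, self-contained Java sketch of the guarded mapping. The Column class and method shape here are illustrative stand-ins, not the actual CarbonDataProcessorUtil.getNoDictSortColMapping signature.

import java.util.ArrayList;
import java.util.List;

public class NoDictSortMappingSketch {
  // Illustrative column descriptor; the real code works on Carbon's column metadata.
  static class Column {
    final String name;
    final boolean isSortColumn;
    final boolean hasDictionary;
    Column(String name, boolean isSortColumn, boolean hasDictionary) {
      this.name = name;
      this.isSortColumn = isSortColumn;
      this.hasDictionary = hasDictionary;
    }
  }

  // Before the fix, non-sort columns were also added, so the mapping grew larger
  // than the number of sort columns and indexing by sort-column position later
  // ran past the end of the array.
  static boolean[] getNoDictSortColMapping(List<Column> columns) {
    List<Boolean> mapping = new ArrayList<>();
    for (Column col : columns) {
      if (!col.isSortColumn) {
        continue; // the fix: consider only columns present in the sort columns
      }
      mapping.add(!col.hasDictionary);
    }
    boolean[] result = new boolean[mapping.size()];
    for (int i = 0; i < result.length; i++) {
      result[i] = mapping.get(i);
    }
    return result;
  }
}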
carbondata git commit: [HOTFIX] Remove MV test from example module as release prepare fails with default profile.
Repository: carbondata Updated Branches: refs/heads/master d7913b1fd -> f2398948c [HOTFIX] Remove MV test from example module as release prepare fails with default profile. This closes #2764 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/f2398948 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/f2398948 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/f2398948 Branch: refs/heads/master Commit: f2398948c992d22174e5e41514785dfa91074d65 Parents: d7913b1 Author: ravipesala Authored: Wed Sep 26 07:46:03 2018 +0530 Committer: ravipesala Committed: Wed Sep 26 07:48:44 2018 +0530 -- .../scala/org/apache/carbondata/examplesCI/RunExamples.scala | 7 --- 1 file changed, 7 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/f2398948/examples/spark2/src/test/scala/org/apache/carbondata/examplesCI/RunExamples.scala -- diff --git a/examples/spark2/src/test/scala/org/apache/carbondata/examplesCI/RunExamples.scala b/examples/spark2/src/test/scala/org/apache/carbondata/examplesCI/RunExamples.scala index d3623ba..2b9b999 100644 --- a/examples/spark2/src/test/scala/org/apache/carbondata/examplesCI/RunExamples.scala +++ b/examples/spark2/src/test/scala/org/apache/carbondata/examplesCI/RunExamples.scala @@ -113,11 +113,4 @@ class RunExamples extends QueryTest with BeforeAndAfterAll { test("ExternalTableExample") { ExternalTableExample.exampleBody(spark) } - - test("MVDataMapExample") { -// MV only works for 2.2 and above -if (!spark.sparkContext.version.startsWith("2.1")) { - MVDataMapExample.exampleBody(spark) -} - } } \ No newline at end of file
[carbondata] Git Push Summary
Repository: carbondata Updated Branches: refs/heads/branch-1.5 [created] 6a2a94d05
[2/3] carbondata git commit: [CARBONDATA-2961] Simplify SDK API interfaces
http://git-wip-us.apache.org/repos/asf/carbondata/blob/6a2a94d0/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/TestCreateTableUsingSparkCarbonFileFormat.scala -- diff --git a/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/TestCreateTableUsingSparkCarbonFileFormat.scala b/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/TestCreateTableUsingSparkCarbonFileFormat.scala index 9ccc02c..755a7df 100644 --- a/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/TestCreateTableUsingSparkCarbonFileFormat.scala +++ b/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/TestCreateTableUsingSparkCarbonFileFormat.scala @@ -68,9 +68,7 @@ class TestCreateTableUsingSparkCarbonFileFormat extends FunSuite with BeforeAndA //getCanonicalPath gives path with \, but the code expects /. writerPath = writerPath.replace("\\", "/"); - val filePath = writerPath + "/Fact/Part0/Segment_null/" - - def buildTestData(persistSchema:Boolean) = { + def buildTestData(): Any = { FileUtils.deleteDirectory(new File(writerPath)) @@ -83,17 +81,9 @@ class TestCreateTableUsingSparkCarbonFileFormat extends FunSuite with BeforeAndA .toString() try { - val builder = CarbonWriter.builder().isTransactionalTable(true) + val builder = CarbonWriter.builder() val writer = -if (persistSchema) { - builder.persistSchemaFile(true) - builder.outputPath(writerPath).buildWriterForCSVInput(Schema.parseJson(schema), spark -.sparkContext.hadoopConfiguration) -} else { - builder.outputPath(writerPath).buildWriterForCSVInput(Schema.parseJson(schema), spark -.sparkContext.hadoopConfiguration) -} - + builder.outputPath(writerPath).withCsvInput(Schema.parseJson(schema)).build() var i = 0 while (i < 100) { writer.write(Array[String]("robot" + i, String.valueOf(i), String.valueOf(i.toDouble / 2))) @@ -126,19 +116,19 @@ class TestCreateTableUsingSparkCarbonFileFormat extends FunSuite with BeforeAndA //TO DO, need to remove segment dependency and tableIdentifier Dependency test("read carbondata files (sdk Writer Output) using the SparkCarbonFileFormat ") { -buildTestData(false) -assert(new File(filePath).exists()) +buildTestData() +assert(new File(writerPath).exists()) spark.sql("DROP TABLE IF EXISTS sdkOutputTable") //data source file format if (SparkUtil.isSparkVersionEqualTo("2.1")) { //data source file format - spark.sql(s"""CREATE TABLE sdkOutputTable USING carbon OPTIONS (PATH '$filePath') """) + spark.sql(s"""CREATE TABLE sdkOutputTable USING carbon OPTIONS (PATH '$writerPath') """) } else if (SparkUtil.isSparkVersionXandAbove("2.2")) { //data source file format spark.sql( s"""CREATE TABLE sdkOutputTable USING carbon LOCATION - |'$filePath' """.stripMargin) + |'$writerPath' """.stripMargin) } else{ // TO DO } @@ -169,55 +159,55 @@ class TestCreateTableUsingSparkCarbonFileFormat extends FunSuite with BeforeAndA spark.sql("DROP TABLE sdkOutputTable") // drop table should not delete the files -assert(new File(filePath).exists()) +assert(new File(writerPath).exists()) cleanTestData() } test("Running SQL directly and read carbondata files (sdk Writer Output) using the SparkCarbonFileFormat ") { -buildTestData(false) -assert(new File(filePath).exists()) +buildTestData() +assert(new File(writerPath).exists()) spark.sql("DROP TABLE IF EXISTS sdkOutputTable") //data source file format if (SparkUtil.isSparkVersionEqualTo("2.1")) { //data source file format - spark.sql(s"""CREATE 
TABLE sdkOutputTable USING carbon OPTIONS (PATH '$filePath') """) + spark.sql(s"""CREATE TABLE sdkOutputTable USING carbon OPTIONS (PATH '$writerPath') """) } else if (SparkUtil.isSparkVersionXandAbove("2.2")) { //data source file format spark.sql( s"""CREATE TABLE sdkOutputTable USING carbon LOCATION - |'$filePath' """.stripMargin) + |'$writerPath' """.stripMargin) } else { // TO DO } -val directSQL = spark.sql(s"""select * FROM carbon.`$filePath`""".stripMargin) +val directSQL = spark.sql(s"""select * FROM carbon.`$writerPath`""".stripMargin) directSQL.show(false) TestUtil.checkAnswer(spark.sql("select * from sdkOutputTable"), directSQL) spark.sql("DROP TABLE sdkOutputTable") // drop table should not delete the files -assert(new File(filePath).exists()) +assert(new File(writerPath).exists()) cleanTestData() } // TODO: Make the sparkCarbonFileFormat to work
[1/3] carbondata git commit: [CARBONDATA-2961] Simplify SDK API interfaces
Repository: carbondata Updated Branches: refs/heads/master 1d4d24044 -> 6a2a94d05 http://git-wip-us.apache.org/repos/asf/carbondata/blob/6a2a94d0/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java -- diff --git a/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java b/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java index e324aca..8c5ffe5 100644 --- a/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java +++ b/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java @@ -29,25 +29,19 @@ import org.apache.carbondata.common.logging.LogServiceFactory; import org.apache.carbondata.core.constants.CarbonCommonConstants; import org.apache.carbondata.core.datastore.impl.FileFactory; import org.apache.carbondata.core.metadata.datatype.DataTypes; -import org.apache.carbondata.core.metadata.schema.table.DiskBasedDMSchemaStorageProvider; import org.apache.carbondata.core.scan.expression.ColumnExpression; import org.apache.carbondata.core.scan.expression.LiteralExpression; import org.apache.carbondata.core.scan.expression.conditional.*; import org.apache.carbondata.core.scan.expression.logical.AndExpression; import org.apache.carbondata.core.scan.expression.logical.OrExpression; import org.apache.carbondata.core.util.CarbonProperties; -import org.apache.carbondata.core.util.path.CarbonTablePath; import junit.framework.TestCase; import org.apache.commons.io.FileUtils; -import org.apache.commons.lang.CharEncoding; -import org.apache.hadoop.conf.Configuration; import org.junit.*; public class CarbonReaderTest extends TestCase { - private Configuration conf = new Configuration(false); - @Before public void cleanFile() { assert (TestUtil.cleanMdtFile()); @@ -73,10 +67,10 @@ public class CarbonReaderTest extends TestCase { fields[0] = new Field("name", DataTypes.STRING); fields[1] = new Field("age", DataTypes.INT); -TestUtil.writeFilesAndVerify(200, new Schema(fields), path, true); +TestUtil.writeFilesAndVerify(200, new Schema(fields), path); -CarbonReader reader = CarbonReader.builder(path, "_temp").isTransactionalTable(true) -.projection(new String[]{"name", "age"}).build(conf); +CarbonReader reader = CarbonReader.builder(path, "_temp") +.projection(new String[]{"name", "age"}).build(); // expected output after sorting String[] name = new String[200]; @@ -101,9 +95,8 @@ public class CarbonReaderTest extends TestCase { // Read again CarbonReader reader2 = CarbonReader .builder(path, "_temp") -.isTransactionalTable(true) .projection(new String[]{"name", "age"}) -.build(conf); +.build(); i = 0; while (reader2.hasNext()) { @@ -120,84 +113,6 @@ public class CarbonReaderTest extends TestCase { } @Test - public void testReadWithFilterOfTransactional() throws IOException, InterruptedException { -String path = "./testWriteFiles"; -FileUtils.deleteDirectory(new File(path)); - -Field[] fields = new Field[2]; -fields[0] = new Field("name", DataTypes.STRING); -fields[1] = new Field("age", DataTypes.INT); - -TestUtil.writeFilesAndVerify(200, new Schema(fields), path, true); - -EqualToExpression equalToExpression = new EqualToExpression( -new ColumnExpression("name", DataTypes.STRING), -new LiteralExpression("robot1", DataTypes.STRING)); -CarbonReader reader = CarbonReader -.builder(path, "_temp") -.isTransactionalTable(true) -.projection(new String[]{"name", "age"}) -.filter(equalToExpression) -.build(conf); - -int i = 0; -while (reader.hasNext()) { - Object[] row = (Object[]) reader.readNextRow(); - // 
Default sort column is applied for dimensions. So, need to validate accordingly - assert ("robot1".equals(row[0])); - i++; -} -Assert.assertEquals(i, 20); - -reader.close(); - -FileUtils.deleteDirectory(new File(path)); - } - - @Test - public void testReadWithFilterOfTransactionalAnd() throws IOException, InterruptedException { -String path = "./testWriteFiles"; -FileUtils.deleteDirectory(new File(path)); - -Field[] fields = new Field[3]; -fields[0] = new Field("name", DataTypes.STRING); -fields[1] = new Field("age", DataTypes.INT); -fields[2] = new Field("doubleField", DataTypes.DOUBLE); - -TestUtil.writeFilesAndVerify(200, new Schema(fields), path, true); - -ColumnExpression columnExpression = new ColumnExpression("doubleField", DataTypes.DOUBLE); -EqualToExpression equalToExpression = new EqualToExpression(columnExpression, -new LiteralExpression("3.5", DataTypes.DOUBLE)); - -ColumnExpression columnExpression2 = new ColumnExpression("name", DataTypes.STRING); -EqualToExpression
[3/3] carbondata git commit: [CARBONDATA-2961] Simplify SDK API interfaces
[CARBONDATA-2961] Simplify SDK API interfaces Problem: the current SDK API interfaces are not simple and do not follow the builder pattern; as new features are added, they will become more complex. Solution: simplify the SDK interfaces to follow the builder pattern; refer to the latest sdk-guide. Added: changes in Carbon Writer: public CarbonWriterBuilder withThreadSafe(short numOfThreads) public CarbonWriterBuilder withHadoopConf(Configuration conf) public CarbonWriterBuilder withCsvInput(Schema schema) public CarbonWriterBuilder withAvroInput(org.apache.avro.Schema avroSchema) public CarbonWriterBuilder withJsonInput(Schema carbonSchema) public CarbonWriter build() throws IOException, InvalidLoadOptionException Changes in carbon Reader public CarbonReaderBuilder withHadoopConf(Configuration conf) public CarbonReader build() throws IOException, InvalidLoadOptionException Removed: changes in Carbon Writer: public CarbonWriterBuilder isTransactionalTable(boolean isTransactionalTable) public CarbonWriterBuilder persistSchemaFile(boolean persist) setAccessKey setAccessKey setSecretKey setSecretKey setEndPoint setEndPoint public CarbonWriter buildWriterForCSVInput(Schema schema, Configuration configuration) public CarbonWriter buildThreadSafeWriterForCSVInput(Schema schema, short numOfThreads,Configuration configuration) public CarbonWriter buildWriterForAvroInput(org.apache.avro.Schema avroSchema,Configuration configuration) public CarbonWriter buildThreadSafeWriterForAvroInput(org.apache.avro.Schema avroSchema,short numOfThreads, Configuration configuration) public JsonCarbonWriter buildWriterForJsonInput(Schema carbonSchema, Configuration configuration) public JsonCarbonWriter buildThreadSafeWriterForJsonInput(Schema carbonSchema, short numOfThreads,Configuration configuration) Changes in carbon Reader public CarbonReaderBuilder isTransactionalTable(boolean isTransactionalTable) public CarbonReader build(Configuration conf) throws IOException, InvalidLoadOptionException This closes #2961 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/6a2a94d0 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/6a2a94d0 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/6a2a94d0 Branch: refs/heads/master Commit: 6a2a94d057d33435c16405d1da2e682cb748a77d Parents: 1d4d240 Author: ajantha-bhat Authored: Fri Sep 21 16:32:56 2018 +0530 Committer: ravipesala Committed: Tue Sep 25 20:59:50 2018 +0530 -- docs/sdk-guide.md | 208 +++-- .../examples/sdk/CarbonReaderExample.java | 7 +- .../carbondata/examples/sdk/SDKS3Example.java | 21 +- .../examples/sdk/SDKS3ReadExample.java | 6 +- .../carbondata/examples/DirectSQLExample.scala | 12 +- .../carbondata/examples/S3UsingSDkExample.scala | 18 +- .../sdv/generated/SDKwriterTestCase.scala | 67 +-- ...FileInputFormatWithExternalCarbonTable.scala | 30 +- .../TestNonTransactionalCarbonTable.scala | 99 ++--- ...tNonTransactionalCarbonTableForMapType.scala | 6 +- ...tNonTransactionalCarbonTableJsonWriter.scala | 5 +- ...ansactionalCarbonTableWithAvroDataType.scala | 86 ++-- ...ransactionalCarbonTableWithComplexType.scala | 13 +- ...tSparkCarbonFileFormatWithSparkSession.scala | 176 .../datasources/CarbonSparkDataSourceUtil.scala | 1 - .../datasource/SparkCarbonDataSourceTest.scala | 17 +- ...tCreateTableUsingSparkCarbonFileFormat.scala | 85 ++-- .../sdk/file/CarbonReaderBuilder.java | 105 + .../sdk/file/CarbonWriterBuilder.java | 342
.../apache/carbondata/sdk/file/TestUtil.java| 44 +- .../carbondata/store/MetaCachedCarbonStore.java | 21 +- .../sdk/file/AvroCarbonWriterTest.java | 72 +-- .../sdk/file/CSVCarbonWriterTest.java | 86 ++-- .../CSVNonTransactionalCarbonWriterTest.java| 298 - .../carbondata/sdk/file/CarbonReaderTest.java | 434 --- .../sdk/file/ConcurrentAvroSdkWriterTest.java | 9 +- .../sdk/file/ConcurrentSdkWriterTest.java | 9 +- .../carbondata/store/LocalCarbonStoreTest.java | 2 +- 28 files changed, 486 insertions(+), 1793 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/6a2a94d0/docs/sdk-guide.md -- diff --git a/docs/sdk-guide.md b/docs/sdk-guide.md index dc21efd..d1e4bc5 100644 --- a/docs/sdk-guide.md +++ b/docs/sdk-guide.md @@ -67,9 +67,9 @@ These SDK writer output contains just a carbondata and carbonindex files. No met CarbonProperties.getInstance().addProperty("enable.offheap.sort", enableOffheap); - CarbonWriterBuilder builder = CarbonWrit
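The simplified flow is easiest to see end to end. Below is a short Java sketch using exactly the builder methods listed in the commit message; the output path and row data are made up for illustration, so treat it as a sketch of the new API rather than a verbatim excerpt from the sdk-guide.

import org.apache.carbondata.core.metadata.datatype.DataTypes;
import org.apache.carbondata.sdk.file.CarbonReader;
import org.apache.carbondata.sdk.file.CarbonWriter;
import org.apache.carbondata.sdk.file.Field;
import org.apache.carbondata.sdk.file.Schema;

public class SdkBuilderFlow {
  public static void main(String[] args) throws Exception {
    String path = "./builder_flow_output"; // hypothetical output directory

    Field[] fields = new Field[2];
    fields[0] = new Field("name", DataTypes.STRING);
    fields[1] = new Field("age", DataTypes.INT);

    // Writer: the input format is now chosen with withCsvInput/withAvroInput/
    // withJsonInput instead of separate buildWriterFor* methods.
    CarbonWriter writer = CarbonWriter.builder()
        .outputPath(path)
        .withCsvInput(new Schema(fields))
        .build();
    for (int i = 0; i < 10; i++) {
      writer.write(new String[]{"robot" + i, String.valueOf(i)});
    }
    writer.close();

    // Reader: no isTransactionalTable flag, and build() no longer takes a
    // Configuration (use withHadoopConf when one is needed).
    CarbonReader reader = CarbonReader.builder(path, "_temp")
        .projection(new String[]{"name", "age"})
        .build();
    while (reader.hasNext()) {
      Object[] row = (Object[]) reader.readNextRow();
      System.out.println(row[0] + "\t" + row[1]);
    }
    reader.close();
  }
}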
carbondata git commit: [CARBONDATA-2964] Fix for unsupported float data type bug
Repository: carbondata Updated Branches: refs/heads/master 5443b227b -> 1d4d24044 [CARBONDATA-2964] Fix for unsupported float data type bug Problem: if multiple pages are present for a blocklet, the float and byte checks were missing during comparator creation, so an unsupported data type exception was thrown; byte data was also being read as double. Solution: add a check for float, and add the missing byte condition in AbstractScannedResultCollector, which previously sent byte values down the double flow. This closes #2753 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/1d4d2404 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/1d4d2404 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/1d4d2404 Branch: refs/heads/master Commit: 1d4d24044d01a6e2a5cc5806eb89a6ee9532afca Parents: 5443b22 Author: kunal642 Authored: Fri Sep 21 18:36:58 2018 +0530 Committer: ravipesala Committed: Tue Sep 25 16:17:12 2018 +0530 -- .../impl/AbstractScannedResultCollector.java| 2 ++ .../core/util/CarbonMetadataUtil.java | 16 - docs/ddl-of-carbondata.md | 2 +- docs/sdk-guide.md | 2 ++ docs/supported-data-types-in-carbondata.md | 4 +++ .../datasource/SparkCarbonDataSourceTest.scala | 37 .../sdk/file/CSVCarbonWriterTest.java | 2 ++ 7 files changed, 63 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/1d4d2404/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/AbstractScannedResultCollector.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/AbstractScannedResultCollector.java b/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/AbstractScannedResultCollector.java index b20954a..051bef0 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/AbstractScannedResultCollector.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/AbstractScannedResultCollector.java @@ -145,6 +145,8 @@ public abstract class AbstractScannedResultCollector implements ScannedResultCol return dataChunk.getLong(index); } else if (dataType == DataTypes.FLOAT) { return dataChunk.getFloat(index); + } else if (dataType == DataTypes.BYTE) { +return dataChunk.getByte(index); } else if (DataTypes.isDecimal(dataType)) { BigDecimal bigDecimalMsrValue = dataChunk.getDecimal(index); if (null != bigDecimalMsrValue && carbonMeasure.getScale() > bigDecimalMsrValue.scale()) { http://git-wip-us.apache.org/repos/asf/carbondata/blob/1d4d2404/core/src/main/java/org/apache/carbondata/core/util/CarbonMetadataUtil.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonMetadataUtil.java b/core/src/main/java/org/apache/carbondata/core/util/CarbonMetadataUtil.java index 0167c9a..231f8c4 100644 --- a/core/src/main/java/org/apache/carbondata/core/util/CarbonMetadataUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonMetadataUtil.java @@ -464,7 +464,7 @@ public class CarbonMetadataUtil { private static int compareMeasureData(byte[] first, byte[] second, DataType dataType) { ByteBuffer firstBuffer = null; ByteBuffer secondBuffer = null; -if (dataType == DataTypes.BOOLEAN) { +if (dataType == DataTypes.BOOLEAN || dataType == DataTypes.BYTE) { return first[0] - second[0]; } else if (dataType == DataTypes.DOUBLE) { firstBuffer = ByteBuffer.allocate(8); @@ -480,6 +480,20 @@ public class CarbonMetadataUtil { compare = -1; } return (int) compare; +} else if (dataType == DataTypes.FLOAT) { + firstBuffer = 
ByteBuffer.allocate(8); + firstBuffer.put(first); + secondBuffer = ByteBuffer.allocate(8); + secondBuffer.put(second); + firstBuffer.flip(); + secondBuffer.flip(); + double compare = firstBuffer.getFloat() - secondBuffer.getFloat(); + if (compare > 0) { +compare = 1; + } else if (compare < 0) { +compare = -1; + } + return (int) compare; } else if (dataType == DataTypes.LONG || dataType == DataTypes.INT || dataType == DataTypes.SHORT) { firstBuffer = ByteBuffer.allocate(8); http://git-wip-us.apache.org/repos/asf/carbondata/blob/1d4d2404/docs/ddl-of-carbondata.md -- diff --git a/docs/ddl-of-carbondata.md b/docs/ddl-of-carbondata.md index acaac43..2a467a2 100644 --- a/docs/ddl-of-carbondata.md +++ b/docs/
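A condensed Java sketch of the comparator branches this patch adds. It wraps the serialized min/max bytes and compares the decoded values; Float.compare is used here for the normalize-to-sign step, where the actual CarbonMetadataUtil code subtracts the two values and clamps the result to -1/0/1, which orders the same way.

import java.nio.ByteBuffer;

public class MeasureCompareSketch {
  // FLOAT: decode the serialized bytes and compare the float values.
  static int compareFloatBytes(byte[] first, byte[] second) {
    float f1 = ByteBuffer.wrap(first).getFloat();
    float f2 = ByteBuffer.wrap(second).getFloat();
    return Float.compare(f1, f2);
  }

  // BYTE: compare directly on the single stored byte, as in the patch.
  static int compareByteBytes(byte[] first, byte[] second) {
    return first[0] - second[0];
  }

  public static void main(String[] args) {
    byte[] a = ByteBuffer.allocate(4).putFloat(1.5f).array();
    byte[] b = ByteBuffer.allocate(4).putFloat(2.5f).array();
    System.out.println(compareFloatBytes(a, b)); // prints -1
  }
}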
carbondata git commit: [CARBONDATA-2947] Adaptive encoding support for timestamp no dictionary and Refactor ColumnPageWrapper
Repository: carbondata Updated Branches: refs/heads/master 786db2171 -> 5443b227b [CARBONDATA-2947] Adaptive encoding support for timestamp no dictionary and Refactor ColumnPageWrapper Support adaptive encoding for Timestamp data type in case of no dictionary column This closes #2736 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/5443b227 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/5443b227 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/5443b227 Branch: refs/heads/master Commit: 5443b227b806eecc81299f3c50d54d0b66fb00bb Parents: 786db21 Author: dhatchayani Authored: Wed Sep 19 19:09:04 2018 +0530 Committer: ravipesala Committed: Tue Sep 25 16:08:39 2018 +0530 -- .../datastore/chunk/DimensionColumnPage.java| 12 +++ .../chunk/impl/AbstractDimensionColumnPage.java | 10 +++ .../chunk/store/ColumnPageWrapper.java | 91 ++-- .../page/encoding/DefaultEncodingFactory.java | 3 +- .../core/datastore/row/WriteStepRowUtil.java| 7 ++ .../core/scan/executor/util/QueryUtil.java | 32 +++ .../carbondata/core/scan/filter/FilterUtil.java | 19 ++-- ...RowLevelRangeLessThanFilterExecuterImpl.java | 8 +- .../carbondata/core/util/CarbonUnsafeUtil.java | 4 +- .../carbondata/core/util/DataTypeUtil.java | 80 +++-- .../datamap/bloom/BloomCoarseGrainDataMap.java | 2 +- .../datamap/bloom/DataConvertUtil.java | 2 +- .../datamap/IndexDataMapRebuildRDD.scala| 11 ++- .../TestStreamingTableOperation.scala | 2 +- .../converter/impl/FieldEncoderFactory.java | 22 +++-- .../impl/MeasureFieldConverterImpl.java | 45 +++--- .../converter/impl/RowConverterImpl.java| 17 +++- .../loading/sort/SortStepRowHandler.java| 11 ++- .../merger/CompactionResultSortProcessor.java | 44 +- .../carbondata/processing/store/TablePage.java | 16 +++- .../streaming/CarbonStreamRecordWriter.java | 3 +- 21 files changed, 385 insertions(+), 56 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/5443b227/core/src/main/java/org/apache/carbondata/core/datastore/chunk/DimensionColumnPage.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/DimensionColumnPage.java b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/DimensionColumnPage.java index 50fa09a..fa2b73e 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/DimensionColumnPage.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/DimensionColumnPage.java @@ -16,6 +16,8 @@ */ package org.apache.carbondata.core.datastore.chunk; +import java.util.BitSet; + import org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo; /** @@ -102,4 +104,14 @@ public interface DimensionColumnPage { */ void freeMemory(); + /** + * to check whether the page is adaptive encoded + */ + boolean isAdaptiveEncoded(); + + /** + * to get the null bit sets in case of adaptive encoded page + */ + BitSet getNullBits(); + } http://git-wip-us.apache.org/repos/asf/carbondata/blob/5443b227/core/src/main/java/org/apache/carbondata/core/datastore/chunk/impl/AbstractDimensionColumnPage.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/impl/AbstractDimensionColumnPage.java b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/impl/AbstractDimensionColumnPage.java index d400952..fdf57a9 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/impl/AbstractDimensionColumnPage.java +++ 
b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/impl/AbstractDimensionColumnPage.java @@ -16,6 +16,8 @@ */ package org.apache.carbondata.core.datastore.chunk.impl; +import java.util.BitSet; + import org.apache.carbondata.core.datastore.chunk.DimensionColumnPage; import org.apache.carbondata.core.datastore.chunk.store.DimensionDataChunkStore; @@ -44,6 +46,14 @@ public abstract class AbstractDimensionColumnPage implements DimensionColumnPage return dataChunkStore.isExplicitSorted(); } + @Override public boolean isAdaptiveEncoded() { +return false; + } + + @Override public BitSet getNullBits() { +return null; + } + /** * Below method to get the data based in row id * http://git-wip-us.apache.org/repos/asf/carbondata/blob/5443b227/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datast
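A minimal sketch of what the two new DimensionColumnPage accessors enable for a caller. The trimmed interface and the readRow consumer below are hypothetical; in the patch the real consumers are the filter executors and result collectors, and ColumnPageWrapper overrides the defaults shown in AbstractDimensionColumnPage.

import java.util.BitSet;

public class AdaptivePageReadSketch {
  // Trimmed-down stand-in for org.apache.carbondata.core.datastore.chunk.DimensionColumnPage.
  interface DimensionColumnPage {
    boolean isAdaptiveEncoded();
    BitSet getNullBits();
    byte[] getChunkData(int rowId);
  }

  // When a page is adaptive encoded, nulls are tracked in a BitSet instead of
  // by a special byte pattern inside the stored data, so a caller consults the
  // bit set before decoding a value.
  static byte[] readRow(DimensionColumnPage page, int rowId) {
    if (page.isAdaptiveEncoded() && page.getNullBits().get(rowId)) {
      return null; // treat as null without decoding the stored value
    }
    return page.getChunkData(rowId);
  }
}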
carbondata git commit: [Compatibility] fix legacy store compatibility issue
Repository: carbondata Updated Branches: refs/heads/master 23bd6e9af -> 0ad90820a [Compatibility] fix legacy store compatibility issue Problem: migrate a table with one segment from version 1.1 to the latest version and perform one load; the table then has one legacy segment and one non-legacy segment. During a query the blocklet count is calculated for both blocks: the legacy segment should report 0 so that blocklet pruning is skipped (its min/max is not available), while the non-legacy segment should report its actual count so that all of its blocklets are scanned. The datamap schema is cached to avoid recalculating it, so for segment 2 the datamap schema of segment 1 was being reused and the isLegacy flag was applied to the wrong segment. Solution: pass 0 as the blocklet count for the legacy store and calculate the blocklet count only for new segments. This closes #2746 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/0ad90820 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/0ad90820 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/0ad90820 Branch: refs/heads/master Commit: 0ad90820acd78b8158edb4025e559df363af6800 Parents: 23bd6e9 Author: kunal642 Authored: Fri Sep 21 18:43:22 2018 +0530 Committer: ravipesala Committed: Mon Sep 24 16:53:35 2018 +0530 -- .../indexstore/blockletindex/BlockDataMap.java | 21 ++-- 1 file changed, 10 insertions(+), 11 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/0ad90820/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockDataMap.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockDataMap.java b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockDataMap.java index 8a1538e..0cf9914 100644 --- a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockDataMap.java +++ b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockDataMap.java @@ -238,6 +238,10 @@ public class BlockDataMap extends CoarseGrainDataMap blockMetaInfo, updatedMinValues, updatedMaxValues, minMaxFlag); } } +List blockletCountList = new ArrayList<>(); +blockletCountList.add((short) 0); +byte[] blockletCount = convertRowCountFromShortToByteArray(blockletCountList); +summaryRow.setByteArray(blockletCount, taskSummarySchema.length - 1); setMinMaxFlagForTaskSummary(summaryRow, taskSummarySchema, segmentProperties, minMaxFlag); return summaryRow; } @@ -627,17 +631,12 @@ public class BlockDataMap extends CoarseGrainDataMap // get total blocklet number in this datamap protected int getTotalBlocklets() { -if (isLegacyStore) { - // dummy value - return 0; -} else { - ByteBuffer byteBuffer = ByteBuffer.wrap(getBlockletRowCountForEachBlock()); - int sum = 0; - while (byteBuffer.hasRemaining()) { -sum += byteBuffer.getShort(); - } - return sum; +ByteBuffer byteBuffer = ByteBuffer.wrap(getBlockletRowCountForEachBlock()); +int sum = 0; +while (byteBuffer.hasRemaining()) { + sum += byteBuffer.getShort(); } +return sum; } private List prune(FilterResolverIntf filterExp) { @@ -1000,7 +999,7 @@ public class BlockDataMap extends CoarseGrainDataMap SegmentPropertiesAndSchemaHolder.getInstance() .getSegmentPropertiesWrapper(segmentPropertiesIndex); try { - return segmentPropertiesWrapper.getTaskSummarySchema(!isLegacyStore, isFilePathStored); + return segmentPropertiesWrapper.getTaskSummarySchema(true, 
isFilePathStored); } catch (MemoryException e) { throw new RuntimeException(e); }
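The blocklet-count summation that now runs unconditionally is easy to isolate. A self-contained Java sketch, assuming the per-block counts are serialized as consecutive shorts as in the patch:

import java.nio.ByteBuffer;

public class BlockletCountSketch {
  // Mirrors the simplified getTotalBlocklets(): legacy segments now store a
  // single 0 entry, so the same code path yields 0 for them with no special case.
  static int totalBlocklets(byte[] blockletCountPerBlock) {
    ByteBuffer buffer = ByteBuffer.wrap(blockletCountPerBlock);
    int sum = 0;
    while (buffer.hasRemaining()) {
      sum += buffer.getShort();
    }
    return sum;
  }

  public static void main(String[] args) {
    byte[] counts = ByteBuffer.allocate(6)
        .putShort((short) 2).putShort((short) 3).putShort((short) 1).array();
    System.out.println(totalBlocklets(counts)); // prints 6
  }
}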
[2/2] carbondata git commit: [CARBONDATA-2948] Float and Byte DataType support
[CARBONDATA-2948] Float and Byte DataType support Background: currently float is supported by internally storing the data as double and changing the data type to Double. This poses problems when SparkCarbonFileFormat is used to read float data: because the data type is changed from Float to Double, the data is retrieved as a double page instead of a float page, so if the user creates a table through the file format and specifies float for any column, the query fails; the user is restricted to using double to retrieve the data. Proposed solution: add support for the float data type and store the data as a FloatPage. Most of the methods used for double can be reused for float. This closes #2726 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/edfcdca0 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/edfcdca0 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/edfcdca0 Branch: refs/heads/master Commit: edfcdca0ac3dcf92a22ffd82557dbff036ec7428 Parents: b04269b Author: kunal642 Authored: Fri Sep 14 15:26:10 2018 +0530 Committer: ravipesala Committed: Fri Sep 21 17:54:10 2018 +0530 -- .../chunk/store/ColumnPageWrapper.java | 16 +- .../core/datastore/page/ColumnPage.java | 10 + .../core/datastore/page/DecimalColumnPage.java | 5 + .../core/datastore/page/LazyColumnPage.java | 7 +- .../datastore/page/LocalDictColumnPage.java | 4 + .../datastore/page/SafeFixLengthColumnPage.java | 10 + .../page/UnsafeFixLengthColumnPage.java | 14 ++ .../datastore/page/VarLengthColumnPageBase.java | 6 + .../page/encoding/ColumnPageEncoderMeta.java| 8 + .../page/encoding/DefaultEncodingFactory.java | 11 +- .../page/encoding/adaptive/AdaptiveCodec.java | 4 + .../adaptive/AdaptiveDeltaFloatingCodec.java| 7 +- .../adaptive/AdaptiveFloatingCodec.java | 14 +- .../statistics/ColumnPageStatsCollector.java| 1 + .../page/statistics/DummyStatsCollector.java| 4 + .../page/statistics/KeyPageStatsCollector.java | 4 + .../page/statistics/LVStringStatsCollector.java | 4 + .../statistics/PrimitivePageStatsCollector.java | 57 + .../ThriftWrapperSchemaConverterImpl.java | 8 + .../core/metadata/datatype/DataType.java| 2 +- .../impl/AbstractScannedResultCollector.java| 2 + .../carbondata/core/scan/filter/FilterUtil.java | 21 ++ .../executer/ExcludeFilterExecuterImpl.java | 21 +- .../executer/IncludeFilterExecuterImpl.java | 25 +-- .../scan/result/vector/CarbonColumnVector.java | 2 + .../vector/MeasureDataVectorProcessor.java | 111 ++ .../vector/impl/CarbonColumnVectorImpl.java | 4 + .../apache/carbondata/core/util/ByteUtil.java | 12 +- .../carbondata/core/util/CarbonUnsafeUtil.java | 2 +- .../apache/carbondata/core/util/CarbonUtil.java | 11 +- .../carbondata/core/util/DataTypeUtil.java | 23 ++ .../core/util/comparator/Comparator.java| 19 ++ format/src/main/thrift/schema.thrift| 2 + .../presto/CarbonColumnVectorWrapper.java | 6 + .../TestNonTransactionalCarbonTable.scala | 6 +- .../vectorreader/ColumnarVectorWrapper.java | 6 + .../VectorizedCarbonRecordReader.java | 5 +- .../datasources/CarbonSparkDataSourceUtil.scala | 3 + .../spark/sql/util/SparkTypeConverter.scala | 2 + .../datasource/SparkCarbonDataSourceTest.scala | 215 ++- ...tCreateTableUsingSparkCarbonFileFormat.scala | 5 +- .../sql/carbondata/datasource/TestUtil.scala| 16 +- .../spark/sql/CarbonDataFrameWriter.scala | 2 +- .../loading/sort/SortStepRowHandler.java| 8 + .../carbondata/sdk/file/AvroCarbonWriter.java | 8 +-
.../sdk/file/AvroCarbonWriterTest.java | 42 .../sdk/file/CSVCarbonWriterTest.java | 183 +++- .../sdk/file/ConcurrentAvroSdkWriterTest.java | 2 +- .../sdk/file/ConcurrentSdkWriterTest.java | 2 +- 49 files changed, 865 insertions(+), 97 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/edfcdca0/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java index 176a3e9..71cfc46 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/ColumnPageWrapper.java @@ -143,14 +143,12 @@ public
[1/2] carbondata git commit: [CARBONDATA-2948] Float and Byte DataType support
Repository: carbondata Updated Branches: refs/heads/master b04269b2b -> edfcdca0a http://git-wip-us.apache.org/repos/asf/carbondata/blob/edfcdca0/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/TestUtil.scala -- diff --git a/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/TestUtil.scala b/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/TestUtil.scala index 8b0eca8..b9185aa 100644 --- a/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/TestUtil.scala +++ b/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/TestUtil.scala @@ -51,10 +51,7 @@ object TestUtil { .addProperty(CarbonCommonConstants.CARBON_MINMAX_ALLOWED_BYTE_COUNT, "40") def checkAnswer(df: DataFrame, expectedAnswer: java.util.List[Row]):Unit = { -checkAnswer(df, expectedAnswer.asScala) match { - case Some(errorMessage) => assert(false, errorMessage) - case None => -} +checkAnswer(df, expectedAnswer.asScala) } def checkExistence(df: DataFrame, exists: Boolean, keywords: String*) { @@ -69,10 +66,7 @@ object TestUtil { } def checkAnswer(df: DataFrame, expectedAnswer: DataFrame): Unit = { -checkAnswer(df, expectedAnswer.collect()) match { - case Some(errorMessage) => assert(false, errorMessage) - case None => -} +checkAnswer(df, expectedAnswer.collect()) } /** @@ -83,7 +77,7 @@ object TestUtil { * @param df the [[DataFrame]] to be executed * @param expectedAnswer the expected result in a [[Seq]] of [[Row]]s. */ - def checkAnswer(df: DataFrame, expectedAnswer: Seq[Row]): Option[String] = { + def checkAnswer(df: DataFrame, expectedAnswer: Seq[Row]): Unit = { val isSorted = df.logicalPlan.collect { case s: logical.Sort => s }.nonEmpty def prepareAnswer(answer: Seq[Row]): Seq[Row] = { // Converts data to types that we can do equality comparison using Scala collections. 
@@ -136,10 +130,8 @@ object TestUtil { prepareAnswer(sparkAnswer).map(_.toString())).mkString("\n") } """.stripMargin - return Some(errorMessage) + assert(false, errorMessage) } - -return None } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/edfcdca0/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonDataFrameWriter.scala -- diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonDataFrameWriter.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonDataFrameWriter.scala index cd189d2..f335509 100644 --- a/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonDataFrameWriter.scala +++ b/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonDataFrameWriter.scala @@ -68,7 +68,7 @@ class CarbonDataFrameWriter(sqlContext: SQLContext, val dataFrame: DataFrame) { case IntegerType => CarbonType.INT.getName case ShortType => CarbonType.SHORT.getName case LongType => CarbonType.LONG.getName - case FloatType => CarbonType.DOUBLE.getName + case FloatType => CarbonType.FLOAT.getName case DoubleType => CarbonType.DOUBLE.getName case TimestampType => CarbonType.TIMESTAMP.getName case DateType => CarbonType.DATE.getName http://git-wip-us.apache.org/repos/asf/carbondata/blob/edfcdca0/processing/src/main/java/org/apache/carbondata/processing/loading/sort/SortStepRowHandler.java -- diff --git a/processing/src/main/java/org/apache/carbondata/processing/loading/sort/SortStepRowHandler.java b/processing/src/main/java/org/apache/carbondata/processing/loading/sort/SortStepRowHandler.java index 99b3779..1262fde 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/loading/sort/SortStepRowHandler.java +++ b/processing/src/main/java/org/apache/carbondata/processing/loading/sort/SortStepRowHandler.java @@ -388,6 +388,10 @@ public class SortStepRowHandler implements Serializable { tmpContent = rowBuffer.getLong(); } else if (DataTypes.DOUBLE == tmpDataType) { tmpContent = rowBuffer.getDouble(); +} else if (DataTypes.FLOAT == tmpDataType) { + tmpContent = rowBuffer.getFloat(); +} else if (DataTypes.BYTE == tmpDataType) { + tmpContent = rowBuffer.get(); } else if (DataTypes.isDecimal(tmpDataType)) { short len = rowBuffer.getShort(); byte[] decimalBytes = new byte[len]; @@ -829,6 +833,10 @@ public class SortStepRowHandler implements Serializable { reUsableByteArrayDataOutputStream.writeLong((Long) tmpValue); } else if (DataTypes.DOUBLE == tmpDataType) { reUsableByteArrayDataOutputStream.writeDouble((Double) tmpValue); +} else if
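The recurring shape in this patch is a data-type dispatch that previously had no FLOAT branch and sent BYTE values down the DOUBLE path. A minimal Java sketch of the read side, with a simplified enum standing in for Carbon's DataTypes:

import java.nio.ByteBuffer;

public class TypeDispatchSketch {
  enum DataType { BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE }

  // Read one measure value from a serialized row buffer.
  static Object readMeasure(ByteBuffer rowBuffer, DataType type) {
    switch (type) {
      case BOOLEAN: return rowBuffer.get() != 0;
      case BYTE:    return rowBuffer.get();       // previously read as DOUBLE
      case SHORT:   return rowBuffer.getShort();
      case INT:     return rowBuffer.getInt();
      case LONG:    return rowBuffer.getLong();
      case FLOAT:   return rowBuffer.getFloat();  // previously unsupported
      case DOUBLE:  return rowBuffer.getDouble();
      default: throw new UnsupportedOperationException("unsupported type: " + type);
    }
  }
}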
carbondata git commit: [CARBONDATA-2927] multiple issue fixes for varchar and complex columns, and for rows that grow beyond 2MB
Repository: carbondata Updated Branches: refs/heads/master b54512d1c -> d1bfb7477 [CARBONDATA-2927] multiple issue fixes for varchar and complex columns, and for rows that grow beyond 2MB 1. When the varchar data length exceeds 2MB, a buffer overflow exception occurs (thread-local row buffer). Root cause: the thread-local buffer was hardcoded to 2MB. Solution: grow the buffer dynamically based on the row size. 2. Reading a carbon file that has one row of varchar data 150MB long is very slow. Root cause: in UnsafeMemoryDMStore, ensureSize increases memory by only 8KB at a time, so many malloc/free cycles happen before reaching 150MB, hence the very slow performance. Solution: check and allocate the required size directly. 3. JVM crash when the data size exceeds 128MB in the unsafe sort step. Root cause: UnsafeCarbonRowPage is 128MB, so if one row needs more than 128MB we access memory beyond the allocated block, leading to a JVM crash. Solution: validate the size before access and prompt the user to increase unsafe memory (via a carbon property). This closes #2706 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/d1bfb747 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/d1bfb747 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/d1bfb747 Branch: refs/heads/master Commit: d1bfb7477ac98bc942c739596c9ed24aa944d867 Parents: b54512d Author: ajantha-bhat Authored: Mon Sep 10 23:34:56 2018 +0530 Committer: ravipesala Committed: Wed Sep 19 18:02:41 2018 +0530 -- .../core/indexstore/UnsafeMemoryDMStore.java| 20 ++-- .../core/memory/UnsafeMemoryManager.java| 3 +- .../util/ReUsableByteArrayDataOutputStream.java | 47 .../sdv/generated/SDKwriterTestCase.scala | 30 + .../loading/sort/SortStepRowHandler.java| 113 +++ .../sort/unsafe/UnsafeCarbonRowPage.java| 26 +++-- .../loading/sort/unsafe/UnsafeSortDataRows.java | 55 + .../processing/sort/sortdata/SortDataRows.java | 15 +-- .../store/CarbonFactDataHandlerColumnar.java| 1 + 9 files changed, 213 insertions(+), 97 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/d1bfb747/core/src/main/java/org/apache/carbondata/core/indexstore/UnsafeMemoryDMStore.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/UnsafeMemoryDMStore.java b/core/src/main/java/org/apache/carbondata/core/indexstore/UnsafeMemoryDMStore.java index cdf6c56..3e8ce12 100644 --- a/core/src/main/java/org/apache/carbondata/core/indexstore/UnsafeMemoryDMStore.java +++ b/core/src/main/java/org/apache/carbondata/core/indexstore/UnsafeMemoryDMStore.java @@ -60,8 +60,8 @@ public class UnsafeMemoryDMStore extends AbstractMemoryDMStore { * @param rowSize */ private void ensureSize(int rowSize) throws MemoryException { -while (runningLength + rowSize >= allocatedSize) { - increaseMemory(); +if (runningLength + rowSize >= allocatedSize) { + increaseMemory(runningLength + rowSize); } if (this.pointers.length <= rowCount + 1) { int[] newPointer = new int[pointers.length + 1000]; @@ -70,14 +70,14 @@ public class UnsafeMemoryDMStore extends AbstractMemoryDMStore { } } - private void increaseMemory() throws MemoryException { -MemoryBlock allocate = -UnsafeMemoryManager.allocateMemoryWithRetry(taskId, allocatedSize + capacity); -getUnsafe().copyMemory(memoryBlock.getBaseObject(), memoryBlock.getBaseOffset(), -allocate.getBaseObject(), allocate.getBaseOffset(), runningLength); -UnsafeMemoryManager.INSTANCE.freeMemory(taskId, memoryBlock); -allocatedSize = allocatedSize + capacity; -memoryBlock = 
allocate; + private void increaseMemory(int requiredMemory) throws MemoryException { +MemoryBlock newMemoryBlock = +UnsafeMemoryManager.allocateMemoryWithRetry(taskId, allocatedSize + requiredMemory); +getUnsafe().copyMemory(this.memoryBlock.getBaseObject(), this.memoryBlock.getBaseOffset(), +newMemoryBlock.getBaseObject(), newMemoryBlock.getBaseOffset(), runningLength); +UnsafeMemoryManager.INSTANCE.freeMemory(taskId, this.memoryBlock); +allocatedSize = allocatedSize + requiredMemory; +this.memoryBlock = newMemoryBlock; } /** http://git-wip-us.apache.org/repos/asf/carbondata/blob/d1bfb747/core/src/main/java/org/apache/carbondata/core/memory/UnsafeMemoryManager.java -- diff --git a/core/src/main/java/org/apache/carbondata/core/memory/UnsafeMemoryManager.java b/core/src/main/java/org/apache/carbondata/core/memory/UnsafeMemoryManager.java index 8fcbb6c..e3593c5 100644 --- a/c
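Fix 2 replaces repeated fixed-step growth with a single allocation of the required size. A schematic on-heap Java sketch of the same idea; the real UnsafeMemoryDMStore allocates unsafe memory blocks via UnsafeMemoryManager rather than byte arrays:

public class GrowBufferSketch {
  private byte[] block = new byte[8 * 1024];
  private int runningLength = 0;

  // Before the fix: while (runningLength + rowSize >= allocatedSize) grow by a
  // fixed step, causing many allocate/copy/free cycles for a 150MB row.
  // After the fix: grow once, directly to the required size.
  private void ensureSize(int rowSize) {
    int required = runningLength + rowSize;
    if (required >= block.length) {
      byte[] bigger = new byte[required];
      System.arraycopy(block, 0, bigger, 0, runningLength);
      block = bigger; // the real store frees the old unsafe block here
    }
  }

  void appendRow(byte[] row) {
    ensureSize(row.length);
    System.arraycopy(row, 0, block, runningLength, row.length);
    runningLength += row.length;
  }
}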
carbondata git commit: [CARBONDATA-2935] Write is_sort in footer for compaction
Repository: carbondata Updated Branches: refs/heads/master 077dd58e3 -> 05033f71e [CARBONDATA-2935] Write is_sort in footer for compaction carbondata.thrift is modified to add an is_sort flag in the footer indicating whether the file is sorted, which helps compaction decide whether merge sort can be used. This closes #2720 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/05033f71 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/05033f71 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/05033f71 Branch: refs/heads/master Commit: 05033f71e028d708f1a7d23b0d01775134be3078 Parents: 077dd58 Author: Jacky Li Authored: Fri Sep 14 18:28:25 2018 +0800 Committer: ravipesala Committed: Tue Sep 18 22:13:12 2018 +0530 -- format/src/main/thrift/carbondata.thrift | 1 + .../sql/CarbonGetTableDetailComandTestCase.scala | 6 +++--- .../store/CarbonFactDataHandlerColumnar.java | 2 +- .../store/writer/AbstractFactDataWriter.java | 4 ++-- .../store/writer/CarbonFactDataWriter.java | 2 +- .../writer/v3/CarbonFactDataWriterImplV3.java | 17 + 6 files changed, 21 insertions(+), 11 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/05033f71/format/src/main/thrift/carbondata.thrift -- diff --git a/format/src/main/thrift/carbondata.thrift b/format/src/main/thrift/carbondata.thrift index 7130066..ec51ff7 100644 --- a/format/src/main/thrift/carbondata.thrift +++ b/format/src/main/thrift/carbondata.thrift @@ -205,6 +205,7 @@ struct FileFooter3{ 3: required list blocklet_index_list; // Blocklet index of all blocklets in this file 4: optional list blocklet_info_list3; // Information about blocklets of all columns in this file for V3 format 5: optional dictionary.ColumnDictionaryChunk dictionary; // Blocklet local dictionary +6: optional bool is_sort; // True if the data is sorted in this file, it is used for compaction to decide whether to use merge sort or not } /** http://git-wip-us.apache.org/repos/asf/carbondata/blob/05033f71/integration/spark2/src/test/scala/org/apache/spark/sql/CarbonGetTableDetailComandTestCase.scala -- diff --git a/integration/spark2/src/test/scala/org/apache/spark/sql/CarbonGetTableDetailComandTestCase.scala b/integration/spark2/src/test/scala/org/apache/spark/sql/CarbonGetTableDetailComandTestCase.scala index fcb6110..2669417 100644 --- a/integration/spark2/src/test/scala/org/apache/spark/sql/CarbonGetTableDetailComandTestCase.scala +++ b/integration/spark2/src/test/scala/org/apache/spark/sql/CarbonGetTableDetailComandTestCase.scala @@ -42,10 +42,10 @@ class CarbonGetTableDetailCommandTestCase extends QueryTest with BeforeAndAfterA assertResult(2)(result.length) assertResult("table_info1")(result(0).getString(0)) -// 2220 is the size of carbon table.
Note that since 1.5.0, we add additional compressor name in metadata +assertResult(2221)(result(0).getLong(1)) assertResult("table_info2")(result(1).getString(0)) -assertResult(2220)(result(1).getLong(1)) +assertResult(2221)(result(1).getLong(1)) } override def afterAll: Unit = { http://git-wip-us.apache.org/repos/asf/carbondata/blob/05033f71/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java -- diff --git a/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java b/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java index c23b071..cf51941 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java +++ b/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java @@ -374,7 +374,7 @@ public class CarbonFactDataHandlerColumnar implements CarbonFactHandler { } consumerExecutorService.shutdownNow(); processWriteTaskSubmitList(consumerExecutorServiceTaskList); - this.dataWriter.writeFooterToFile(); + this.dataWriter.writeFooter(); LOGGER.info("All blocklets have been finished writing"); // close all the open stream for both the files this.dataWriter.closeWriter(); http://git-wip-us.apache.org/repos/asf/carbondata/blob/05033f71/processing/src/main/java/org/apache/c
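A sketch of how a compaction flow can consume the new optional flag. The FileFooter stand-in and canUseMergeSort are hypothetical, since this change only adds the thrift field; the absent-flag case covers files written before the change.

public class FooterSortFlagSketch {
  // Simplified stand-in for the FileFooter3 thrift struct's new optional field.
  static class FileFooter {
    Boolean isSort; // null for files written before this change
  }

  // Compaction can take the merge-sort path only when every input file is
  // known to be sorted; an absent or false flag forces the full sort path.
  static boolean canUseMergeSort(Iterable<FileFooter> footers) {
    for (FileFooter footer : footers) {
      if (footer.isSort == null || !footer.isSort) {
        return false;
      }
    }
    return true;
  }
}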
carbondata git commit: [CARBONDATA-2932] Fix CarbonReader "Projection can't be empty" exception
Repository: carbondata Updated Branches: refs/heads/master 04084c73f -> 02a89a9c6 [CARBONDATA-2932] Fix CarbonReader "Projection can't be empty" exception Running org.apache.carbondata.examples.sdk.CarbonReaderExample threw an exception; this PR fixes it. Conclusion: 1. Close the CarbonReader after reading is finished, which resolves the exception. 2. Add example code for varchar and array columns, covering both read and write. This closes #2717 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/02a89a9c Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/02a89a9c Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/02a89a9c Branch: refs/heads/master Commit: 02a89a9c6c0426243668653f8dfa20bb70536273 Parents: 04084c7 Author: xubo245 Authored: Thu Sep 13 19:09:30 2018 +0800 Committer: ravipesala Committed: Tue Sep 18 17:28:09 2018 +0530 -- .../examples/sdk/CarbonReaderExample.java | 46 ++-- 1 file changed, 33 insertions(+), 13 deletions(-) -- http://git-wip-us.apache.org/repos/asf/carbondata/blob/02a89a9c/examples/spark2/src/main/java/org/apache/carbondata/examples/sdk/CarbonReaderExample.java -- diff --git a/examples/spark2/src/main/java/org/apache/carbondata/examples/sdk/CarbonReaderExample.java b/examples/spark2/src/main/java/org/apache/carbondata/examples/sdk/CarbonReaderExample.java index 4eec4bf..76926ce 100644 --- a/examples/spark2/src/main/java/org/apache/carbondata/examples/sdk/CarbonReaderExample.java +++ b/examples/spark2/src/main/java/org/apache/carbondata/examples/sdk/CarbonReaderExample.java @@ -21,6 +21,8 @@ import java.io.File; import java.io.FilenameFilter; import java.sql.Date; import java.sql.Timestamp; +import java.util.HashMap; +import java.util.Map; import org.apache.commons.io.FileUtils; import org.apache.hadoop.conf.Configuration; @@ -43,7 +45,7 @@ public class CarbonReaderExample { try { FileUtils.deleteDirectory(new File(path)); -Field[] fields = new Field[9]; +Field[] fields = new Field[11]; fields[0] = new Field("stringField", DataTypes.STRING); fields[1] = new Field("shortField", DataTypes.SHORT); fields[2] = new Field("intField", DataTypes.INT); @@ -53,22 +55,28 @@ public class CarbonReaderExample { fields[6] = new Field("dateField", DataTypes.DATE); fields[7] = new Field("timeField", DataTypes.TIMESTAMP); fields[8] = new Field("decimalField", DataTypes.createDecimalType(8, 2)); - +fields[9] = new Field("varcharField", DataTypes.VARCHAR); +fields[10] = new Field("arrayField", DataTypes.createArrayType(DataTypes.STRING)); +Map map = new HashMap<>(); +map.put("complex_delimiter_level_1", "#"); CarbonWriter writer = CarbonWriter.builder() .outputPath(path) +.withLoadOptions(map) .buildWriterForCSVInput(new Schema(fields), new Configuration(false)); for (int i = 0; i < 10; i++) { String[] row2 = new String[]{ "robot" + (i % 10), -String.valueOf(i), +String.valueOf(i%1), String.valueOf(i), String.valueOf(Long.MAX_VALUE - i), String.valueOf((double) i / 2), String.valueOf(true), "2019-03-02", "2019-02-12 03:03:34", -"12.345" +"12.345", +"varchar", +"Hello#World#From#Carbon" }; writer.write(row2); } @@ -106,13 +114,20 @@ public class CarbonReaderExample { int i = 0; while (reader.hasNext()) { Object[] row = (Object[]) reader.readNextRow(); - System.out.println(String.format("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t", + System.out.println(String.format("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t", i, row[0], row[1], row[2], row[3], row[4], row[5], -new Date((day * ((int) row[6]))), new Timestamp((long) 
row[7] / 1000), row[8] +new Date((day * ((int) row[6]))), new Timestamp((long) row[7] / 1000), +row[8], row[9] )); +Object[] arr = (Object[]) row[10]; +for (int j = 0; j < arr.l
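The essential fix in the example is closing each reader when done. A minimal sketch using the same SDK reader API; the path and projection values are illustrative:

import org.apache.carbondata.sdk.file.CarbonReader;

public class ReaderCloseSketch {
  public static void readAll(String path) throws Exception {
    CarbonReader reader = CarbonReader.builder(path, "_temp")
        .projection(new String[]{"stringField", "intField"})
        .build();
    try {
      while (reader.hasNext()) {
        Object[] row = (Object[]) reader.readNextRow();
        System.out.println(row[0] + "\t" + row[1]);
      }
    } finally {
      // the missing step in the original example: an unclosed reader left
      // state behind that made a subsequent build() on the same path fail
      reader.close();
    }
  }
}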