Repository: incubator-carbondata Updated Branches: refs/heads/master fe36dea66 -> e705aadd9
Fixed measure selection without table order gives wrong result. Fixed comment. Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/7934d7b8 Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/7934d7b8 Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/7934d7b8 Branch: refs/heads/master Commit: 7934d7b8ac03fd98064a213ee01b2b64bece6309 Parents: fe36dea Author: ravipesala <ravi.pes...@gmail.com> Authored: Wed Jan 11 22:47:42 2017 +0530 Committer: jackylk <jacky.li...@huawei.com> Committed: Thu Jan 12 20:16:56 2017 +0800 ---------------------------------------------------------------------- .../impl/DictionaryBasedVectorResultCollector.java | 1 - .../scan/processor/AbstractDataBlockIterator.java | 8 ++++++++ .../spark/sql/execution/CarbonLateDecodeStrategy.scala | 6 +++--- .../carbondata/vectorreader/VectorReaderTestCase.scala | 13 ++++++++++++- 4 files changed, 23 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/7934d7b8/core/src/main/java/org/apache/carbondata/scan/collector/impl/DictionaryBasedVectorResultCollector.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/scan/collector/impl/DictionaryBasedVectorResultCollector.java b/core/src/main/java/org/apache/carbondata/scan/collector/impl/DictionaryBasedVectorResultCollector.java index cab7caf..3ce54de 100644 --- a/core/src/main/java/org/apache/carbondata/scan/collector/impl/DictionaryBasedVectorResultCollector.java +++ b/core/src/main/java/org/apache/carbondata/scan/collector/impl/DictionaryBasedVectorResultCollector.java @@ -101,7 +101,6 @@ public class DictionaryBasedVectorResultCollector extends AbstractScannedResultC complexInfo = complexList.toArray(new 
ColumnVectorInfo[complexList.size()]); Arrays.sort(dictionaryInfo); Arrays.sort(noDictionaryInfo); - Arrays.sort(measureInfo); Arrays.sort(complexInfo); } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/7934d7b8/core/src/main/java/org/apache/carbondata/scan/processor/AbstractDataBlockIterator.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/scan/processor/AbstractDataBlockIterator.java b/core/src/main/java/org/apache/carbondata/scan/processor/AbstractDataBlockIterator.java index 4fc74b9..f996c16 100644 --- a/core/src/main/java/org/apache/carbondata/scan/processor/AbstractDataBlockIterator.java +++ b/core/src/main/java/org/apache/carbondata/scan/processor/AbstractDataBlockIterator.java @@ -22,6 +22,8 @@ import java.io.IOException; import java.util.List; import org.apache.carbondata.common.CarbonIterator; +import org.apache.carbondata.common.logging.LogService; +import org.apache.carbondata.common.logging.LogServiceFactory; import org.apache.carbondata.core.carbon.datastore.DataRefNode; import org.apache.carbondata.core.carbon.querystatistics.QueryStatisticsModel; import org.apache.carbondata.core.datastorage.store.FileHolder; @@ -43,6 +45,9 @@ import org.apache.carbondata.scan.scanner.impl.NonFilterScanner; */ public abstract class AbstractDataBlockIterator extends CarbonIterator<List<Object[]>> { + private static final LogService LOGGER = + LogServiceFactory.getLogService(AbstractDataBlockIterator.class.getName()); + /** * iterator which will be used to iterate over data blocks */ @@ -85,12 +90,15 @@ public abstract class AbstractDataBlockIterator extends CarbonIterator<List<Obje blockletScanner = new NonFilterScanner(blockExecutionInfo, queryStatisticsModel); } if (blockExecutionInfo.isRawRecordDetailQuery()) { + LOGGER.info("Row based raw collector is used to scan and collect the data"); this.scannerResultAggregator = new RawBasedResultCollector(blockExecutionInfo); 
} else if (blockExecutionInfo.isVectorBatchCollector()) { + LOGGER.info("Vector based dictionary collector is used to scan and collect the data"); this.scannerResultAggregator = new DictionaryBasedVectorResultCollector(blockExecutionInfo); } else { + LOGGER.info("Row based dictionary collector is used to scan and collect the data"); this.scannerResultAggregator = new DictionaryBasedResultCollector(blockExecutionInfo); } http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/7934d7b8/integration/spark2/src/main/scala/org/apache/spark/sql/execution/CarbonLateDecodeStrategy.scala ---------------------------------------------------------------------- diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/CarbonLateDecodeStrategy.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/CarbonLateDecodeStrategy.scala index ace92fc..2e6989d 100644 --- a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/CarbonLateDecodeStrategy.scala +++ b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/CarbonLateDecodeStrategy.scala @@ -463,13 +463,13 @@ private[sql] class CarbonLateDecodeStrategy extends SparkStrategy { def supportBatchedDataSource(sqlContext: SQLContext, cols: Seq[Attribute]): Boolean = { val enableReader = { if (sqlContext.sparkSession.conf.contains(CarbonCommonConstants.ENABLE_VECTOR_READER)) { - sqlContext.sparkSession.conf.get(CarbonCommonConstants.ENABLE_VECTOR_READER).toBoolean + sqlContext.sparkSession.conf.get(CarbonCommonConstants.ENABLE_VECTOR_READER) } else { System.getProperty(CarbonCommonConstants.ENABLE_VECTOR_READER, - CarbonCommonConstants.ENABLE_VECTOR_READER_DEFAULT).toBoolean + CarbonCommonConstants.ENABLE_VECTOR_READER_DEFAULT) } } - sqlContext.conf.wholeStageEnabled && enableReader && + sqlContext.conf.wholeStageEnabled && enableReader.toBoolean && cols.forall(_.dataType.isInstanceOf[AtomicType]) } } 
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/7934d7b8/integration/spark2/src/test/scala/org/apache/spark/carbondata/vectorreader/VectorReaderTestCase.scala ---------------------------------------------------------------------- diff --git a/integration/spark2/src/test/scala/org/apache/spark/carbondata/vectorreader/VectorReaderTestCase.scala b/integration/spark2/src/test/scala/org/apache/spark/carbondata/vectorreader/VectorReaderTestCase.scala index 491a4ff..79875fe 100644 --- a/integration/spark2/src/test/scala/org/apache/spark/carbondata/vectorreader/VectorReaderTestCase.scala +++ b/integration/spark2/src/test/scala/org/apache/spark/carbondata/vectorreader/VectorReaderTestCase.scala @@ -19,7 +19,9 @@ package org.apache.spark.carbondata.vectorreader +import org.apache.spark.sql.Row import org.apache.spark.sql.common.util.QueryTest +import org.apache.spark.sql.execution.command.LoadTable import org.apache.spark.sql.execution.{BatchedDataSourceScanExec, RowDataSourceScanExec} import org.scalatest.BeforeAndAfterAll @@ -37,11 +39,14 @@ class VectorReaderTestCase extends QueryTest with BeforeAndAfterAll { .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy/MM/dd") sql( """ - CREATE TABLE default.vectorreader + CREATE TABLE vectorreader (ID Int, date Timestamp, country String, name String, phonetype String, serialname String, salary Int) USING org.apache.spark.sql.CarbonSource + OPTIONS("tableName"="vectorreader") """) + LoadTable(Some("default"), "vectorreader", s"$resourcesPath/dataDiff.csv", Nil, + Map(("use_kettle", "false"))).run(sqlContext.sparkSession) } test("test vector reader") { @@ -66,6 +71,12 @@ class VectorReaderTestCase extends QueryTest with BeforeAndAfterAll { assert(rowReader, "row reader should exist by default") } + test("test vector reader for random measure selection") { + sqlContext.setConf("carbon.enable.vector.reader", "true") + checkAnswer(sql("""select salary, ID from vectorreader where ID=394""".stripMargin), + 
Seq(Row(15393, 394))) + } + override def afterAll { sql("DROP TABLE IF EXISTS vectorreader") }