[CARBONDATA-2757][BloomDataMap] Fix bug when building bloomfilter on measure column
1. Support getting raw data from a decimal column page when building the datamap during the loading process. 2. Convert decimal column values to the Java data type when rebuilding the bloom datamap from query results. 3. Convert boolean values to byte, since Carbon stores boolean as byte. 4. Fix bugs that occur when a measure column value is null. This closes #2526 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/8046bca5 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/8046bca5 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/8046bca5 Branch: refs/heads/branch-1.4 Commit: 8046bca59c38b807e78507a89725e8d72aacaab2 Parents: 92b2070 Author: Manhua <kevin...@qq.com> Authored: Thu Jul 19 16:26:18 2018 +0800 Committer: ravipesala <ravi.pes...@gmail.com> Committed: Tue Jul 31 00:10:41 2018 +0530 ---------------------------------------------------------------------- .../core/datastore/page/ColumnPage.java | 3 + .../core/datastore/page/DecimalColumnPage.java | 48 ++++++ .../datastore/page/SafeDecimalColumnPage.java | 21 --- .../datastore/page/UnsafeDecimalColumnPage.java | 23 --- .../bloom/AbstractBloomDataMapWriter.java | 10 +- .../datamap/bloom/BloomCoarseGrainDataMap.java | 12 +- .../datamap/bloom/DataConvertUtil.java | 22 ++- .../datamap/IndexDataMapRebuildRDD.scala | 14 +- .../bloom/BloomCoarseGrainDataMapSuite.scala | 169 +++++++++++++++++++ 9 files changed, 270 insertions(+), 52 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/carbondata/blob/8046bca5/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java index ea250cf..75e47de 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java 
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java @@ -525,6 +525,9 @@ public abstract class ColumnPage { result = getBoolean(rowId); } else if (dataType == DataTypes.BYTE) { result = getByte(rowId); + if (columnSpec.getSchemaDataType() == DataTypes.BOOLEAN) { + result = BooleanConvert.byte2Boolean((byte)result); + } } else if (dataType == DataTypes.SHORT) { result = getShort(rowId); } else if (dataType == DataTypes.INT) { http://git-wip-us.apache.org/repos/asf/carbondata/blob/8046bca5/core/src/main/java/org/apache/carbondata/core/datastore/page/DecimalColumnPage.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/DecimalColumnPage.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/DecimalColumnPage.java index 2624223..368a289 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/DecimalColumnPage.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/DecimalColumnPage.java @@ -17,8 +17,11 @@ package org.apache.carbondata.core.datastore.page; +import java.math.BigDecimal; + import org.apache.carbondata.core.datastore.TableSpec; import org.apache.carbondata.core.metadata.datatype.DataType; +import org.apache.carbondata.core.metadata.datatype.DataTypes; import org.apache.carbondata.core.metadata.datatype.DecimalConverterFactory; /** @@ -106,4 +109,49 @@ public abstract class DecimalColumnPage extends VarLengthColumnPageBase { throw new UnsupportedOperationException("invalid data type: " + dataType); } + // used for building datamap in loading process + private BigDecimal getDecimalFromRawData(int rowId) { + long value; + switch (decimalConverter.getDecimalConverterType()) { + case DECIMAL_INT: + value = getInt(rowId); + break; + case DECIMAL_LONG: + value = getLong(rowId); + break; + default: + value = getByte(rowId); + } + return decimalConverter.getDecimal(value); + } + + private 
BigDecimal getDecimalFromDecompressData(int rowId) { + long value; + if (dataType == DataTypes.BYTE) { + value = getByte(rowId); + } else if (dataType == DataTypes.SHORT) { + value = getShort(rowId); + } else if (dataType == DataTypes.SHORT_INT) { + value = getShortInt(rowId); + } else if (dataType == DataTypes.INT) { + value = getInt(rowId); + } else if (dataType == DataTypes.LONG) { + value = getLong(rowId); + } else { + return decimalConverter.getDecimal(getBytes(rowId)); + } + return decimalConverter.getDecimal(value); + } + + @Override + public BigDecimal getDecimal(int rowId) { + // rowOffset is initialed for query in `VarLengthColumnPageBase.getDecimalColumnPage` + // if its size is 0, we are in loading process and the data in column page is raw + if (rowOffset.getActualRowCount() == 0) { + return getDecimalFromRawData(rowId); + } else { + return getDecimalFromDecompressData(rowId); + } + } + } http://git-wip-us.apache.org/repos/asf/carbondata/blob/8046bca5/core/src/main/java/org/apache/carbondata/core/datastore/page/SafeDecimalColumnPage.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/SafeDecimalColumnPage.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/SafeDecimalColumnPage.java index c220fa4..89ac4a4 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/SafeDecimalColumnPage.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/SafeDecimalColumnPage.java @@ -21,7 +21,6 @@ import java.math.BigDecimal; import org.apache.carbondata.core.datastore.TableSpec; import org.apache.carbondata.core.metadata.datatype.DataType; -import org.apache.carbondata.core.metadata.datatype.DataTypes; import org.apache.carbondata.core.util.ByteUtil; /** @@ -172,26 +171,6 @@ public class SafeDecimalColumnPage extends DecimalColumnPage { } @Override - public BigDecimal getDecimal(int rowId) { - long value; - if (dataType == 
DataTypes.BYTE) { - value = getByte(rowId); - } else if (dataType == DataTypes.SHORT) { - value = getShort(rowId); - } else if (dataType == DataTypes.SHORT_INT) { - value = getShortInt(rowId); - } else if (dataType == DataTypes.INT) { - value = getInt(rowId); - } else if (dataType == DataTypes.LONG) { - value = getLong(rowId); - } else { - byte[] bytes = byteArrayData[rowId]; - return decimalConverter.getDecimal(bytes); - } - return decimalConverter.getDecimal(value); - } - - @Override public void copyBytes(int rowId, byte[] dest, int destOffset, int length) { System.arraycopy(byteArrayData[rowId], 0, dest, destOffset, length); } http://git-wip-us.apache.org/repos/asf/carbondata/blob/8046bca5/core/src/main/java/org/apache/carbondata/core/datastore/page/UnsafeDecimalColumnPage.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/UnsafeDecimalColumnPage.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/UnsafeDecimalColumnPage.java index e4a949a..96aeac2 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/UnsafeDecimalColumnPage.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/UnsafeDecimalColumnPage.java @@ -230,29 +230,6 @@ public class UnsafeDecimalColumnPage extends DecimalColumnPage { } @Override - public BigDecimal getDecimal(int rowId) { - long value; - if (dataType == DataTypes.BYTE) { - value = getByte(rowId); - } else if (dataType == DataTypes.SHORT) { - value = getShort(rowId); - } else if (dataType == DataTypes.SHORT_INT) { - value = getShortInt(rowId); - } else if (dataType == DataTypes.INT) { - value = getInt(rowId); - } else if (dataType == DataTypes.LONG) { - value = getLong(rowId); - } else { - int length = rowOffset.getInt(rowId + 1) - rowOffset.getInt(rowId); - byte[] bytes = new byte[length]; - CarbonUnsafe.getUnsafe().copyMemory(baseAddress, baseOffset + rowOffset.getInt(rowId), bytes, - 
CarbonUnsafe.BYTE_ARRAY_OFFSET, length); - return decimalConverter.getDecimal(bytes); - } - return decimalConverter.getDecimal(value); - } - - @Override void copyBytes(int rowId, byte[] dest, int destOffset, int length) { CarbonUnsafe.getUnsafe().copyMemory(baseAddress, baseOffset + rowOffset.getInt(rowId), dest, CarbonUnsafe.BYTE_ARRAY_OFFSET + destOffset, length); http://git-wip-us.apache.org/repos/asf/carbondata/blob/8046bca5/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/AbstractBloomDataMapWriter.java ---------------------------------------------------------------------- diff --git a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/AbstractBloomDataMapWriter.java b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/AbstractBloomDataMapWriter.java index 176be6e..c5508fe 100644 --- a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/AbstractBloomDataMapWriter.java +++ b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/AbstractBloomDataMapWriter.java @@ -31,6 +31,8 @@ import org.apache.carbondata.core.datamap.dev.DataMapWriter; import org.apache.carbondata.core.datastore.block.SegmentProperties; import org.apache.carbondata.core.datastore.impl.FileFactory; import org.apache.carbondata.core.datastore.page.ColumnPage; +import org.apache.carbondata.core.datastore.page.encoding.bool.BooleanConvert; +import org.apache.carbondata.core.metadata.datatype.DataTypes; import org.apache.carbondata.core.metadata.encoder.Encoding; import org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn; import org.apache.carbondata.core.util.CarbonUtil; @@ -129,8 +131,12 @@ public abstract class AbstractBloomDataMapWriter extends DataMapWriter { // convert non-dict dimensions to simple bytes without length // convert internal-dict dimensions to simple bytes without any encode if (indexColumns.get(indexColIdx).isMeasure()) { - if (value == null) { - value = 
DataConvertUtil.getNullValueForMeasure(indexColumns.get(indexColIdx).getDataType()); + // NULL value of all measures are already processed in `ColumnPage.getData` + // or `RawBytesReadSupport.readRow` with actual data type + + // Carbon stores boolean as byte. Here we convert it for `getValueAsBytes` + if (indexColumns.get(indexColIdx).getDataType().equals(DataTypes.BOOLEAN)) { + value = BooleanConvert.boolean2Byte((Boolean)value); } indexValue = CarbonUtil.getValueAsBytes(indexColumns.get(indexColIdx).getDataType(), value); } else { http://git-wip-us.apache.org/repos/asf/carbondata/blob/8046bca5/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java ---------------------------------------------------------------------- diff --git a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java index 96f3495..26db300 100644 --- a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java +++ b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java @@ -42,6 +42,7 @@ import org.apache.carbondata.core.datamap.dev.DataMapModel; import org.apache.carbondata.core.datamap.dev.cgdatamap.CoarseGrainDataMap; import org.apache.carbondata.core.datastore.block.SegmentProperties; import org.apache.carbondata.core.datastore.impl.FileFactory; +import org.apache.carbondata.core.datastore.page.encoding.bool.BooleanConvert; import org.apache.carbondata.core.devapi.DictionaryGenerationException; import org.apache.carbondata.core.indexstore.Blocklet; import org.apache.carbondata.core.indexstore.PartitionSpec; @@ -242,7 +243,9 @@ public class BloomCoarseGrainDataMap extends CoarseGrainDataMap { Object expressionValue = le.getLiteralExpValue(); Object literalValue; // note that if the datatype is date/timestamp, the expressionValue is long type. 
- if (le.getLiteralExpDataType() == DataTypes.DATE) { + if (null == expressionValue) { + literalValue = null; + } else if (le.getLiteralExpDataType() == DataTypes.DATE) { DateFormat format = new SimpleDateFormat(CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT); // the below settings are set statically according to DateDirectDirectionaryGenerator format.setLenient(false); @@ -292,7 +295,12 @@ public class BloomCoarseGrainDataMap extends CoarseGrainDataMap { if (carbonColumn.isMeasure()) { // for measures, the value is already the type, just convert it to bytes. if (convertedValue == null) { - convertedValue = DataConvertUtil.getNullValueForMeasure(carbonColumn.getDataType()); + convertedValue = DataConvertUtil.getNullValueForMeasure(carbonColumn.getDataType(), + carbonColumn.getColumnSchema().getScale()); + } + // Carbon stores boolean as byte. Here we convert it for `getValueAsBytes` + if (carbonColumn.getDataType().equals(DataTypes.BOOLEAN)) { + convertedValue = BooleanConvert.boolean2Byte((Boolean)convertedValue); } internalFilterValue = CarbonUtil.getValueAsBytes(carbonColumn.getDataType(), convertedValue); } else if (carbonColumn.hasEncoding(Encoding.DIRECT_DICTIONARY) || http://git-wip-us.apache.org/repos/asf/carbondata/blob/8046bca5/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/DataConvertUtil.java ---------------------------------------------------------------------- diff --git a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/DataConvertUtil.java b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/DataConvertUtil.java index 35a4367..f59202d 100644 --- a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/DataConvertUtil.java +++ b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/DataConvertUtil.java @@ -46,14 +46,30 @@ public class DataConvertUtil { /** * return default null value based on datatype. 
This method refers to ColumnPage.putNull + * + * Note: since we can not mark NULL with corresponding data type in bloom datamap + * we set/get a `NullValue` for NULL, such that pruning using bloom filter + * will have false positive case if filter value is the `NullValue`. + * This should not affect the correctness of result */ - public static Object getNullValueForMeasure(DataType dataType) { + public static Object getNullValueForMeasure(DataType dataType, int scale) { if (dataType == DataTypes.BOOLEAN) { return false; + } else if (dataType == DataTypes.BYTE) { + return (byte) 0; + } else if (dataType == DataTypes.SHORT) { + return (short) 0; + } else if (dataType == DataTypes.INT) { + return 0; + } else if (dataType == DataTypes.LONG) { + return 0L; + } else if (dataType == DataTypes.DOUBLE) { + return 0.0; } else if (DataTypes.isDecimal(dataType)) { - return BigDecimal.ZERO; + // keep consistence with `DecimalConverter.getDecimal` in loading process + return BigDecimal.valueOf(0, scale); } else { - return 0; + throw new IllegalArgumentException("unsupported data type: " + dataType); } } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/8046bca5/integration/spark2/src/main/scala/org/apache/carbondata/datamap/IndexDataMapRebuildRDD.scala ---------------------------------------------------------------------- diff --git a/integration/spark2/src/main/scala/org/apache/carbondata/datamap/IndexDataMapRebuildRDD.scala b/integration/spark2/src/main/scala/org/apache/carbondata/datamap/IndexDataMapRebuildRDD.scala index 70e5cba..29df1ca 100644 --- a/integration/spark2/src/main/scala/org/apache/carbondata/datamap/IndexDataMapRebuildRDD.scala +++ b/integration/spark2/src/main/scala/org/apache/carbondata/datamap/IndexDataMapRebuildRDD.scala @@ -33,6 +33,7 @@ import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl import org.apache.spark.{CarbonInputMetrics, Partition, TaskContext} import org.apache.spark.deploy.SparkHadoopUtil import 
org.apache.spark.sql.SparkSession +import org.apache.spark.sql.types.Decimal import org.apache.carbondata.common.logging.LogServiceFactory import org.apache.carbondata.core.constants.CarbonCommonConstants @@ -52,6 +53,7 @@ import org.apache.carbondata.core.scan.wrappers.ByteArrayWrapper import org.apache.carbondata.core.statusmanager.SegmentStatusManager import org.apache.carbondata.core.util.{CarbonUtil, TaskMetricsMap} import org.apache.carbondata.core.util.path.CarbonTablePath +import org.apache.carbondata.datamap.bloom.DataConvertUtil import org.apache.carbondata.events.{BuildDataMapPostExecutionEvent, BuildDataMapPreExecutionEvent, OperationContext, OperationListenerBus} import org.apache.carbondata.hadoop.{CarbonInputSplit, CarbonMultiBlockSplit, CarbonProjection, CarbonRecordReader} import org.apache.carbondata.hadoop.api.{CarbonInputFormat, CarbonTableInputFormat} @@ -267,7 +269,17 @@ class RawBytesReadSupport(segmentProperties: SegmentProperties, indexColumns: Ar indexCol2IdxInNoDictArray(col.getColName)) } else { // measures start from 1 - data(1 + indexCol2IdxInMeasureArray(col.getColName)) + val value = data(1 + indexCol2IdxInMeasureArray(col.getColName)) + if (null == value) { + DataConvertUtil.getNullValueForMeasure(col.getDataType, + col.getColumnSchema.getScale) + } else if (DataTypes.isDecimal(col.getDataType)) { + // In rebuild process, value is built for spark + // here we need to convert it to java BigDecimal for carbon + value.asInstanceOf[Decimal].toBigDecimal.bigDecimal + } else { + value + } } } rtn(indexColumns.length) = data(data.length - 3) http://git-wip-us.apache.org/repos/asf/carbondata/blob/8046bca5/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala ---------------------------------------------------------------------- diff --git a/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala 
b/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala index 12cd234..7871518 100644 --- a/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala +++ b/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala @@ -647,6 +647,175 @@ class BloomCoarseGrainDataMapSuite extends QueryTest with BeforeAndAfterAll with } + test("test bloom datamap on all basic data types") { + val originTimestampFormat = CarbonProperties.getInstance().getProperty( + CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, + CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT) + val originDateFormat = CarbonProperties.getInstance().getProperty( + CarbonCommonConstants.CARBON_DATE_FORMAT, + CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT) + + CarbonProperties.getInstance().addProperty( + CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy/MM/dd HH:mm:ss") + CarbonProperties.getInstance().addProperty( + CarbonCommonConstants.CARBON_DATE_FORMAT, "yyyy/MM/dd") + + val columnNames = "booleanField,shortField,intField,bigintField,doubleField,stringField," + + "timestampField,decimalField,dateField,charField,floatField" + + sql( + s""" + | CREATE TABLE $bloomDMSampleTable( + | booleanField boolean, + | shortField smallint, + | intField int, + | bigintField bigint, + | doubleField double, + | stringField string, + | timestampField timestamp, + | decimalField decimal(18,2), + | dateField date, + | charField string, + | floatField float + | ) + | STORED BY 'carbondata' + """.stripMargin) + + sql( + s""" + | CREATE TABLE $normalTable( + | booleanField boolean, + | shortField smallint, + | intField int, + | bigintField bigint, + | doubleField double, + | stringField string, + | timestampField timestamp, + | decimalField decimal(18,2), + | dateField date, + | charField string, + | floatField float + | ) + | STORED BY 'carbondata' + """.stripMargin) + + // first 
data load + sql( + s""" + | INSERT INTO TABLE $bloomDMSampleTable + | VALUES(true,1,10,100,48.4,'spark','2015/4/23 12:01:01',1.23,'2015/4/23','aaa',2.5), + | (true,1,11,100,44.4,'flink','2015/5/23 12:01:03',23.23,'2015/5/23','ccc',2.15), + | (true,3,14,160,43.4,'hive','2015/7/26 12:01:06',3454.32,'2015/7/26','ff',5.5), + | (NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL) + """.stripMargin) + sql( + s""" + | INSERT INTO TABLE $normalTable + | VALUES(true,1,10,100,48.4,'spark','2015/4/23 12:01:01',1.23,'2015/4/23','aaa',2.5), + | (true,1,11,100,44.4,'flink','2015/5/23 12:01:03',23.23,'2015/5/23','ccc',2.15), + | (true,3,14,160,43.4,'hive','2015/7/26 12:01:06',3454.32,'2015/7/26','ff',5.5), + | (NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL) + """.stripMargin) + + // create datamap + sql( + s""" + | CREATE DATAMAP dm_test ON TABLE $bloomDMSampleTable + | USING 'bloomfilter' + | DMProperties('INDEX_COLUMNS'='$columnNames', + | 'BLOOM_SIZE'='640000') + """.stripMargin) + + // second data load + sql( + s""" + | INSERT INTO TABLE $bloomDMSampleTable + | VALUES(true,1,10,100,48.4,'spark','2015/4/23 12:01:01',1.23,'2015/4/23','aaa',2.5), + | (true,1,11,100,44.4,'flink','2015/5/23 12:01:03',23.23,'2015/5/23','ccc',2.15), + | (true,3,14,160,43.4,'hive','2015/7/26 12:01:06',3454.32,'2015/7/26','ff',5.5), + | (NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL) + """.stripMargin) + sql( + s""" + | INSERT INTO TABLE $normalTable + | VALUES(true,1,10,100,48.4,'spark','2015/4/23 12:01:01',1.23,'2015/4/23','aaa',2.5), + | (true,1,11,100,44.4,'flink','2015/5/23 12:01:03',23.23,'2015/5/23','ccc',2.15), + | (true,3,14,160,43.4,'hive','2015/7/26 12:01:06',3454.32,'2015/7/26','ff',5.5), + | (NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL) + """.stripMargin) + + // check simply query + checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE booleanField = true"), + sql(s"SELECT * FROM $normalTable WHERE booleanField = true")) + checkAnswer(sql(s"SELECT * 
FROM $bloomDMSampleTable WHERE shortField = 3"), + sql(s"SELECT * FROM $normalTable WHERE shortField = 3")) + checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE intField = 14"), + sql(s"SELECT * FROM $normalTable WHERE intField = 14")) + checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE bigintField = 100"), + sql(s"SELECT * FROM $normalTable WHERE bigintField = 100")) + checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE doubleField = 43.4"), + sql(s"SELECT * FROM $normalTable WHERE doubleField = 43.4")) + checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE stringField = 'spark'"), + sql(s"SELECT * FROM $normalTable WHERE stringField = 'spark'")) + checkAnswer( + sql(s"SELECT * FROM $bloomDMSampleTable WHERE timestampField = '2015/7/26 12:01:06'"), + sql(s"SELECT * FROM $normalTable WHERE timestampField = '2015/7/26 12:01:06'")) + checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE decimalField = 23.23"), + sql(s"SELECT * FROM $normalTable WHERE decimalField = 23.23")) + checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE dateField = '2015/4/23'"), + sql(s"SELECT * FROM $normalTable WHERE dateField = '2015/4/23'")) + checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE charField = 'ccc'"), + sql(s"SELECT * FROM $normalTable WHERE charField = 'ccc'")) + checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE floatField = 2.5"), + sql(s"SELECT * FROM $normalTable WHERE floatField = 2.5")) + + // check query using null + checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE booleanField is null"), + sql(s"SELECT * FROM $normalTable WHERE booleanField is null")) + checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE shortField is null"), + sql(s"SELECT * FROM $normalTable WHERE shortField is null")) + checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE intField is null"), + sql(s"SELECT * FROM $normalTable WHERE intField is null")) + checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE bigintField is 
null"), + sql(s"SELECT * FROM $normalTable WHERE bigintField is null")) + checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE doubleField is null"), + sql(s"SELECT * FROM $normalTable WHERE doubleField is null")) + checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE stringField is null"), + sql(s"SELECT * FROM $normalTable WHERE stringField is null")) + checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE timestampField is null"), + sql(s"SELECT * FROM $normalTable WHERE timestampField is null")) + checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE decimalField is null"), + sql(s"SELECT * FROM $normalTable WHERE decimalField is null")) + checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE dateField is null"), + sql(s"SELECT * FROM $normalTable WHERE dateField is null")) + checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE charField is null"), + sql(s"SELECT * FROM $normalTable WHERE charField is null")) + checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE floatField is null"), + sql(s"SELECT * FROM $normalTable WHERE floatField is null")) + + // check default `NullValue` of measure does not affect result + // Note: Test data has row contains NULL for each column but no corresponding `NullValue`, + // so we should get 0 row if query uses the `NullValue` + checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE booleanField = false"), + sql(s"SELECT * FROM $normalTable WHERE booleanField = false")) + checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE shortField = 0"), + sql(s"SELECT * FROM $normalTable WHERE shortField = 0")) + checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE intField = 0"), + sql(s"SELECT * FROM $normalTable WHERE intField = 0")) + checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE bigintField = 0"), + sql(s"SELECT * FROM $normalTable WHERE bigintField = 0")) + checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE doubleField = 0"), + sql(s"SELECT * FROM $normalTable WHERE 
doubleField = 0")) + checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE decimalField = 0"), + sql(s"SELECT * FROM $normalTable WHERE decimalField = 0")) + checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE floatField = 0"), + sql(s"SELECT * FROM $normalTable WHERE floatField = 0")) + + CarbonProperties.getInstance().addProperty( + CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, originTimestampFormat) + CarbonProperties.getInstance().addProperty( + CarbonCommonConstants.CARBON_DATE_FORMAT, originDateFormat) + } override protected def afterAll(): Unit = { deleteFile(bigFile)