[CARBONDATA-2757][BloomDataMap] Fix bug when building bloomfilter on measure column

1. support getting raw data from a decimal column page when building the datamap during the loading process
2. convert decimal columns to the Java data type when rebuilding the bloom datamap from query results
3. convert boolean to byte, since Carbon stores boolean values as bytes (see the sketch after this list)
4. fix bugs when the measure column is null
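
For item 3, here is a minimal standalone sketch of the boolean-to-byte round trip that both the write path and the query path need to apply. It does not reuse Carbon's BooleanConvert class; the 1/0 encoding is an assumption made only for illustration.

// Hypothetical stand-in for the boolean<->byte conversion; not Carbon's actual implementation.
public final class BooleanByteSketch {
  static byte boolean2Byte(boolean data) {
    return data ? (byte) 1 : (byte) 0;  // store the boolean as a single byte
  }

  static boolean byte2Boolean(byte data) {
    return data == 1;                   // read it back when building or probing the bloom index
  }

  public static void main(String[] args) {
    byte stored = boolean2Byte(true);
    System.out.println(byte2Boolean(stored));  // prints true
  }
}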

This closes #2526


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/8046bca5
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/8046bca5
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/8046bca5

Branch: refs/heads/branch-1.4
Commit: 8046bca59c38b807e78507a89725e8d72aacaab2
Parents: 92b2070
Author: Manhua <kevin...@qq.com>
Authored: Thu Jul 19 16:26:18 2018 +0800
Committer: ravipesala <ravi.pes...@gmail.com>
Committed: Tue Jul 31 00:10:41 2018 +0530

----------------------------------------------------------------------
 .../core/datastore/page/ColumnPage.java         |   3 +
 .../core/datastore/page/DecimalColumnPage.java  |  48 ++++++
 .../datastore/page/SafeDecimalColumnPage.java   |  21 ---
 .../datastore/page/UnsafeDecimalColumnPage.java |  23 ---
 .../bloom/AbstractBloomDataMapWriter.java       |  10 +-
 .../datamap/bloom/BloomCoarseGrainDataMap.java  |  12 +-
 .../datamap/bloom/DataConvertUtil.java          |  22 ++-
 .../datamap/IndexDataMapRebuildRDD.scala        |  14 +-
 .../bloom/BloomCoarseGrainDataMapSuite.scala    | 169 +++++++++++++++++++
 9 files changed, 270 insertions(+), 52 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/8046bca5/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java
index ea250cf..75e47de 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java
@@ -525,6 +525,9 @@ public abstract class ColumnPage {
       result = getBoolean(rowId);
     } else if (dataType == DataTypes.BYTE) {
       result = getByte(rowId);
+      if (columnSpec.getSchemaDataType() == DataTypes.BOOLEAN) {
+        result = BooleanConvert.byte2Boolean((byte)result);
+      }
     } else if (dataType == DataTypes.SHORT) {
       result = getShort(rowId);
     } else if (dataType == DataTypes.INT) {

http://git-wip-us.apache.org/repos/asf/carbondata/blob/8046bca5/core/src/main/java/org/apache/carbondata/core/datastore/page/DecimalColumnPage.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/DecimalColumnPage.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/DecimalColumnPage.java
index 2624223..368a289 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/page/DecimalColumnPage.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/DecimalColumnPage.java
@@ -17,8 +17,11 @@
 
 package org.apache.carbondata.core.datastore.page;
 
+import java.math.BigDecimal;
+
 import org.apache.carbondata.core.datastore.TableSpec;
 import org.apache.carbondata.core.metadata.datatype.DataType;
+import org.apache.carbondata.core.metadata.datatype.DataTypes;
 import org.apache.carbondata.core.metadata.datatype.DecimalConverterFactory;
 
 /**
@@ -106,4 +109,49 @@ public abstract class DecimalColumnPage extends VarLengthColumnPageBase {
     throw new UnsupportedOperationException("invalid data type: " + dataType);
   }
 
+  // used for building datamap in loading process
+  private BigDecimal getDecimalFromRawData(int rowId) {
+    long value;
+    switch (decimalConverter.getDecimalConverterType()) {
+      case DECIMAL_INT:
+        value = getInt(rowId);
+        break;
+      case DECIMAL_LONG:
+        value = getLong(rowId);
+        break;
+      default:
+        value = getByte(rowId);
+    }
+    return decimalConverter.getDecimal(value);
+  }
+
+  private BigDecimal getDecimalFromDecompressData(int rowId) {
+    long value;
+    if (dataType == DataTypes.BYTE) {
+      value = getByte(rowId);
+    } else if (dataType == DataTypes.SHORT) {
+      value = getShort(rowId);
+    } else if (dataType == DataTypes.SHORT_INT) {
+      value = getShortInt(rowId);
+    } else if (dataType == DataTypes.INT) {
+      value = getInt(rowId);
+    } else if (dataType == DataTypes.LONG) {
+      value = getLong(rowId);
+    } else {
+      return decimalConverter.getDecimal(getBytes(rowId));
+    }
+    return decimalConverter.getDecimal(value);
+  }
+
+  @Override
+  public BigDecimal getDecimal(int rowId) {
+    // rowOffset is initialized for query in `VarLengthColumnPageBase.getDecimalColumnPage`;
+    // if its size is 0, we are in the loading process and the data in the column page is raw
+    if (rowOffset.getActualRowCount() == 0) {
+      return getDecimalFromRawData(rowId);
+    } else {
+      return getDecimalFromDecompressData(rowId);
+    }
+  }
+
 }
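
The two private helpers above resolve a decimal value either from the raw page (during loading) or from the decompressed page (during query), but both ultimately reconstruct a BigDecimal from an unscaled value plus the column's scale. A minimal standalone sketch of that representation, with illustrative names only (it does not use Carbon's DecimalConverter):

import java.math.BigDecimal;

// Sketch of the unscaled-value + scale representation behind getDecimalFromRawData /
// getDecimalFromDecompressData; names and values here are purely illustrative.
public final class DecimalUnscaledSketch {
  static BigDecimal toDecimal(long unscaled, int scale) {
    // e.g. unscaled = 2323, scale = 2  ->  23.23
    return BigDecimal.valueOf(unscaled, scale);
  }

  public static void main(String[] args) {
    System.out.println(toDecimal(2323L, 2));  // prints 23.23
  }
}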

http://git-wip-us.apache.org/repos/asf/carbondata/blob/8046bca5/core/src/main/java/org/apache/carbondata/core/datastore/page/SafeDecimalColumnPage.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/SafeDecimalColumnPage.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/SafeDecimalColumnPage.java
index c220fa4..89ac4a4 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/page/SafeDecimalColumnPage.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/SafeDecimalColumnPage.java
@@ -21,7 +21,6 @@ import java.math.BigDecimal;
 
 import org.apache.carbondata.core.datastore.TableSpec;
 import org.apache.carbondata.core.metadata.datatype.DataType;
-import org.apache.carbondata.core.metadata.datatype.DataTypes;
 import org.apache.carbondata.core.util.ByteUtil;
 
 /**
@@ -172,26 +171,6 @@ public class SafeDecimalColumnPage extends DecimalColumnPage {
   }
 
   @Override
-  public BigDecimal getDecimal(int rowId) {
-    long value;
-    if (dataType == DataTypes.BYTE) {
-      value = getByte(rowId);
-    } else if (dataType == DataTypes.SHORT) {
-      value = getShort(rowId);
-    } else if (dataType == DataTypes.SHORT_INT) {
-      value = getShortInt(rowId);
-    } else if (dataType == DataTypes.INT) {
-      value = getInt(rowId);
-    } else if (dataType == DataTypes.LONG) {
-      value = getLong(rowId);
-    } else {
-      byte[] bytes = byteArrayData[rowId];
-      return decimalConverter.getDecimal(bytes);
-    }
-    return decimalConverter.getDecimal(value);
-  }
-
-  @Override
   public void copyBytes(int rowId, byte[] dest, int destOffset, int length) {
     System.arraycopy(byteArrayData[rowId], 0, dest, destOffset, length);
   }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/8046bca5/core/src/main/java/org/apache/carbondata/core/datastore/page/UnsafeDecimalColumnPage.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/UnsafeDecimalColumnPage.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/UnsafeDecimalColumnPage.java
index e4a949a..96aeac2 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/page/UnsafeDecimalColumnPage.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/UnsafeDecimalColumnPage.java
@@ -230,29 +230,6 @@ public class UnsafeDecimalColumnPage extends DecimalColumnPage {
   }
 
   @Override
-  public BigDecimal getDecimal(int rowId) {
-    long value;
-    if (dataType == DataTypes.BYTE) {
-      value = getByte(rowId);
-    } else if (dataType == DataTypes.SHORT) {
-      value = getShort(rowId);
-    } else if (dataType == DataTypes.SHORT_INT) {
-      value = getShortInt(rowId);
-    } else if (dataType == DataTypes.INT) {
-      value = getInt(rowId);
-    } else if (dataType == DataTypes.LONG) {
-      value = getLong(rowId);
-    } else {
-      int length = rowOffset.getInt(rowId + 1) - rowOffset.getInt(rowId);
-      byte[] bytes = new byte[length];
-      CarbonUnsafe.getUnsafe().copyMemory(baseAddress, baseOffset + rowOffset.getInt(rowId), bytes,
-          CarbonUnsafe.BYTE_ARRAY_OFFSET, length);
-      return decimalConverter.getDecimal(bytes);
-    }
-    return decimalConverter.getDecimal(value);
-  }
-
-  @Override
   void copyBytes(int rowId, byte[] dest, int destOffset, int length) {
    CarbonUnsafe.getUnsafe().copyMemory(baseAddress, baseOffset + rowOffset.getInt(rowId), dest,
         CarbonUnsafe.BYTE_ARRAY_OFFSET + destOffset, length);

http://git-wip-us.apache.org/repos/asf/carbondata/blob/8046bca5/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/AbstractBloomDataMapWriter.java
----------------------------------------------------------------------
diff --git a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/AbstractBloomDataMapWriter.java b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/AbstractBloomDataMapWriter.java
index 176be6e..c5508fe 100644
--- a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/AbstractBloomDataMapWriter.java
+++ b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/AbstractBloomDataMapWriter.java
@@ -31,6 +31,8 @@ import org.apache.carbondata.core.datamap.dev.DataMapWriter;
 import org.apache.carbondata.core.datastore.block.SegmentProperties;
 import org.apache.carbondata.core.datastore.impl.FileFactory;
 import org.apache.carbondata.core.datastore.page.ColumnPage;
+import org.apache.carbondata.core.datastore.page.encoding.bool.BooleanConvert;
+import org.apache.carbondata.core.metadata.datatype.DataTypes;
 import org.apache.carbondata.core.metadata.encoder.Encoding;
 import org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn;
 import org.apache.carbondata.core.util.CarbonUtil;
@@ -129,8 +131,12 @@ public abstract class AbstractBloomDataMapWriter extends DataMapWriter {
     // convert non-dict dimensions to simple bytes without length
     // convert internal-dict dimensions to simple bytes without any encode
     if (indexColumns.get(indexColIdx).isMeasure()) {
-      if (value == null) {
-        value = DataConvertUtil.getNullValueForMeasure(indexColumns.get(indexColIdx).getDataType());
+      // NULL values of all measures are already processed in `ColumnPage.getData`
+      // or `RawBytesReadSupport.readRow` with the actual data type
+
+      // Carbon stores boolean as byte. Here we convert it for `getValueAsBytes`
+      if (indexColumns.get(indexColIdx).getDataType().equals(DataTypes.BOOLEAN)) {
+        value = BooleanConvert.boolean2Byte((Boolean) value);
      }
      indexValue = CarbonUtil.getValueAsBytes(indexColumns.get(indexColIdx).getDataType(), value);
     } else {

http://git-wip-us.apache.org/repos/asf/carbondata/blob/8046bca5/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
----------------------------------------------------------------------
diff --git a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
index 96f3495..26db300 100644
--- a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
+++ b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
@@ -42,6 +42,7 @@ import org.apache.carbondata.core.datamap.dev.DataMapModel;
 import org.apache.carbondata.core.datamap.dev.cgdatamap.CoarseGrainDataMap;
 import org.apache.carbondata.core.datastore.block.SegmentProperties;
 import org.apache.carbondata.core.datastore.impl.FileFactory;
+import org.apache.carbondata.core.datastore.page.encoding.bool.BooleanConvert;
 import org.apache.carbondata.core.devapi.DictionaryGenerationException;
 import org.apache.carbondata.core.indexstore.Blocklet;
 import org.apache.carbondata.core.indexstore.PartitionSpec;
@@ -242,7 +243,9 @@ public class BloomCoarseGrainDataMap extends CoarseGrainDataMap {
     Object expressionValue = le.getLiteralExpValue();
     Object literalValue;
     // note that if the datatype is date/timestamp, the expressionValue is long type.
-    if (le.getLiteralExpDataType() == DataTypes.DATE) {
+    if (null == expressionValue) {
+      literalValue = null;
+    } else if (le.getLiteralExpDataType() == DataTypes.DATE) {
       DateFormat format = new SimpleDateFormat(CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT);
       // the below settings are set statically according to DateDirectDirectionaryGenerator
       format.setLenient(false);
@@ -292,7 +295,12 @@ public class BloomCoarseGrainDataMap extends CoarseGrainDataMap {
     if (carbonColumn.isMeasure()) {
       // for measures, the value is already the type, just convert it to bytes.
       if (convertedValue == null) {
-        convertedValue = DataConvertUtil.getNullValueForMeasure(carbonColumn.getDataType());
+        convertedValue = DataConvertUtil.getNullValueForMeasure(carbonColumn.getDataType(),
+            carbonColumn.getColumnSchema().getScale());
+      }
+      // Carbon stores boolean as byte. Here we convert it for `getValueAsBytes`
+      if (carbonColumn.getDataType().equals(DataTypes.BOOLEAN)) {
+        convertedValue = BooleanConvert.boolean2Byte((Boolean)convertedValue);
      }
      internalFilterValue = CarbonUtil.getValueAsBytes(carbonColumn.getDataType(), convertedValue);
     } else if (carbonColumn.hasEncoding(Encoding.DIRECT_DICTIONARY) ||
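
The writer change above (AbstractBloomDataMapWriter) and this query-side change apply the same boolean-to-byte and null-default conversions, so the bytes added to the bloom filter during load match the bytes probed during pruning. A minimal standalone sketch of why that symmetry matters, using Guava's BloomFilter as a stand-in for Carbon's bloom index (an assumption made only for illustration):

import com.google.common.hash.BloomFilter;
import com.google.common.hash.Funnels;

// Guava stands in for Carbon's bloom implementation here. The point is that the bytes
// put() at write time and the bytes passed to mightContain() at query time must come
// from the same conversion (e.g. boolean -> byte), otherwise lookups always miss.
public final class BloomSymmetrySketch {
  static byte[] booleanAsBytes(boolean v) {
    return new byte[] { v ? (byte) 1 : (byte) 0 };  // assumed 1/0 encoding, as in the sketch above
  }

  public static void main(String[] args) {
    BloomFilter<byte[]> bloom = BloomFilter.create(Funnels.byteArrayFunnel(), 1000, 0.01);
    bloom.put(booleanAsBytes(true));                               // write path
    System.out.println(bloom.mightContain(booleanAsBytes(true)));  // query path: prints true
  }
}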

http://git-wip-us.apache.org/repos/asf/carbondata/blob/8046bca5/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/DataConvertUtil.java
----------------------------------------------------------------------
diff --git a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/DataConvertUtil.java b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/DataConvertUtil.java
index 35a4367..f59202d 100644
--- a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/DataConvertUtil.java
+++ b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/DataConvertUtil.java
@@ -46,14 +46,30 @@ public class DataConvertUtil {
 
   /**
    * return default null value based on datatype. This method refers to ColumnPage.putNull
+   *
+   * Note: since we cannot mark NULL with its corresponding data type in the bloom datamap,
+   * we set/get a `NullValue` for NULL, so pruning with the bloom filter may have a false
+   * positive when the filter value equals the `NullValue`.
+   * This should not affect the correctness of the result
    */
-  public static Object getNullValueForMeasure(DataType dataType) {
+  public static Object getNullValueForMeasure(DataType dataType, int scale) {
     if (dataType == DataTypes.BOOLEAN) {
       return false;
+    } else if (dataType == DataTypes.BYTE) {
+      return (byte) 0;
+    } else if (dataType == DataTypes.SHORT) {
+      return (short) 0;
+    } else if (dataType == DataTypes.INT) {
+      return 0;
+    } else if (dataType == DataTypes.LONG) {
+      return 0L;
+    } else if (dataType == DataTypes.DOUBLE) {
+      return 0.0;
     } else if (DataTypes.isDecimal(dataType)) {
-      return BigDecimal.ZERO;
+      // keep consistent with `DecimalConverter.getDecimal` in the loading process
+      return BigDecimal.valueOf(0, scale);
     } else {
-      return 0;
+      throw new IllegalArgumentException("unsupported data type: " + dataType);
     }
   }
 }
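
The decimal branch above now builds the null sentinel with the column's scale instead of BigDecimal.ZERO. A minimal standalone sketch of why the scale matters: the sentinel has to serialize to the same representation the load path produces, and a BigDecimal with scale 0 is not the same representation as one with the column's scale, even though the numeric value is equal.

import java.math.BigDecimal;

// Sketch illustrating BigDecimal.valueOf(0, scale) vs BigDecimal.ZERO; the scale value
// is illustrative (the test below uses decimal(18,2)).
public final class DecimalNullSentinelSketch {
  public static void main(String[] args) {
    int scale = 2;
    BigDecimal sentinel = BigDecimal.valueOf(0, scale);
    System.out.println(sentinel);                                  // 0.00
    System.out.println(sentinel.equals(BigDecimal.ZERO));          // false: scales differ
    System.out.println(sentinel.compareTo(BigDecimal.ZERO) == 0);  // true: same numeric value
  }
}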

http://git-wip-us.apache.org/repos/asf/carbondata/blob/8046bca5/integration/spark2/src/main/scala/org/apache/carbondata/datamap/IndexDataMapRebuildRDD.scala
----------------------------------------------------------------------
diff --git a/integration/spark2/src/main/scala/org/apache/carbondata/datamap/IndexDataMapRebuildRDD.scala b/integration/spark2/src/main/scala/org/apache/carbondata/datamap/IndexDataMapRebuildRDD.scala
index 70e5cba..29df1ca 100644
--- a/integration/spark2/src/main/scala/org/apache/carbondata/datamap/IndexDataMapRebuildRDD.scala
+++ b/integration/spark2/src/main/scala/org/apache/carbondata/datamap/IndexDataMapRebuildRDD.scala
@@ -33,6 +33,7 @@ import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl
 import org.apache.spark.{CarbonInputMetrics, Partition, TaskContext}
 import org.apache.spark.deploy.SparkHadoopUtil
 import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.types.Decimal
 
 import org.apache.carbondata.common.logging.LogServiceFactory
 import org.apache.carbondata.core.constants.CarbonCommonConstants
@@ -52,6 +53,7 @@ import org.apache.carbondata.core.scan.wrappers.ByteArrayWrapper
 import org.apache.carbondata.core.statusmanager.SegmentStatusManager
 import org.apache.carbondata.core.util.{CarbonUtil, TaskMetricsMap}
 import org.apache.carbondata.core.util.path.CarbonTablePath
+import org.apache.carbondata.datamap.bloom.DataConvertUtil
 import org.apache.carbondata.events.{BuildDataMapPostExecutionEvent, BuildDataMapPreExecutionEvent, OperationContext, OperationListenerBus}
 import org.apache.carbondata.hadoop.{CarbonInputSplit, CarbonMultiBlockSplit, CarbonProjection, CarbonRecordReader}
 import org.apache.carbondata.hadoop.api.{CarbonInputFormat, CarbonTableInputFormat}
@@ -267,7 +269,17 @@ class RawBytesReadSupport(segmentProperties: SegmentProperties, indexColumns: Ar
           indexCol2IdxInNoDictArray(col.getColName))
       } else {
         // measures start from 1
-        data(1 + indexCol2IdxInMeasureArray(col.getColName))
+        val value = data(1 + indexCol2IdxInMeasureArray(col.getColName))
+        if (null == value) {
+          DataConvertUtil.getNullValueForMeasure(col.getDataType,
+            col.getColumnSchema.getScale)
+        } else if (DataTypes.isDecimal(col.getDataType)) {
+          // In rebuild process, value is built for spark
+          // here we need to convert it to java BigDecimal for carbon
+          value.asInstanceOf[Decimal].toBigDecimal.bigDecimal
+        } else {
+          value
+        }
       }
     }
     rtn(indexColumns.length) = data(data.length - 3)
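
During rebuild, the rows come back from the query engine, so a decimal measure arrives as Spark's Decimal wrapper while Carbon's byte conversion expects java.math.BigDecimal. A minimal sketch of that unwrapping, written in Java for consistency with the other sketches; it assumes spark-catalyst is on the classpath and uses toJavaBigDecimal() as an equivalent of the toBigDecimal.bigDecimal call in the Scala code above:

import java.math.BigDecimal;
import org.apache.spark.sql.types.Decimal;

// Unwrap Spark's Decimal into a java.math.BigDecimal before handing it to Carbon.
public final class SparkDecimalSketch {
  public static void main(String[] args) {
    Decimal sparkValue = Decimal.apply(new BigDecimal("23.23"));
    BigDecimal javaValue = sparkValue.toJavaBigDecimal();
    System.out.println(javaValue);  // prints 23.23
  }
}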

http://git-wip-us.apache.org/repos/asf/carbondata/blob/8046bca5/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala
----------------------------------------------------------------------
diff --git a/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala b/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala
index 12cd234..7871518 100644
--- a/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala
+++ b/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala
@@ -647,6 +647,175 @@ class BloomCoarseGrainDataMapSuite extends QueryTest with BeforeAndAfterAll with
 
   }
 
+  test("test bloom datamap on all basic data types") {
+    val originTimestampFormat = CarbonProperties.getInstance().getProperty(
+      CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,
+      CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT)
+    val originDateFormat = CarbonProperties.getInstance().getProperty(
+      CarbonCommonConstants.CARBON_DATE_FORMAT,
+      CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT)
+
+    CarbonProperties.getInstance().addProperty(
+      CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy/MM/dd HH:mm:ss")
+    CarbonProperties.getInstance().addProperty(
+      CarbonCommonConstants.CARBON_DATE_FORMAT, "yyyy/MM/dd")
+
+    val columnNames = "booleanField,shortField,intField,bigintField,doubleField,stringField," +
+      "timestampField,decimalField,dateField,charField,floatField"
+
+    sql(
+      s"""
+         | CREATE TABLE $bloomDMSampleTable(
+         |    booleanField boolean,
+         |    shortField smallint,
+         |    intField int,
+         |    bigintField bigint,
+         |    doubleField double,
+         |    stringField string,
+         |    timestampField timestamp,
+         |    decimalField decimal(18,2),
+         |    dateField date,
+         |    charField string,
+         |    floatField float
+         | )
+         | STORED BY 'carbondata'
+       """.stripMargin)
+
+    sql(
+      s"""
+         | CREATE TABLE $normalTable(
+         |    booleanField boolean,
+         |    shortField smallint,
+         |    intField int,
+         |    bigintField bigint,
+         |    doubleField double,
+         |    stringField string,
+         |    timestampField timestamp,
+         |    decimalField decimal(18,2),
+         |    dateField date,
+         |    charField string,
+         |    floatField float
+         | )
+         | STORED BY 'carbondata'
+       """.stripMargin)
+
+    // first data load
+    sql(
+      s"""
+         | INSERT INTO TABLE $bloomDMSampleTable
+         | VALUES(true,1,10,100,48.4,'spark','2015/4/23 12:01:01',1.23,'2015/4/23','aaa',2.5),
+         | (true,1,11,100,44.4,'flink','2015/5/23 12:01:03',23.23,'2015/5/23','ccc',2.15),
+         | (true,3,14,160,43.4,'hive','2015/7/26 12:01:06',3454.32,'2015/7/26','ff',5.5),
+         | (NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
+       """.stripMargin)
+    sql(
+      s"""
+         | INSERT INTO TABLE $normalTable
+         | VALUES(true,1,10,100,48.4,'spark','2015/4/23 12:01:01',1.23,'2015/4/23','aaa',2.5),
+         | (true,1,11,100,44.4,'flink','2015/5/23 12:01:03',23.23,'2015/5/23','ccc',2.15),
+         | (true,3,14,160,43.4,'hive','2015/7/26 12:01:06',3454.32,'2015/7/26','ff',5.5),
+         | (NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
+       """.stripMargin)
+
+    // create datamap
+    sql(
+      s"""
+         | CREATE DATAMAP dm_test ON TABLE $bloomDMSampleTable
+         | USING 'bloomfilter'
+         | DMProperties('INDEX_COLUMNS'='$columnNames',
+         | 'BLOOM_SIZE'='640000')
+      """.stripMargin)
+
+    // second data load
+    sql(
+      s"""
+         | INSERT INTO TABLE $bloomDMSampleTable
+         | VALUES(true,1,10,100,48.4,'spark','2015/4/23 12:01:01',1.23,'2015/4/23','aaa',2.5),
+         | (true,1,11,100,44.4,'flink','2015/5/23 12:01:03',23.23,'2015/5/23','ccc',2.15),
+         | (true,3,14,160,43.4,'hive','2015/7/26 12:01:06',3454.32,'2015/7/26','ff',5.5),
+         | (NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
+       """.stripMargin)
+    sql(
+      s"""
+         | INSERT INTO TABLE $normalTable
+         | VALUES(true,1,10,100,48.4,'spark','2015/4/23 12:01:01',1.23,'2015/4/23','aaa',2.5),
+         | (true,1,11,100,44.4,'flink','2015/5/23 12:01:03',23.23,'2015/5/23','ccc',2.15),
+         | (true,3,14,160,43.4,'hive','2015/7/26 12:01:06',3454.32,'2015/7/26','ff',5.5),
+         | (NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
+       """.stripMargin)
+
+    // check simple query
+    checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE booleanField = true"),
+      sql(s"SELECT * FROM $normalTable WHERE booleanField = true"))
+    checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE shortField = 3"),
+      sql(s"SELECT * FROM $normalTable WHERE shortField = 3"))
+    checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE intField = 14"),
+      sql(s"SELECT * FROM $normalTable WHERE intField = 14"))
+    checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE bigintField = 100"),
+      sql(s"SELECT * FROM $normalTable WHERE bigintField = 100"))
+    checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE doubleField = 43.4"),
+      sql(s"SELECT * FROM $normalTable WHERE doubleField = 43.4"))
+    checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE stringField = 'spark'"),
+      sql(s"SELECT * FROM $normalTable WHERE stringField = 'spark'"))
+    checkAnswer(
+      sql(s"SELECT * FROM $bloomDMSampleTable WHERE timestampField = '2015/7/26 12:01:06'"),
+      sql(s"SELECT * FROM $normalTable WHERE timestampField = '2015/7/26 12:01:06'"))
+    checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE decimalField = 23.23"),
+      sql(s"SELECT * FROM $normalTable WHERE decimalField = 23.23"))
+    checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE dateField = '2015/4/23'"),
+      sql(s"SELECT * FROM $normalTable WHERE dateField = '2015/4/23'"))
+    checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE charField = 'ccc'"),
+      sql(s"SELECT * FROM $normalTable WHERE charField = 'ccc'"))
+    checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE floatField = 2.5"),
+      sql(s"SELECT * FROM $normalTable WHERE floatField = 2.5"))
+
+    // check query using null
+    checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE booleanField is null"),
+      sql(s"SELECT * FROM $normalTable WHERE booleanField is null"))
+    checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE shortField is null"),
+      sql(s"SELECT * FROM $normalTable WHERE shortField is null"))
+    checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE intField is null"),
+      sql(s"SELECT * FROM $normalTable WHERE intField is null"))
+    checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE bigintField is null"),
+      sql(s"SELECT * FROM $normalTable WHERE bigintField is null"))
+    checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE doubleField is null"),
+      sql(s"SELECT * FROM $normalTable WHERE doubleField is null"))
+    checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE stringField is null"),
+      sql(s"SELECT * FROM $normalTable WHERE stringField is null"))
+    checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE timestampField is null"),
+      sql(s"SELECT * FROM $normalTable WHERE timestampField is null"))
+    checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE decimalField is null"),
+      sql(s"SELECT * FROM $normalTable WHERE decimalField is null"))
+    checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE dateField is null"),
+      sql(s"SELECT * FROM $normalTable WHERE dateField is null"))
+    checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE charField is null"),
+      sql(s"SELECT * FROM $normalTable WHERE charField is null"))
+    checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE floatField is null"),
+      sql(s"SELECT * FROM $normalTable WHERE floatField is null"))
+
+    // check that the default `NullValue` of measures does not affect the result
+    // Note: the test data has a row containing NULL for each column but no row with the
+    // corresponding `NullValue`, so we should get 0 rows if the query uses the `NullValue`
+    checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE booleanField = false"),
+      sql(s"SELECT * FROM $normalTable WHERE booleanField = false"))
+    checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE shortField = 0"),
+      sql(s"SELECT * FROM $normalTable WHERE shortField = 0"))
+    checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE intField = 0"),
+      sql(s"SELECT * FROM $normalTable WHERE intField = 0"))
+    checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE bigintField = 0"),
+      sql(s"SELECT * FROM $normalTable WHERE bigintField = 0"))
+    checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE doubleField = 0"),
+      sql(s"SELECT * FROM $normalTable WHERE doubleField = 0"))
+    checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE decimalField = 0"),
+      sql(s"SELECT * FROM $normalTable WHERE decimalField = 0"))
+    checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE floatField = 0"),
+      sql(s"SELECT * FROM $normalTable WHERE floatField = 0"))
+
+    CarbonProperties.getInstance().addProperty(
+      CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, originTimestampFormat)
+    CarbonProperties.getInstance().addProperty(
+      CarbonCommonConstants.CARBON_DATE_FORMAT, originDateFormat)
+  }
 
   override protected def afterAll(): Unit = {
     deleteFile(bigFile)
