[CARBONDATA-2616][BloomDataMap] Fix bugs in querying bloom datamap with two 
index columns

During pruning in the bloomfilter datamap, the same blocklet was added
to the result more than once, which caused both EXPLAIN and queries to
return incorrect results.

This closes #2386


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/6eb360e1
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/6eb360e1
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/6eb360e1

Branch: refs/heads/carbonstore
Commit: 6eb360e1f5f577a576d185efb7dcbf1cc6a302e8
Parents: 01b48fc
Author: xuchuanyin <xuchuan...@hust.edu.cn>
Authored: Wed Jun 20 16:31:28 2018 +0800
Committer: Jacky Li <jacky.li...@qq.com>
Committed: Thu Jun 21 12:03:48 2018 +0800

----------------------------------------------------------------------
 .../datamap/bloom/BloomCoarseGrainDataMap.java    |  5 +++--
 .../bloom/BloomCoarseGrainDataMapSuite.scala      | 18 +++++++++++++++++-
 2 files changed, 20 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/6eb360e1/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
----------------------------------------------------------------------
diff --git 
a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
 
b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
index e9af0ff..ed03256 100644
--- 
a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
+++ 
b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
@@ -21,6 +21,7 @@ import java.io.File;
 import java.io.IOException;
 import java.io.UnsupportedEncodingException;
 import java.util.ArrayList;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
 
@@ -85,7 +86,7 @@ public class BloomCoarseGrainDataMap extends 
CoarseGrainDataMap {
   @Override
   public List<Blocklet> prune(FilterResolverIntf filterExp, SegmentProperties 
segmentProperties,
       List<PartitionSpec> partitions) {
-    List<Blocklet> hitBlocklets = new ArrayList<Blocklet>();
+    Set<Blocklet> hitBlocklets = new HashSet<>();
     if (filterExp == null) {
       // null is different from empty here. Empty means after pruning, no 
blocklet need to scan.
       return null;
@@ -111,7 +112,7 @@ public class BloomCoarseGrainDataMap extends 
CoarseGrainDataMap {
         }
       }
     }
-    return hitBlocklets;
+    return new ArrayList<>(hitBlocklets);
   }
 
   private byte[] convertValueToBytes(DataType dataType, Object value) {

http://git-wip-us.apache.org/repos/asf/carbondata/blob/6eb360e1/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala
----------------------------------------------------------------------
diff --git 
a/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala
 
b/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala
index 7df3901..c9a4097 100644
--- 
a/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala
+++ 
b/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala
@@ -71,7 +71,23 @@ class BloomCoarseGrainDataMapSuite extends QueryTest with 
BeforeAndAfterAll with
     checkAnswer(
       checkSqlHitDataMap(s"select * from $bloomDMSampleTable where city = 
'city_999'", dataMapName, shouldHit),
       sql(s"select * from $normalTable where city = 'city_999'"))
-     checkAnswer(
+    // query with two index_columns
+    checkAnswer(
+      checkSqlHitDataMap(s"select * from $bloomDMSampleTable where id = 1 and 
city='city_1'", dataMapName, shouldHit),
+      sql(s"select * from $normalTable where id = 1 and city='city_1'"))
+    checkAnswer(
+      checkSqlHitDataMap(s"select * from $bloomDMSampleTable where id = 999 
and city='city_999'", dataMapName, shouldHit),
+      sql(s"select * from $normalTable where id = 999 and city='city_999'"))
+    checkAnswer(
+      checkSqlHitDataMap(s"select * from $bloomDMSampleTable where city = 
'city_1' and id = 0", dataMapName, shouldHit),
+      sql(s"select * from $normalTable where city = 'city_1' and id = 0"))
+    checkAnswer(
+      checkSqlHitDataMap(s"select * from $bloomDMSampleTable where city = 
'city_999' and name='n999'", dataMapName, shouldHit),
+      sql(s"select * from $normalTable where city = 'city_999' and 
name='n999'"))
+    checkAnswer(
+      checkSqlHitDataMap(s"select * from $bloomDMSampleTable where city = 
'city_999' and name='n1'", dataMapName, shouldHit),
+      sql(s"select * from $normalTable where city = 'city_999' and name='n1'"))
+    checkAnswer(
       sql(s"select min(id), max(id), min(name), max(name), min(city), 
max(city)" +
           s" from $bloomDMSampleTable"),
       sql(s"select min(id), max(id), min(name), max(name), min(city), 
max(city)" +

Reply via email to