[CARBONDATA-2616][BloomDataMap] Fix bugs in querying bloom datamap with two index columns
During pruning in the bloomfilter datamap, the same blocklets were added to the result more than once, causing both explain and query to return incorrect results. This closes #2386 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/6eb360e1 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/6eb360e1 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/6eb360e1 Branch: refs/heads/carbonstore Commit: 6eb360e1f5f577a576d185efb7dcbf1cc6a302e8 Parents: 01b48fc Author: xuchuanyin <xuchuan...@hust.edu.cn> Authored: Wed Jun 20 16:31:28 2018 +0800 Committer: Jacky Li <jacky.li...@qq.com> Committed: Thu Jun 21 12:03:48 2018 +0800 ---------------------------------------------------------------------- .../datamap/bloom/BloomCoarseGrainDataMap.java | 5 +++-- .../bloom/BloomCoarseGrainDataMapSuite.scala | 18 +++++++++++++++++- 2 files changed, 20 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/carbondata/blob/6eb360e1/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java ---------------------------------------------------------------------- diff --git a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java index e9af0ff..ed03256 100644 --- a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java +++ b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java @@ -21,6 +21,7 @@ import java.io.File; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.util.ArrayList; +import java.util.HashSet; import java.util.List; import java.util.Set; @@ -85,7 +86,7 @@ public class BloomCoarseGrainDataMap extends CoarseGrainDataMap { 
@Override public List<Blocklet> prune(FilterResolverIntf filterExp, SegmentProperties segmentProperties, List<PartitionSpec> partitions) { - List<Blocklet> hitBlocklets = new ArrayList<Blocklet>(); + Set<Blocklet> hitBlocklets = new HashSet<>(); if (filterExp == null) { // null is different from empty here. Empty means after pruning, no blocklet need to scan. return null; @@ -111,7 +112,7 @@ public class BloomCoarseGrainDataMap extends CoarseGrainDataMap { } } } - return hitBlocklets; + return new ArrayList<>(hitBlocklets); } private byte[] convertValueToBytes(DataType dataType, Object value) { http://git-wip-us.apache.org/repos/asf/carbondata/blob/6eb360e1/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala ---------------------------------------------------------------------- diff --git a/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala b/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala index 7df3901..c9a4097 100644 --- a/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala +++ b/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala @@ -71,7 +71,23 @@ class BloomCoarseGrainDataMapSuite extends QueryTest with BeforeAndAfterAll with checkAnswer( checkSqlHitDataMap(s"select * from $bloomDMSampleTable where city = 'city_999'", dataMapName, shouldHit), sql(s"select * from $normalTable where city = 'city_999'")) - checkAnswer( + // query with two index_columns + checkAnswer( + checkSqlHitDataMap(s"select * from $bloomDMSampleTable where id = 1 and city='city_1'", dataMapName, shouldHit), + sql(s"select * from $normalTable where id = 1 and city='city_1'")) + checkAnswer( + checkSqlHitDataMap(s"select * from $bloomDMSampleTable where id = 999 and city='city_999'", dataMapName, shouldHit), + sql(s"select * 
from $normalTable where id = 999 and city='city_999'")) + checkAnswer( + checkSqlHitDataMap(s"select * from $bloomDMSampleTable where city = 'city_1' and id = 0", dataMapName, shouldHit), + sql(s"select * from $normalTable where city = 'city_1' and id = 0")) + checkAnswer( + checkSqlHitDataMap(s"select * from $bloomDMSampleTable where city = 'city_999' and name='n999'", dataMapName, shouldHit), + sql(s"select * from $normalTable where city = 'city_999' and name='n999'")) + checkAnswer( + checkSqlHitDataMap(s"select * from $bloomDMSampleTable where city = 'city_999' and name='n1'", dataMapName, shouldHit), + sql(s"select * from $normalTable where city = 'city_999' and name='n1'")) + checkAnswer( sql(s"select min(id), max(id), min(name), max(name), min(city), max(city)" + s" from $bloomDMSampleTable"), sql(s"select min(id), max(id), min(name), max(name), min(city), max(city)" +