Github user qiuchenjian commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2713#discussion_r242387613 --- Diff: datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFactory.java --- @@ -218,56 +218,46 @@ public DataMapBuilder createBuilder(Segment segment, String shardName, this.bloomFilterSize, this.bloomFilterFpp, bloomCompress); } - /** - * returns all shard directories of bloom index files for query - * if bloom index files are merged we should get only one shard path - */ - private Set<String> getAllShardPaths(String tablePath, String segmentId) { - String dataMapStorePath = CarbonTablePath.getDataMapStorePath( - tablePath, segmentId, dataMapName); - CarbonFile[] carbonFiles = FileFactory.getCarbonFile(dataMapStorePath).listFiles(); - Set<String> shardPaths = new HashSet<>(); + + private boolean isAllShardsMerged(String dmSegmentPath) { + boolean mergeShardExist = false; boolean mergeShardInprogress = false; - CarbonFile mergeShardFile = null; + CarbonFile[] carbonFiles = FileFactory.getCarbonFile(dmSegmentPath).listFiles(); for (CarbonFile carbonFile : carbonFiles) { - if (carbonFile.getName().equals(BloomIndexFileStore.MERGE_BLOOM_INDEX_SHARD_NAME)) { - mergeShardFile = carbonFile; - } else if (carbonFile.getName().equals(BloomIndexFileStore.MERGE_INPROGRESS_FILE)) { + String fileName = carbonFile.getName(); + if (fileName.equals(BloomIndexFileStore.MERGE_BLOOM_INDEX_SHARD_NAME)) { + mergeShardExist = true; + } else if (fileName.equals(BloomIndexFileStore.MERGE_INPROGRESS_FILE)) { mergeShardInprogress = true; - } else if (carbonFile.isDirectory()) { - shardPaths.add(FileFactory.getPath(carbonFile.getAbsolutePath()).toString()); } } - if (mergeShardFile != null && !mergeShardInprogress) { - // should only get one shard path if mergeShard is generated successfully - shardPaths.clear(); - shardPaths.add(FileFactory.getPath(mergeShardFile.getAbsolutePath()).toString()); - } - return shardPaths; + 
return mergeShardExist && !mergeShardInprogress; } @Override public List<CoarseGrainDataMap> getDataMaps(Segment segment) throws IOException { List<CoarseGrainDataMap> dataMaps = new ArrayList<>(); try { - Set<String> shardPaths = segmentMap.get(segment.getSegmentNo()); - if (shardPaths == null) { - shardPaths = getAllShardPaths(getCarbonTable().getTablePath(), segment.getSegmentNo()); - segmentMap.put(segment.getSegmentNo(), shardPaths); - } - Set<String> filteredShards = segment.getFilteredIndexShardNames(); - for (String shard : shardPaths) { - if (shard.endsWith(BloomIndexFileStore.MERGE_BLOOM_INDEX_SHARD_NAME) || - filteredShards.contains(new File(shard).getName())) { - // Filter out the tasks which are filtered through Main datamap. - // for merge shard, shard pruning delay to be done before pruning blocklet - BloomCoarseGrainDataMap bloomDM = new BloomCoarseGrainDataMap(); - bloomDM.init(new BloomDataMapModel(shard, cache, segment.getConfiguration())); - bloomDM.initIndexColumnConverters(getCarbonTable(), dataMapMeta.getIndexedColumns()); - bloomDM.setFilteredShard(filteredShards); - dataMaps.add(bloomDM); - } + String dmSegmentPath = CarbonTablePath.getDataMapStorePath( + getCarbonTable().getTablePath(), segment.getSegmentNo(), dataMapName); + boolean useMergeShard = isAllShardsMerged(dmSegmentPath); + + // make use of filtered shard info from default datamap to build bloom datamap + BloomCoarseGrainDataMap bloomDM = new BloomCoarseGrainDataMap(); + bloomDM.init(new BloomDataMapModel(dmSegmentPath, cache, FileFactory.getConfiguration())); + bloomDM.initIndexColumnConverters(getCarbonTable(), dataMapMeta.getIndexedColumns()); + bloomDM.setFilteredShard(segment.getFilteredIndexShardNames(), useMergeShard); + dataMaps.add(bloomDM); + + // save shard info for clearing cache + Set<String> shardPaths = new HashSet<>(); + if (useMergeShard) { + shardPaths.add(dmSegmentPath + File.separator + + BloomIndexFileStore.MERGE_BLOOM_INDEX_SHARD_NAME); + } else { + 
shardPaths.addAll(segment.getFilteredIndexShardNames()); } + segmentMap.put(segment.getSegmentNo(), shardPaths); --- End diff -- segmentMap was used to cache the shardPaths, but now it's useless. I don't think it's necessary to get shardPaths here; it's OK to change segmentMap to a Set that only stores the segment numbers.
---