Github user qiuchenjian commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/2713#discussion_r242387613
  
    --- Diff: 
datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFactory.java
 ---
    @@ -218,56 +218,46 @@ public DataMapBuilder createBuilder(Segment segment, 
String shardName,
             this.bloomFilterSize, this.bloomFilterFpp, bloomCompress);
       }
     
    -  /**
    -   * returns all shard directories of bloom index files for query
    -   * if bloom index files are merged we should get only one shard path
    -   */
    -  private Set<String> getAllShardPaths(String tablePath, String segmentId) 
{
    -    String dataMapStorePath = CarbonTablePath.getDataMapStorePath(
    -        tablePath, segmentId, dataMapName);
    -    CarbonFile[] carbonFiles = 
FileFactory.getCarbonFile(dataMapStorePath).listFiles();
    -    Set<String> shardPaths = new HashSet<>();
    +
    +  private boolean isAllShardsMerged(String dmSegmentPath) {
    +    boolean mergeShardExist = false;
         boolean mergeShardInprogress = false;
    -    CarbonFile mergeShardFile = null;
    +    CarbonFile[] carbonFiles = 
FileFactory.getCarbonFile(dmSegmentPath).listFiles();
         for (CarbonFile carbonFile : carbonFiles) {
    -      if 
(carbonFile.getName().equals(BloomIndexFileStore.MERGE_BLOOM_INDEX_SHARD_NAME)) 
{
    -        mergeShardFile = carbonFile;
    -      } else if 
(carbonFile.getName().equals(BloomIndexFileStore.MERGE_INPROGRESS_FILE)) {
    +      String fileName = carbonFile.getName();
    +      if 
(fileName.equals(BloomIndexFileStore.MERGE_BLOOM_INDEX_SHARD_NAME)) {
    +        mergeShardExist = true;
    +      } else if 
(fileName.equals(BloomIndexFileStore.MERGE_INPROGRESS_FILE)) {
             mergeShardInprogress = true;
    -      } else if (carbonFile.isDirectory()) {
    -        
shardPaths.add(FileFactory.getPath(carbonFile.getAbsolutePath()).toString());
           }
         }
    -    if (mergeShardFile != null && !mergeShardInprogress) {
    -      // should only get one shard path if mergeShard is generated 
successfully
    -      shardPaths.clear();
    -      
shardPaths.add(FileFactory.getPath(mergeShardFile.getAbsolutePath()).toString());
    -    }
    -    return shardPaths;
    +    return mergeShardExist && !mergeShardInprogress;
       }
     
       @Override
       public List<CoarseGrainDataMap> getDataMaps(Segment segment) throws 
IOException {
         List<CoarseGrainDataMap> dataMaps = new ArrayList<>();
         try {
    -      Set<String> shardPaths = segmentMap.get(segment.getSegmentNo());
    -      if (shardPaths == null) {
    -        shardPaths = getAllShardPaths(getCarbonTable().getTablePath(), 
segment.getSegmentNo());
    -        segmentMap.put(segment.getSegmentNo(), shardPaths);
    -      }
    -      Set<String> filteredShards = segment.getFilteredIndexShardNames();
    -      for (String shard : shardPaths) {
    -        if 
(shard.endsWith(BloomIndexFileStore.MERGE_BLOOM_INDEX_SHARD_NAME) ||
    -            filteredShards.contains(new File(shard).getName())) {
    -          // Filter out the tasks which are filtered through Main datamap.
    -          // for merge shard, shard pruning delay to be done before 
pruning blocklet
    -          BloomCoarseGrainDataMap bloomDM = new BloomCoarseGrainDataMap();
    -          bloomDM.init(new BloomDataMapModel(shard, cache, 
segment.getConfiguration()));
    -          bloomDM.initIndexColumnConverters(getCarbonTable(), 
dataMapMeta.getIndexedColumns());
    -          bloomDM.setFilteredShard(filteredShards);
    -          dataMaps.add(bloomDM);
    -        }
    +      String dmSegmentPath = CarbonTablePath.getDataMapStorePath(
    +          getCarbonTable().getTablePath(), segment.getSegmentNo(), 
dataMapName);
    +      boolean useMergeShard = isAllShardsMerged(dmSegmentPath);
    +
    +      // make use of filtered shard info from default datamap to build 
bloom datamap
    +      BloomCoarseGrainDataMap bloomDM = new BloomCoarseGrainDataMap();
    +      bloomDM.init(new BloomDataMapModel(dmSegmentPath, cache, 
FileFactory.getConfiguration()));
    +      bloomDM.initIndexColumnConverters(getCarbonTable(), 
dataMapMeta.getIndexedColumns());
    +      bloomDM.setFilteredShard(segment.getFilteredIndexShardNames(), 
useMergeShard);
    +      dataMaps.add(bloomDM);
    +
    +      // save shard info for clearing cache
    +      Set<String> shardPaths = new HashSet<>();
    +      if (useMergeShard) {
    +        shardPaths.add(dmSegmentPath + File.separator +
    +            BloomIndexFileStore.MERGE_BLOOM_INDEX_SHARD_NAME);
    +      } else {
    +        shardPaths.addAll(segment.getFilteredIndexShardNames());
           }
    +      segmentMap.put(segment.getSegmentNo(), shardPaths);
    --- End diff --
    
    segmentMap was used to cache the shardPaths, but that cache is no longer 
useful, so I don't think it's necessary to compute shardPaths here.
    It would be fine to change segmentMap to a Set that only stores the segment numbers.


---

Reply via email to