Apache9 commented on a change in pull request #3617: URL: https://github.com/apache/hbase/pull/3617#discussion_r696753353
########## File path: hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java ########## @@ -592,19 +597,41 @@ void cleanupDaughterRegion(final RegionInfo regionInfo) throws IOException { * @param regionInfo daughter {@link org.apache.hadoop.hbase.client.RegionInfo} * @throws IOException */ - public Path commitDaughterRegion(final RegionInfo regionInfo) + public Path commitDaughterRegion(final RegionInfo regionInfo, List<Path> allRegionFiles) throws IOException { Path regionDir = this.getSplitsDir(regionInfo); if (fs.exists(regionDir)) { // Write HRI to a file in case we need to recover hbase:meta Path regionInfoFile = new Path(regionDir, REGION_INFO_FILE); byte[] regionInfoContent = getRegionInfoFileContent(regionInfo); writeRegionInfoFileContent(conf, fs, regionInfoFile, regionInfoContent); + loadRegionFilesIntoStoreTracker(allRegionFiles); } - return regionDir; } + private void loadRegionFilesIntoStoreTracker(List<Path> allFiles) throws IOException { + //we need to map trackers per store + Map<String, StoreFileTracker> trackerMap = new HashMap<>(); + //we need to map store files per store + Map<String, List<StoreFileInfo>> fileInfoMap = new HashMap<>(); + for(Path file : allFiles) { + String familyName = file.getParent().getName(); + trackerMap.computeIfAbsent(familyName, t -> { + ColumnFamilyDescriptorBuilder fDescBuilder = + ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes(familyName)); + return StoreFileTrackerFactory.create(conf, regionInfo.getTable(), true, Review comment: I think here when creating StoreFileTracker, we should use the modified Configuration instance where we also include the configurations declared in table description and family description, as you can set StoreEngine per table or family. ########## File path: hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java ########## @@ -584,27 +584,28 @@ private void createMergedRegion(final MasterProcedureEnv env) throws IOException final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem(); final Path tableDir = CommonFSUtils.getTableDir(mfs.getRootDir(), regionsToMerge[0].getTable()); final FileSystem fs = mfs.getFileSystem(); - + List<Path> mergedFiles = new ArrayList<>(); HRegionFileSystem mergeRegionFs = HRegionFileSystem.createRegionOnFileSystem( env.getMasterConfiguration(), fs, tableDir, mergedRegion); for (RegionInfo ri: this.regionsToMerge) { HRegionFileSystem regionFs = HRegionFileSystem.openRegionFromFileSystem( env.getMasterConfiguration(), fs, tableDir, ri, false); - mergeStoreFiles(env, regionFs, mergeRegionFs, mergedRegion); + mergedFiles.addAll(mergeStoreFiles(env, regionFs, mergeRegionFs, mergedRegion)); } assert mergeRegionFs != null; - mergeRegionFs.commitMergedRegion(); + mergeRegionFs.commitMergedRegion(mergedFiles); // Prepare to create merged regions env.getAssignmentManager().getRegionStates(). getOrCreateRegionStateNode(mergedRegion).setState(State.MERGING_NEW); } - private void mergeStoreFiles(MasterProcedureEnv env, HRegionFileSystem regionFs, + private List<Path> mergeStoreFiles(MasterProcedureEnv env, HRegionFileSystem regionFs, HRegionFileSystem mergeRegionFs, RegionInfo mergedRegion) throws IOException { final TableDescriptor htd = env.getMasterServices().getTableDescriptors() .get(mergedRegion.getTable()); + List<Path> mergedFiles = new ArrayList<>(); for (ColumnFamilyDescriptor hcd : htd.getColumnFamilies()) { String family = hcd.getNameAsString(); final Collection<StoreFileInfo> storeFiles = regionFs.getStoreFiles(family); Review comment: I think here we should use StoreFileTracker to get the file list, instead of loading from HRegionFileSystem? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org