manojpec commented on a change in pull request #3843: URL: https://github.com/apache/hudi/pull/3843#discussion_r734286434
########## File path: hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java ########## @@ -233,24 +253,33 @@ public void initTableMetadata() { } } - protected void bootstrapIfNeeded(HoodieEngineContext engineContext, HoodieTableMetaClient dataMetaClient) throws IOException { + /** + * Bootstrap the metadata table if needed. + * + * @param engineContext - Engine context + * @param dataMetaClient - Meta client for the data table + * @param actionMetadata - Optional action metadata + * @param <T> - Action metadata types extending Avro generated SpecificRecordBase + * @throws IOException + */ + protected <T extends SpecificRecordBase> void bootstrapIfNeeded(HoodieEngineContext engineContext, + HoodieTableMetaClient dataMetaClient, + Option<T> actionMetadata) throws IOException { HoodieTimer timer = new HoodieTimer().startTimer(); - boolean exists = dataMetaClient.getFs().exists(new Path(metadataWriteConfig.getBasePath(), HoodieTableMetaClient.METAFOLDER_NAME)); + + boolean exists = dataMetaClient.getFs().exists(new Path(metadataWriteConfig.getBasePath(), + HoodieTableMetaClient.METAFOLDER_NAME)); boolean rebootstrap = false; + + // If the un-synced instants have been archived, then + // the metadata table will need to be bootstrapped again. if (exists) { - // If the un-synched instants have been archived then the metadata table will need to be bootstrapped again - HoodieTableMetaClient metadataMetaClient = HoodieTableMetaClient.builder().setConf(hadoopConf.get()) + final HoodieTableMetaClient metadataMetaClient = HoodieTableMetaClient.builder().setConf(hadoopConf.get()) .setBasePath(metadataWriteConfig.getBasePath()).build(); - Option<HoodieInstant> latestMetadataInstant = metadataMetaClient.getActiveTimeline().filterCompletedInstants().lastInstant(); - if (!latestMetadataInstant.isPresent()) { - LOG.warn("Metadata Table will need to be re-bootstrapped as no instants were found"); - rebootstrap = true; - } else if (!latestMetadataInstant.get().getTimestamp().equals(SOLO_COMMIT_TIMESTAMP) - && dataMetaClient.getActiveTimeline().getAllCommitsTimeline().isBeforeTimelineStarts(latestMetadataInstant.get().getTimestamp())) { - // TODO: Revisit this logic and validate that filtering for all commits timeline is the right thing to do - LOG.warn("Metadata Table will need to be re-bootstrapped as un-synced instants have been archived." - + " latestMetadataInstant=" + latestMetadataInstant.get().getTimestamp() - + ", latestDataInstant=" + dataMetaClient.getActiveTimeline().firstInstant().get().getTimestamp()); + final Option<HoodieInstant> latestMetadataInstant = + metadataMetaClient.getActiveTimeline().filterCompletedInstants().lastInstant(); + + if (isBootstrapNeeded(latestMetadataInstant, actionMetadata)) { Review comment: Right, fixed it. ########## File path: hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java ########## @@ -110,15 +124,20 @@ protected HoodieBackedTableMetadataWriter(Configuration hadoopConf, HoodieWriteC enabled = true; // Inline compaction and auto clean is required as we dont expose this table outside - ValidationUtils.checkArgument(!this.metadataWriteConfig.isAutoClean(), "Cleaning is controlled internally for Metadata table."); - ValidationUtils.checkArgument(!this.metadataWriteConfig.inlineCompactionEnabled(), "Compaction is controlled internally for metadata table."); + ValidationUtils.checkArgument(!this.metadataWriteConfig.isAutoClean(), + "Cleaning is controlled internally for Metadata table."); + ValidationUtils.checkArgument(!this.metadataWriteConfig.inlineCompactionEnabled(), + "Compaction is controlled internally for metadata table."); // Metadata Table cannot have metadata listing turned on. (infinite loop, much?) - ValidationUtils.checkArgument(this.metadataWriteConfig.shouldAutoCommit(), "Auto commit is required for Metadata Table"); - ValidationUtils.checkArgument(!this.metadataWriteConfig.isMetadataTableEnabled(), "File listing cannot be used for Metadata Table"); + ValidationUtils.checkArgument(this.metadataWriteConfig.shouldAutoCommit(), Review comment: As suggested in the wiki I have the reformatting for changed lines (line 140). IDE for some reasons chose to format the above lines closer it also. Anyways these lines were crossing the length of 120 and I broke the log statements into multi line for easy readability and not to cross 120 width. ########## File path: hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java ########## @@ -270,6 +299,53 @@ protected void bootstrapIfNeeded(HoodieEngineContext engineContext, HoodieTableM } } + /** + * Whether bootstrap operation needed for this metadata table. + * <p> + * Rollback of the first commit would look like un-synced instants in the metadata table. + * Action metadata is needed to verify the instant time and avoid erroneous bootstrapping. + * <p> + * TODO: Revisit this logic and validate that filtering for all + * commits timeline is the right thing to do + * + * @return True if the bootstrap is not needed, False otherwise + */ + private <T extends SpecificRecordBase> boolean isBootstrapNeeded(Option<HoodieInstant> latestMetadataInstant, + Option<T> actionMetadata) { + if (!latestMetadataInstant.isPresent()) { + LOG.warn("Metadata Table will need to be re-bootstrapped as no instants were found"); + return true; + } + + final String latestMetadataInstantTimestamp = latestMetadataInstant.get().getTimestamp(); + if (latestMetadataInstantTimestamp.equals(SOLO_COMMIT_TIMESTAMP)) { + return false; + } + + boolean isRollbackAction = false; + List<String> rollbackedTimestamps = Collections.emptyList(); + if (actionMetadata.isPresent() && actionMetadata.get() instanceof HoodieRollbackMetadata) { + isRollbackAction = true; + List<HoodieInstantInfo> rollbackedInstants = + ((HoodieRollbackMetadata) actionMetadata.get()).getInstantsRollback(); + rollbackedTimestamps = rollbackedInstants.stream().map(instant -> { + return instant.getCommitTime().toString(); + }).collect(Collectors.toList()); + } + + if (dataMetaClient.getActiveTimeline().getAllCommitsTimeline().isBeforeTimelineStarts( Review comment: Good catch. Fixed it. ########## File path: hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java ########## @@ -270,6 +299,53 @@ protected void bootstrapIfNeeded(HoodieEngineContext engineContext, HoodieTableM } } + /** + * Whether bootstrap operation needed for this metadata table. + * <p> + * Rollback of the first commit would look like un-synced instants in the metadata table. + * Action metadata is needed to verify the instant time and avoid erroneous bootstrapping. + * <p> + * TODO: Revisit this logic and validate that filtering for all + * commits timeline is the right thing to do + * + * @return True if the bootstrap is not needed, False otherwise + */ + private <T extends SpecificRecordBase> boolean isBootstrapNeeded(Option<HoodieInstant> latestMetadataInstant, + Option<T> actionMetadata) { + if (!latestMetadataInstant.isPresent()) { + LOG.warn("Metadata Table will need to be re-bootstrapped as no instants were found"); + return true; + } + + final String latestMetadataInstantTimestamp = latestMetadataInstant.get().getTimestamp(); + if (latestMetadataInstantTimestamp.equals(SOLO_COMMIT_TIMESTAMP)) { Review comment: Yes, it was combined with all other conditions. Old line 248 has it. I pulled it out here. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org