manojpec commented on a change in pull request #3843:
URL: https://github.com/apache/hudi/pull/3843#discussion_r734286434



##########
File path: 
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
##########
@@ -233,24 +253,33 @@ public void initTableMetadata() {
     }
   }
 
-  protected void bootstrapIfNeeded(HoodieEngineContext engineContext, 
HoodieTableMetaClient dataMetaClient) throws IOException {
+  /**
+   * Bootstrap the metadata table if needed.
+   *
+   * @param engineContext  - Engine context
+   * @param dataMetaClient - Meta client for the data table
+   * @param actionMetadata - Optional action metadata
+   * @param <T>            - Action metadata types extending Avro generated 
SpecificRecordBase
+   * @throws IOException
+   */
+  protected <T extends SpecificRecordBase> void 
bootstrapIfNeeded(HoodieEngineContext engineContext,
+                                                                  
HoodieTableMetaClient dataMetaClient,
+                                                                  Option<T> 
actionMetadata) throws IOException {
     HoodieTimer timer = new HoodieTimer().startTimer();
-    boolean exists = dataMetaClient.getFs().exists(new 
Path(metadataWriteConfig.getBasePath(), HoodieTableMetaClient.METAFOLDER_NAME));
+
+    boolean exists = dataMetaClient.getFs().exists(new 
Path(metadataWriteConfig.getBasePath(),
+        HoodieTableMetaClient.METAFOLDER_NAME));
     boolean rebootstrap = false;
+
+    // If the un-synced instants have been archived, then
+    // the metadata table will need to be bootstrapped again.
     if (exists) {
-      // If the un-synched instants have been archived then the metadata table 
will need to be bootstrapped again
-      HoodieTableMetaClient metadataMetaClient = 
HoodieTableMetaClient.builder().setConf(hadoopConf.get())
+      final HoodieTableMetaClient metadataMetaClient = 
HoodieTableMetaClient.builder().setConf(hadoopConf.get())
           .setBasePath(metadataWriteConfig.getBasePath()).build();
-      Option<HoodieInstant> latestMetadataInstant = 
metadataMetaClient.getActiveTimeline().filterCompletedInstants().lastInstant();
-      if (!latestMetadataInstant.isPresent()) {
-        LOG.warn("Metadata Table will need to be re-bootstrapped as no 
instants were found");
-        rebootstrap = true;
-      } else if 
(!latestMetadataInstant.get().getTimestamp().equals(SOLO_COMMIT_TIMESTAMP)
-          && 
dataMetaClient.getActiveTimeline().getAllCommitsTimeline().isBeforeTimelineStarts(latestMetadataInstant.get().getTimestamp()))
 {
-        // TODO: Revisit this logic and validate that filtering for all 
commits timeline is the right thing to do
-        LOG.warn("Metadata Table will need to be re-bootstrapped as un-synced 
instants have been archived."
-            + " latestMetadataInstant=" + 
latestMetadataInstant.get().getTimestamp()
-            + ", latestDataInstant=" + 
dataMetaClient.getActiveTimeline().firstInstant().get().getTimestamp());
+      final Option<HoodieInstant> latestMetadataInstant =
+          
metadataMetaClient.getActiveTimeline().filterCompletedInstants().lastInstant();
+
+      if (isBootstrapNeeded(latestMetadataInstant, actionMetadata)) {

Review comment:
       Right, fixed it. 

##########
File path: 
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
##########
@@ -110,15 +124,20 @@ protected HoodieBackedTableMetadataWriter(Configuration 
hadoopConf, HoodieWriteC
       enabled = true;
 
       // Inline compaction and auto clean is required as we dont expose this 
table outside
-      ValidationUtils.checkArgument(!this.metadataWriteConfig.isAutoClean(), 
"Cleaning is controlled internally for Metadata table.");
-      
ValidationUtils.checkArgument(!this.metadataWriteConfig.inlineCompactionEnabled(),
 "Compaction is controlled internally for metadata table.");
+      ValidationUtils.checkArgument(!this.metadataWriteConfig.isAutoClean(),
+          "Cleaning is controlled internally for Metadata table.");
+      
ValidationUtils.checkArgument(!this.metadataWriteConfig.inlineCompactionEnabled(),
+          "Compaction is controlled internally for metadata table.");
       // Metadata Table cannot have metadata listing turned on. (infinite 
loop, much?)
-      
ValidationUtils.checkArgument(this.metadataWriteConfig.shouldAutoCommit(), 
"Auto commit is required for Metadata Table");
-      
ValidationUtils.checkArgument(!this.metadataWriteConfig.isMetadataTableEnabled(),
 "File listing cannot be used for Metadata Table");
+      
ValidationUtils.checkArgument(this.metadataWriteConfig.shouldAutoCommit(),

Review comment:
       As suggested in the wiki I have the reformatting for changed lines (line 
140). IDE for some reasons chose to format the above lines closer it also. 
Anyways these lines were crossing the length of 120 and I broke the log 
statements into multi line for easy readability and not to cross 120 width. 

##########
File path: 
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
##########
@@ -270,6 +299,53 @@ protected void bootstrapIfNeeded(HoodieEngineContext 
engineContext, HoodieTableM
     }
   }
 
+  /**
+   * Whether bootstrap operation needed for this metadata table.
+   * <p>
+   * Rollback of the first commit would look like un-synced instants in the 
metadata table.
+   * Action metadata is needed to verify the instant time and avoid erroneous 
bootstrapping.
+   * <p>
+   * TODO: Revisit this logic and validate that filtering for all
+   *       commits timeline is the right thing to do
+   *
+   * @return True if the bootstrap is not needed, False otherwise
+   */
+  private <T extends SpecificRecordBase> boolean 
isBootstrapNeeded(Option<HoodieInstant> latestMetadataInstant,
+                                                                   Option<T> 
actionMetadata) {
+    if (!latestMetadataInstant.isPresent()) {
+      LOG.warn("Metadata Table will need to be re-bootstrapped as no instants 
were found");
+      return true;
+    }
+
+    final String latestMetadataInstantTimestamp = 
latestMetadataInstant.get().getTimestamp();
+    if (latestMetadataInstantTimestamp.equals(SOLO_COMMIT_TIMESTAMP)) {
+      return false;
+    }
+
+    boolean isRollbackAction = false;
+    List<String> rollbackedTimestamps = Collections.emptyList();
+    if (actionMetadata.isPresent() && actionMetadata.get() instanceof 
HoodieRollbackMetadata) {
+      isRollbackAction = true;
+      List<HoodieInstantInfo> rollbackedInstants =
+          ((HoodieRollbackMetadata) 
actionMetadata.get()).getInstantsRollback();
+      rollbackedTimestamps = rollbackedInstants.stream().map(instant -> {
+        return instant.getCommitTime().toString();
+      }).collect(Collectors.toList());
+    }
+
+    if 
(dataMetaClient.getActiveTimeline().getAllCommitsTimeline().isBeforeTimelineStarts(

Review comment:
       Good catch. Fixed it. 

##########
File path: 
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
##########
@@ -270,6 +299,53 @@ protected void bootstrapIfNeeded(HoodieEngineContext 
engineContext, HoodieTableM
     }
   }
 
+  /**
+   * Whether bootstrap operation needed for this metadata table.
+   * <p>
+   * Rollback of the first commit would look like un-synced instants in the 
metadata table.
+   * Action metadata is needed to verify the instant time and avoid erroneous 
bootstrapping.
+   * <p>
+   * TODO: Revisit this logic and validate that filtering for all
+   *       commits timeline is the right thing to do
+   *
+   * @return True if the bootstrap is not needed, False otherwise
+   */
+  private <T extends SpecificRecordBase> boolean 
isBootstrapNeeded(Option<HoodieInstant> latestMetadataInstant,
+                                                                   Option<T> 
actionMetadata) {
+    if (!latestMetadataInstant.isPresent()) {
+      LOG.warn("Metadata Table will need to be re-bootstrapped as no instants 
were found");
+      return true;
+    }
+
+    final String latestMetadataInstantTimestamp = 
latestMetadataInstant.get().getTimestamp();
+    if (latestMetadataInstantTimestamp.equals(SOLO_COMMIT_TIMESTAMP)) {

Review comment:
       Yes, it was combined with all other conditions. Old line 248 has it. I 
pulled it out here.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to