vinothchandar commented on a change in pull request #3590:
URL: https://github.com/apache/hudi/pull/3590#discussion_r716234905



##########
File path: 
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java
##########
@@ -74,14 +77,11 @@
   // Metadata table's timeline and metaclient
   private HoodieTableMetaClient metaClient;
   private HoodieTableConfig tableConfig;
-  private List<FileSlice> latestFileSystemMetadataSlices;
   // should we reuse the open file handles, across calls
   private final boolean reuse;
 
-
-  // Readers for the base and log file which store the metadata
-  private transient HoodieFileReader<GenericRecord> baseFileReader;
-  private transient HoodieMetadataMergedLogRecordScanner logRecordScanner;
+  // Readers for each bucket, identified by its bucketFileId
+  private Map<String, Pair<HoodieFileReader, 
HoodieMetadataMergedLogRecordScanner>> bucketReaders = new 
ConcurrentHashMap<>();

Review comment:
       buckets? really? :) 

##########
File path: 
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java
##########
@@ -56,15 +59,15 @@
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
 import java.util.stream.Collectors;
 
 /**
  * Table metadata provided by an internal DFS backed Hudi metadata table.
- *
- * If the metadata table does not exist, RPC calls are used to retrieve file 
listings from the file system.
- * No updates are applied to the table and it is not synced.
  */
 public class HoodieBackedTableMetadata extends BaseTableMetadata {

Review comment:
       Let's please unit test this file thoroughly. 

##########
File path: 
hudi-flink/src/test/java/org/apache/hudi/sink/TestStreamWriteOperatorCoordinator.java
##########
@@ -180,7 +181,7 @@ public void testHiveSyncInvoked() throws Exception {
     assertDoesNotThrow(() -> coordinator.notifyCheckpointComplete(1));
   }
 
-  @Test
+  @Disabled

Review comment:
       Are we tracking a Jira to re-enable this?

##########
File path: 
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java
##########
@@ -167,173 +170,150 @@ private void initIfNeeded() {
       throw new HoodieIOException("Error merging records from metadata table 
for key :" + key, ioe);
     } finally {
       if (!reuse) {
-        closeOrThrow();
+        close(partitionName);
       }
     }
   }
 
-  private void openReadersIfNeededOrThrow() {
-    try {
-      openReadersIfNeeded();
-    } catch (IOException e) {
-      throw new HoodieIOException("Error opening readers to the Metadata 
Table: ", e);
-    }
-  }
-
   /**
    * Returns a new pair of readers to the base and log files.
    */
-  private void openReadersIfNeeded() throws IOException {
-    if (reuse && (baseFileReader != null || logRecordScanner != null)) {
-      // quickly exit out without synchronizing if reusing and readers are 
already open
-      return;
-    }
-
-    // we always force synchronization, if reuse=false, to handle concurrent 
close() calls as well.
-    synchronized (this) {
-      if (baseFileReader != null || logRecordScanner != null) {
-        return;
-      }
-
-      final long baseFileOpenMs;
-      final long logScannerOpenMs;
+  private Pair<HoodieFileReader, HoodieMetadataMergedLogRecordScanner> 
openReadersIfNeeded(String key, String partitionName) throws IOException {

Review comment:
       Please audit this and ensure that the reuse of readers across different 
calls to `getRecordKeyByKey()` works from the timeline server path. Otherwise, 
performance will suffer from repeated opens and closes.

##########
File path: 
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java
##########
@@ -116,20 +116,23 @@ private void initIfNeeded() {
         this.metaClient = null;
         this.tableConfig = null;
       }
-
-      if (enabled) {
-        openTimelineScanner(metaClient.getActiveTimeline());
-      }
     }
   }
 
   @Override
-  protected Option<HoodieRecord<HoodieMetadataPayload>> 
getRecordByKeyFromMetadata(String key) {
+  protected Option<HoodieRecord<HoodieMetadataPayload>> 
getRecordByKeyFromMetadata(String key, String partitionName) {
+    Pair<HoodieFileReader, HoodieMetadataMergedLogRecordScanner> readers;
+    try {
+      readers = openReadersIfNeeded(key, partitionName);
+    } catch (IOException e) {

Review comment:
       Why bring the exception handling back in again? I am sure Robert C. 
Martin would agree with keeping the exception handling in a separate method. :) 

##########
File path: 
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java
##########
@@ -167,173 +170,150 @@ private void initIfNeeded() {
       throw new HoodieIOException("Error merging records from metadata table 
for key :" + key, ioe);
     } finally {
       if (!reuse) {
-        closeOrThrow();
+        close(partitionName);
       }
     }
   }
 
-  private void openReadersIfNeededOrThrow() {
-    try {
-      openReadersIfNeeded();
-    } catch (IOException e) {
-      throw new HoodieIOException("Error opening readers to the Metadata 
Table: ", e);
-    }
-  }
-
   /**
    * Returns a new pair of readers to the base and log files.
    */
-  private void openReadersIfNeeded() throws IOException {
-    if (reuse && (baseFileReader != null || logRecordScanner != null)) {
-      // quickly exit out without synchronizing if reusing and readers are 
already open
-      return;
-    }
-
-    // we always force synchronization, if reuse=false, to handle concurrent 
close() calls as well.
-    synchronized (this) {
-      if (baseFileReader != null || logRecordScanner != null) {
-        return;
-      }
-
-      final long baseFileOpenMs;
-      final long logScannerOpenMs;
+  private Pair<HoodieFileReader, HoodieMetadataMergedLogRecordScanner> 
openReadersIfNeeded(String key, String partitionName) throws IOException {

Review comment:
       This method is too long. Let's break it down nicely.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to