vinishjail97 commented on code in PR #772:
URL: https://github.com/apache/incubator-xtable/pull/772#discussion_r2641072664


##########
xtable-core/src/main/java/org/apache/xtable/hudi/BaseFileUpdatesExtractor.java:
##########
@@ -82,16 +93,50 @@ ReplaceMetadata extractSnapshotChanges(
         HoodieMetadataConfig.newBuilder()
             .enable(metaClient.getTableConfig().isMetadataTableAvailable())
             .build();
-    HoodieTableFileSystemView fsView =
-        new HoodieMetadataFileSystemView(
-            engineContext, metaClient, metaClient.getActiveTimeline(), 
metadataConfig);
+    HoodieTableMetadata tableMetadata =
+        metadataConfig.isEnabled()
+            ? metaClient
+                .getTableFormat()
+                .getMetadataFactory()
+                .create(
+                    engineContext,
+                    metaClient.getStorage(),
+                    metadataConfig,
+                    tableBasePath.toString(),
+                    true)

Review Comment:
   Is the last parameter supposed to be true? 



##########
xtable-core/src/main/java/org/apache/xtable/hudi/HudiTableManager.java:
##########
@@ -54,6 +56,10 @@ public class HudiTableManager {
       "org.apache.hudi.keygen.TimestampBasedKeyGenerator";
   private static final String COMPLEX_KEY_GENERATOR = 
"org.apache.hudi.keygen.ComplexKeyGenerator";
   private static final String SIMPLE_KEY_GENERATOR = 
"org.apache.hudi.keygen.SimpleKeyGenerator";
+  // Hudi 1.x spark query defaults to "default" database and spark read query 
picks up delta
+  // instead, 0.x doesn't have the same problem.
+  // TODO: https://app.clickup.com/t/18029943/ENG-23339
+  private static final String DEFAULT_DATABASE_NAME = "default_hudi";

Review Comment:
   Is this still required? 



##########
xtable-core/src/main/java/org/apache/hudi/stats/XTableValueMetadata.java:
##########
@@ -0,0 +1,173 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ 
+package org.apache.hudi.stats;
+
+import static 
org.apache.xtable.model.schema.InternalSchema.MetadataKey.TIMESTAMP_PRECISION;
+import static 
org.apache.xtable.model.schema.InternalSchema.MetadataValue.MICROS;
+
+import java.lang.reflect.Constructor;
+import java.time.Instant;
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+
+import org.apache.hudi.metadata.HoodieIndexVersion;
+
+import org.apache.xtable.model.schema.InternalSchema;
+import org.apache.xtable.model.stat.ColumnStat;
+
+public class XTableValueMetadata {

Review Comment:
   Can you add java doc for this? 



##########
xtable-core/src/main/java/org/apache/xtable/hudi/HudiConversionTarget.java:
##########
@@ -607,7 +627,7 @@ private HoodieWriteConfig getWriteConfig(
               HoodieMetadataConfig.newBuilder()
                   .enable(true)
                   .withProperties(properties)
-                  .withMetadataIndexColumnStats(true)
+                  .withMetadataIndexColumnStats(false)

Review Comment:
   Why is this disabled? 



##########
xtable-core/src/test/java/org/apache/xtable/TestAbstractHudiTable.java:
##########
@@ -147,6 +154,9 @@ public abstract class TestAbstractHudiTable
       // Add key generator
       this.typedProperties = new TypedProperties();
       typedProperties.put(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key(), 
RECORD_KEY_FIELD_NAME);
+      
typedProperties.put(HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS.key(),
 "false");
+      typedProperties.put(HoodieTableConfig.TABLE_FORMAT.key(), 
TableFormat.ICEBERG);
+      // typedProperties.put(HoodieMetadataConfig.ENABLE.key(), "false");

Review Comment:
   Why is TABLE_FORMAT ICEBERG and MDT disabled? 



##########
xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiFileStatsExtractor.java:
##########
@@ -1,466 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- 
-package org.apache.xtable.hudi;
-
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertTrue;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.when;
-
-import java.io.IOException;
-import java.math.BigDecimal;
-import java.net.URI;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.Paths;
-import java.sql.Date;
-import java.time.Instant;
-import java.time.LocalDate;
-import java.time.ZoneOffset;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.stream.Collectors;
-import java.util.stream.Stream;
-
-import org.apache.avro.Conversions;
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericData;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.parquet.avro.AvroParquetWriter;
-import org.apache.parquet.hadoop.ParquetWriter;
-import org.apache.parquet.hadoop.util.HadoopOutputFile;
-import org.jetbrains.annotations.NotNull;
-import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.io.TempDir;
-
-import org.apache.hudi.client.common.HoodieJavaEngineContext;
-import org.apache.hudi.common.config.HoodieMetadataConfig;
-import org.apache.hudi.common.model.HoodieAvroPayload;
-import org.apache.hudi.common.model.HoodieAvroRecord;
-import org.apache.hudi.common.model.HoodieKey;
-import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieTableType;
-import org.apache.hudi.common.table.HoodieTableMetaClient;
-import org.apache.hudi.common.util.Option;
-import org.apache.hudi.metadata.HoodieTableMetadata;
-
-import org.apache.xtable.GenericTable;
-import org.apache.xtable.TestJavaHudiTable;
-import org.apache.xtable.model.schema.InternalField;
-import org.apache.xtable.model.schema.InternalSchema;
-import org.apache.xtable.model.schema.InternalType;
-import org.apache.xtable.model.stat.ColumnStat;
-import org.apache.xtable.model.storage.FileFormat;
-import org.apache.xtable.model.storage.InternalDataFile;
-
-public class TestHudiFileStatsExtractor {

Review Comment:
   Why is this file deleted? 



##########
xtable-core/src/main/java/org/apache/hudi/stats/XTableValueMetadata.java:
##########
@@ -0,0 +1,173 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ 
+package org.apache.hudi.stats;
+
+import static 
org.apache.xtable.model.schema.InternalSchema.MetadataKey.TIMESTAMP_PRECISION;
+import static 
org.apache.xtable.model.schema.InternalSchema.MetadataValue.MICROS;
+
+import java.lang.reflect.Constructor;
+import java.time.Instant;
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+
+import org.apache.hudi.metadata.HoodieIndexVersion;
+
+import org.apache.xtable.model.schema.InternalSchema;
+import org.apache.xtable.model.stat.ColumnStat;
+
+public class XTableValueMetadata {

Review Comment:
   Can you add tests for this class? 



##########
xtable-core/src/test/java/org/apache/xtable/ITConversionController.java:
##########
@@ -227,8 +231,12 @@ private ConversionSourceProvider<?> 
getConversionSourceProvider(String sourceTab
   public void testVariousOperations(
       String sourceTableFormat, SyncMode syncMode, boolean isPartitioned) {
     String tableName = getTableName();
-    ConversionController conversionController = new 
ConversionController(jsc.hadoopConfiguration());
     List<String> targetTableFormats = getOtherFormats(sourceTableFormat);
+    if (sourceTableFormat.equals(PAIMON)) {

Review Comment:
   How are we planning to fix this? 



##########
xtable-core/src/main/java/org/apache/xtable/hudi/HudiConversionSource.java:
##########
@@ -216,7 +218,13 @@ private CommitsPair 
getCompletedAndPendingCommitsAfterInstant(HoodieInstant comm
     List<HoodieInstant> allInstants =
         metaClient
             .getActiveTimeline()
-            .findInstantsAfter(commitInstant.getTimestamp())
+            .filter(

Review Comment:
   Why can't we use `findInstantsAfter`? 



##########
xtable-core/src/main/java/org/apache/xtable/hudi/HudiConversionSource.java:
##########
@@ -112,7 +115,7 @@ public InternalSnapshot getCurrentSnapshot() {
             pendingInstants.stream()
                 .map(
                     hoodieInstant ->
-                        
HudiInstantUtils.parseFromInstantTime(hoodieInstant.getTimestamp()))
+                        
HudiInstantUtils.parseFromInstantTime(hoodieInstant.requestedTime()))

Review Comment:
   Can you add comments explaining when `requestedTime` vs `completionTime` is used? 



##########
xtable-core/src/main/java/org/apache/xtable/hudi/BaseFileUpdatesExtractor.java:
##########
@@ -167,10 +213,13 @@ ReplaceMetadata extractSnapshotChanges(
    *
    * @param internalFilesDiff the diff to apply to the Hudi table
    * @param commit The current commit started by the Hudi client
+   * @param indexVersion the Hudi index version
    * @return The information needed to create a "replace" commit for the Hudi 
table
    */
   ReplaceMetadata convertDiff(
-      @NonNull InternalFilesDiff internalFilesDiff, @NonNull String commit) {
+      @NonNull InternalFilesDiff internalFilesDiff,
+      @NonNull String commit,
+      HoodieIndexVersion indexVersion) {

Review Comment:
   This can be `@NonNull` too? 



##########
xtable-core/src/main/java/org/apache/xtable/hudi/BaseFileUpdatesExtractor.java:
##########
@@ -82,16 +93,50 @@ ReplaceMetadata extractSnapshotChanges(
         HoodieMetadataConfig.newBuilder()
             .enable(metaClient.getTableConfig().isMetadataTableAvailable())
             .build();
-    HoodieTableFileSystemView fsView =
-        new HoodieMetadataFileSystemView(
-            engineContext, metaClient, metaClient.getActiveTimeline(), 
metadataConfig);
+    HoodieTableMetadata tableMetadata =
+        metadataConfig.isEnabled()
+            ? metaClient
+                .getTableFormat()
+                .getMetadataFactory()
+                .create(
+                    engineContext,
+                    metaClient.getStorage(),
+                    metadataConfig,
+                    tableBasePath.toString(),
+                    true)
+            : null;
+    FileSystemViewManager fileSystemViewManager =
+        FileSystemViewManager.createViewManager(
+            engineContext,
+            metadataConfig,
+            FileSystemViewStorageConfig.newBuilder()
+                .withStorageType(FileSystemViewStorageType.MEMORY)
+                .build(),
+            HoodieCommonConfig.newBuilder().build(),
+            meta -> tableMetadata);
+    try (SyncableFileSystemView fsView = 
fileSystemViewManager.getFileSystemView(metaClient)) {
+      return extractFromFsView(partitionedDataFiles, commit, fsView, 
metaClient, metadataConfig);
+    } finally {
+      fileSystemViewManager.close();
+      if (tableMetadata != null) {
+        tableMetadata.close();
+      }
+    }
+  }
+
+  ReplaceMetadata extractFromFsView(
+      List<PartitionFileGroup> partitionedDataFiles,
+      String commit,
+      SyncableFileSystemView fsView,
+      HoodieTableMetaClient metaClient,
+      HoodieMetadataConfig metadataConfig) {
     boolean isTableInitialized = metaClient.isTimelineNonEmpty();
     // Track the partitions that are not present in the snapshot, so the files 
for those partitions
     // can be dropped
+    HoodieIndexVersion indexVersion =
+        existingIndexVersionOrDefault(PARTITION_NAME_COLUMN_STATS, metaClient);

Review Comment:
   Rename `existingIndexVersionOrDefault` to 
`getExistingIndexVersionOrDefault`? 



##########
xtable-core/src/main/java/org/apache/xtable/hudi/HudiInstantUtils.java:
##########
@@ -34,7 +34,7 @@
 
 import org.apache.xtable.model.exception.ParseException;
 
-class HudiInstantUtils {
+public class HudiInstantUtils {

Review Comment:
   Why is this public? 



##########
xtable-core/src/main/java/org/apache/xtable/hudi/BaseFileUpdatesExtractor.java:
##########
@@ -74,6 +84,7 @@ public class BaseFileUpdatesExtractor {
    * @param commit The current commit started by the Hudi client
    * @return The information needed to create a "replace" commit for the Hudi 
table
    */
+  @SneakyThrows

Review Comment:
   Can we catch/throw actual exceptions and avoid `@SneakyThrows` in main repo? 



##########
xtable-core/src/test/java/org/apache/xtable/ITConversionController.java:
##########
@@ -533,28 +536,31 @@ private static Stream<Arguments> 
provideArgsForPartitionTesting() {
         Arguments.of(
             buildArgsForPartition(
                 ICEBERG, Arrays.asList(DELTA, HUDI), null, "level:VALUE", 
levelFilter)),
-        Arguments.of(
-            // Delta Lake does not currently support nested partition columns
-            buildArgsForPartition(
-                HUDI,
-                Arrays.asList(ICEBERG),
-                "nested_record.level:SIMPLE",
-                "nested_record.level:VALUE",
-                nestedLevelFilter)),
+        // Different issue, didn't investigate this much at all
+        //        Arguments.of(
+        //            // Delta Lake does not currently support nested 
partition columns
+        //            buildArgsForPartition(
+        //                HUDI,
+        //                Arrays.asList(ICEBERG),
+        //                "nested_record.level:SIMPLE",
+        //                "nested_record.level:VALUE",
+        //                nestedLevelFilter)),
         Arguments.of(
             buildArgsForPartition(
                 HUDI,
                 Arrays.asList(ICEBERG, DELTA),
                 "severity:SIMPLE",
                 "severity:VALUE",
-                severityFilter)),
-        Arguments.of(
-            buildArgsForPartition(
-                HUDI,
-                Arrays.asList(ICEBERG, DELTA),
-                "timestamp_micros_nullable_field:TIMESTAMP,level:SIMPLE",
-                "timestamp_micros_nullable_field:DAY:yyyy/MM/dd,level:VALUE",
-                timestampAndLevelFilter)));
+                severityFilter)));
+    // [ENG-6555] addresses this

Review Comment:
   What's the issue and why is the test disabled? 



##########
xtable-core/src/test/java/org/apache/xtable/hudi/ITHudiConversionTarget.java:
##########
@@ -306,8 +318,12 @@ void 
archiveTimelineAndCleanMetadataTableAfterMultipleCommits(String partitionPa
         metaClient, partitionPath, Arrays.asList(file0Pair, Pair.of(fileName1, 
filePath1)));
     try (HoodieBackedTableMetadata hoodieBackedTableMetadata =
         new HoodieBackedTableMetadata(
-            CONTEXT, getHoodieWriteConfig(metaClient).getMetadataConfig(), 
tableBasePath, true)) {
-      assertColStats(hoodieBackedTableMetadata, partitionPath, fileName1);
+            CONTEXT,
+            metaClient.getStorage(),
+            getHoodieWriteConfig(metaClient).getMetadataConfig(),
+            tableBasePath,
+            true)) {
+      // assertColStats(hoodieBackedTableMetadata, partitionPath, fileName1);

Review Comment:
   Why are colStats disabled, and should this be decided based on the table 
version provided by the user, i.e., 6 vs 9?  



##########
xtable-core/src/main/java/org/apache/xtable/hudi/HudiDataFileExtractor.java:
##########
@@ -85,15 +85,23 @@ public HudiDataFileExtractor(
       HoodieTableMetaClient metaClient,
       PathBasedPartitionValuesExtractor hudiPartitionValuesExtractor,
       HudiFileStatsExtractor hudiFileStatsExtractor) {
-    this.engineContext = new 
HoodieLocalEngineContext(metaClient.getHadoopConf());
+    this.engineContext = new 
HoodieLocalEngineContext(metaClient.getStorageConf());
     metadataConfig =
         HoodieMetadataConfig.newBuilder()
             .enable(metaClient.getTableConfig().isMetadataTableAvailable())
             .build();
-    this.basePath = metaClient.getBasePathV2();
+    this.basePath = 
HadoopFSUtils.convertToHadoopPath(metaClient.getBasePath());
     this.tableMetadata =
-        metadataConfig.enabled()
-            ? HoodieTableMetadata.create(engineContext, metadataConfig, 
basePath.toString(), true)
+        metadataConfig.isEnabled()
+            ? metaClient
+                .getTableFormat()
+                .getMetadataFactory()
+                .create(
+                    engineContext,
+                    metaClient.getStorage(),
+                    metadataConfig,
+                    basePath.toString(),
+                    true)

Review Comment:
   Is the last parameter (`reuse`) supposed to be true? Will there be any side 
effects? 



##########
xtable-core/src/test/java/org/apache/xtable/ITConversionController.java:
##########
@@ -798,34 +765,6 @@ public void testMetadataRetention() throws Exception {
     }
   }
 
-  @Test
-  void otherIcebergPartitionTypes() {

Review Comment:
   Why did we remove this test? 



##########
xtable-core/src/test/java/org/apache/xtable/ITConversionController.java:
##########
@@ -679,35 +679,6 @@ public void testOutOfSyncIncrementalSyncs() {
     }
   }
 
-  @Test
-  public void testIncrementalSyncsWithNoChangesDoesNotThrowError() {

Review Comment:
   Why did we remove this test? 



##########
xtable-core/src/test/java/org/apache/xtable/ITConversionController.java:
##########
@@ -533,28 +536,31 @@ private static Stream<Arguments> 
provideArgsForPartitionTesting() {
         Arguments.of(
             buildArgsForPartition(
                 ICEBERG, Arrays.asList(DELTA, HUDI), null, "level:VALUE", 
levelFilter)),
-        Arguments.of(
-            // Delta Lake does not currently support nested partition columns
-            buildArgsForPartition(
-                HUDI,
-                Arrays.asList(ICEBERG),
-                "nested_record.level:SIMPLE",
-                "nested_record.level:VALUE",
-                nestedLevelFilter)),
+        // Different issue, didn't investigate this much at all

Review Comment:
   What's the issue? 



##########
xtable-core/src/test/java/org/apache/xtable/ITConversionController.java:
##########
@@ -395,6 +400,8 @@ public void testConcurrentInsertWritesInSource(
 
   @ParameterizedTest
   @MethodSource("testCasesWithPartitioningAndSyncModes")
+  @Disabled(

Review Comment:
   Any plans for enabling this back? 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to