hudi-agent commented on code in PR #18816:
URL: https://github.com/apache/hudi/pull/18816#discussion_r3289441965


##########
hudi-cli/src/main/java/org/apache/hudi/cli/commands/RepairsCommand.java:
##########
@@ -18,325 +18,44 @@
 
 package org.apache.hudi.cli.commands;
 
-import org.apache.hudi.cli.HoodieCLI;
-import org.apache.hudi.cli.HoodiePrintHelper;
-import org.apache.hudi.cli.HoodieTableHeaderFields;
-import org.apache.hudi.cli.utils.InputStreamConsumer;
-import org.apache.hudi.cli.utils.SparkUtil;
-import org.apache.hudi.common.engine.HoodieLocalEngineContext;
-import org.apache.hudi.common.fs.FSUtils;
-import org.apache.hudi.common.model.HoodiePartitionMetadata;
-import org.apache.hudi.common.table.HoodieTableConfig;
 import org.apache.hudi.common.table.HoodieTableMetaClient;
-import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
-import org.apache.hudi.common.table.timeline.HoodieTimeline;
+import org.apache.hudi.common.table.timeline.HoodieInstant;
 import org.apache.hudi.common.table.timeline.TimelineUtils;
-import org.apache.hudi.common.util.CleanerUtils;
-import org.apache.hudi.common.util.Option;
-import org.apache.hudi.common.util.PartitionPathEncodeUtils;
-import org.apache.hudi.common.util.StringUtils;
-import org.apache.hudi.exception.HoodieIOException;
-import org.apache.hudi.storage.StoragePath;
 
-import lombok.extern.slf4j.Slf4j;
-import org.apache.avro.AvroRuntimeException;
-import org.apache.spark.launcher.SparkLauncher;
-import org.apache.spark.sql.hudi.DeDupeType;
-import org.apache.spark.util.Utils;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
 import org.springframework.shell.standard.ShellComponent;
 import org.springframework.shell.standard.ShellMethod;
 import org.springframework.shell.standard.ShellOption;
 
-import java.io.FileInputStream;
+import java.io.EOFException;
 import java.io.IOException;

Review Comment:
   πŸ€– This commit ("eliminate checkstyle unused import violations") appears to 
have deleted nearly the entire class β€” `deduplicate`, 
`overwriteHoodieProperties`, `removeCorruptedPendingCleanAction` (shell 
binding), `showFailedCommits`, `migratePartitionMeta`, 
`repairDeprecatePartition`, and `renamePartition` shell commands are all gone, 
and `addPartitionMeta` was replaced with a hardcoded-string stub. Was this 
intentional? It looks like an accidental overwrite β€” could you restore the 
original methods and limit this commit to the actual unused-import cleanup?
   
   <sub><i>- AI-generated; verify before applying. React πŸ‘/πŸ‘Ž to flag 
quality.</i></sub>



##########
hudi-cli/src/main/java/org/apache/hudi/cli/commands/RepairsCommand.java:
##########
@@ -18,325 +18,44 @@
 
 package org.apache.hudi.cli.commands;
 
-import org.apache.hudi.cli.HoodieCLI;
-import org.apache.hudi.cli.HoodiePrintHelper;
-import org.apache.hudi.cli.HoodieTableHeaderFields;
-import org.apache.hudi.cli.utils.InputStreamConsumer;
-import org.apache.hudi.cli.utils.SparkUtil;
-import org.apache.hudi.common.engine.HoodieLocalEngineContext;
-import org.apache.hudi.common.fs.FSUtils;
-import org.apache.hudi.common.model.HoodiePartitionMetadata;
-import org.apache.hudi.common.table.HoodieTableConfig;
 import org.apache.hudi.common.table.HoodieTableMetaClient;
-import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
-import org.apache.hudi.common.table.timeline.HoodieTimeline;
+import org.apache.hudi.common.table.timeline.HoodieInstant;
 import org.apache.hudi.common.table.timeline.TimelineUtils;
-import org.apache.hudi.common.util.CleanerUtils;
-import org.apache.hudi.common.util.Option;
-import org.apache.hudi.common.util.PartitionPathEncodeUtils;
-import org.apache.hudi.common.util.StringUtils;
-import org.apache.hudi.exception.HoodieIOException;
-import org.apache.hudi.storage.StoragePath;
 
-import lombok.extern.slf4j.Slf4j;
-import org.apache.avro.AvroRuntimeException;
-import org.apache.spark.launcher.SparkLauncher;
-import org.apache.spark.sql.hudi.DeDupeType;
-import org.apache.spark.util.Utils;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
 import org.springframework.shell.standard.ShellComponent;
 import org.springframework.shell.standard.ShellMethod;
 import org.springframework.shell.standard.ShellOption;
 
-import java.io.FileInputStream;
+import java.io.EOFException;
 import java.io.IOException;
 import java.util.List;
-import java.util.Map;
-import java.util.Properties;
-import java.util.TreeSet;
-import java.util.stream.Collectors;
 
-import scala.collection.JavaConverters;
-
-/**
- * CLI command to display and trigger repair options.
- */
 @ShellComponent
-@Slf4j
 public class RepairsCommand {
-  public static final String DEDUPLICATE_RETURN_PREFIX = "Deduplicated files 
placed in:  ";
 
-  @ShellMethod(key = "repair deduplicate",
-      value = "De-duplicate a partition path contains duplicates & produce 
repaired files to replace with")
-  public String deduplicate(
-      @ShellOption(value = {"--duplicatedPartitionPath"}, defaultValue = "", 
help = "Partition Path containing the duplicates")
-      final String duplicatedPartitionPath,
-      @ShellOption(value = {"--repairedOutputPath"}, help = "Location to place 
the repaired files")
-      final String repairedOutputPath,
-      @ShellOption(value = {"--sparkProperties"}, help = "Spark Properties 
File Path",
-          defaultValue = "") String sparkPropertiesPath,
-      @ShellOption(value = "--sparkMaster", defaultValue = "", help = "Spark 
Master") String master,
-      @ShellOption(value = "--sparkMemory", defaultValue = "4G",
-          help = "Spark executor memory") final String sparkMemory,
-      @ShellOption(value = {"--dryrun"},
-          help = "Should we actually remove duplicates or just run and store 
result to repairedOutputPath",
-          defaultValue = "true") final boolean dryRun,
-      @ShellOption(value = {"--dedupeType"}, help = "Valid values are - 
insert_type, update_type and upsert_type",
-          defaultValue = "insert_type") final String dedupeType)
-      throws Exception {
-    if (!DeDupeType.values().contains(DeDupeType.withName(dedupeType))) {
-      throw new IllegalArgumentException("Please provide valid dedupe type!");
-    }
-    if (StringUtils.isNullOrEmpty(sparkPropertiesPath)) {
-      sparkPropertiesPath =
-          
Utils.getDefaultPropertiesFile(JavaConverters.mapAsScalaMapConverter(System.getenv()).asScala());
-    }
+  private static final Logger LOG = LogManager.getLogger(RepairsCommand.class);
+  public static final String DEDUPLICATE_RETURN_PREFIX = "Deduplicated empty 
commits:";

Review Comment:
   πŸ€– The public constant `DEDUPLICATE_RETURN_PREFIX` changed from 
`"Deduplicated files placed in:  "` to `"Deduplicated empty commits:"`. Since 
the `deduplicate` method itself was deleted, the constant is dead β€” but if 
anything downstream parses CLI output by this prefix, the change would silently 
break it. Could you restore both the method and the original constant value?
   
   <sub><i>- AI-generated; verify before applying. React πŸ‘/πŸ‘Ž to flag 
quality.</i></sub>



##########
hudi-cli/src/main/java/org/apache/hudi/cli/commands/RepairsCommand.java:
##########
@@ -18,325 +18,44 @@
 
 package org.apache.hudi.cli.commands;
 
-import org.apache.hudi.cli.HoodieCLI;
-import org.apache.hudi.cli.HoodiePrintHelper;
-import org.apache.hudi.cli.HoodieTableHeaderFields;
-import org.apache.hudi.cli.utils.InputStreamConsumer;
-import org.apache.hudi.cli.utils.SparkUtil;
-import org.apache.hudi.common.engine.HoodieLocalEngineContext;
-import org.apache.hudi.common.fs.FSUtils;
-import org.apache.hudi.common.model.HoodiePartitionMetadata;
-import org.apache.hudi.common.table.HoodieTableConfig;
 import org.apache.hudi.common.table.HoodieTableMetaClient;
-import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
-import org.apache.hudi.common.table.timeline.HoodieTimeline;
+import org.apache.hudi.common.table.timeline.HoodieInstant;
 import org.apache.hudi.common.table.timeline.TimelineUtils;
-import org.apache.hudi.common.util.CleanerUtils;
-import org.apache.hudi.common.util.Option;
-import org.apache.hudi.common.util.PartitionPathEncodeUtils;
-import org.apache.hudi.common.util.StringUtils;
-import org.apache.hudi.exception.HoodieIOException;
-import org.apache.hudi.storage.StoragePath;
 
-import lombok.extern.slf4j.Slf4j;
-import org.apache.avro.AvroRuntimeException;
-import org.apache.spark.launcher.SparkLauncher;
-import org.apache.spark.sql.hudi.DeDupeType;
-import org.apache.spark.util.Utils;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
 import org.springframework.shell.standard.ShellComponent;
 import org.springframework.shell.standard.ShellMethod;
 import org.springframework.shell.standard.ShellOption;
 
-import java.io.FileInputStream;
+import java.io.EOFException;
 import java.io.IOException;
 import java.util.List;
-import java.util.Map;
-import java.util.Properties;
-import java.util.TreeSet;
-import java.util.stream.Collectors;
 
-import scala.collection.JavaConverters;
-
-/**
- * CLI command to display and trigger repair options.
- */
 @ShellComponent
-@Slf4j
 public class RepairsCommand {
-  public static final String DEDUPLICATE_RETURN_PREFIX = "Deduplicated files 
placed in:  ";
 
-  @ShellMethod(key = "repair deduplicate",
-      value = "De-duplicate a partition path contains duplicates & produce 
repaired files to replace with")
-  public String deduplicate(
-      @ShellOption(value = {"--duplicatedPartitionPath"}, defaultValue = "", 
help = "Partition Path containing the duplicates")
-      final String duplicatedPartitionPath,
-      @ShellOption(value = {"--repairedOutputPath"}, help = "Location to place 
the repaired files")
-      final String repairedOutputPath,
-      @ShellOption(value = {"--sparkProperties"}, help = "Spark Properties 
File Path",
-          defaultValue = "") String sparkPropertiesPath,
-      @ShellOption(value = "--sparkMaster", defaultValue = "", help = "Spark 
Master") String master,
-      @ShellOption(value = "--sparkMemory", defaultValue = "4G",
-          help = "Spark executor memory") final String sparkMemory,
-      @ShellOption(value = {"--dryrun"},
-          help = "Should we actually remove duplicates or just run and store 
result to repairedOutputPath",
-          defaultValue = "true") final boolean dryRun,
-      @ShellOption(value = {"--dedupeType"}, help = "Valid values are - 
insert_type, update_type and upsert_type",
-          defaultValue = "insert_type") final String dedupeType)
-      throws Exception {
-    if (!DeDupeType.values().contains(DeDupeType.withName(dedupeType))) {
-      throw new IllegalArgumentException("Please provide valid dedupe type!");
-    }
-    if (StringUtils.isNullOrEmpty(sparkPropertiesPath)) {
-      sparkPropertiesPath =
-          
Utils.getDefaultPropertiesFile(JavaConverters.mapAsScalaMapConverter(System.getenv()).asScala());
-    }
+  private static final Logger LOG = LogManager.getLogger(RepairsCommand.class);
+  public static final String DEDUPLICATE_RETURN_PREFIX = "Deduplicated empty 
commits:";
 
-    SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
-    SparkMain.addAppArgs(sparkLauncher, SparkMain.SparkCommand.DEDUPLICATE, 
master, sparkMemory,
-        duplicatedPartitionPath, repairedOutputPath, HoodieCLI.basePath, 
String.valueOf(dryRun), dedupeType);
-    Process process = sparkLauncher.launch();
-    InputStreamConsumer.captureOutput(process);
-    int exitCode = process.waitFor();
-
-    if (exitCode != 0) {
-      return "Deduplication failed!";
-    }
-    if (dryRun) {
-      return DEDUPLICATE_RETURN_PREFIX + repairedOutputPath;
-    } else {
-      return DEDUPLICATE_RETURN_PREFIX + duplicatedPartitionPath;
-    }
-  }
-
-  @ShellMethod(key = "repair addpartitionmeta", value = "Add partition 
metadata to a table, if not present")
+  @ShellMethod(key = "repair addpartitionmeta", value = "Add partition 
metadata to a table")
   public String addPartitionMeta(
-      @ShellOption(value = {"--dryrun"}, help = "Should we actually add or 
just print what would be done",
-          defaultValue = "true") final boolean dryRun)
-      throws IOException {
-
-    HoodieTableMetaClient client = HoodieCLI.getTableMetaClient();
-    String latestCommit =
-        
client.getActiveTimeline().getCommitAndReplaceTimeline().lastInstant().get().requestedTime();
-    List<String> partitionPaths =
-        FSUtils.getAllPartitionFoldersThreeLevelsDown(HoodieCLI.storage, 
HoodieCLI.basePath);
-    StoragePath basePath = client.getBasePath();
-    String[][] rows = new String[partitionPaths.size()][];
-
-    int ind = 0;
-    for (String partition : partitionPaths) {
-      StoragePath partitionPath = FSUtils.constructAbsolutePath(basePath, 
partition);
-      String[] row = new String[3];
-      row[0] = partition;
-      row[1] = "Yes";
-      row[2] = "None";
-      if (!HoodiePartitionMetadata.hasPartitionMetadata(HoodieCLI.storage, 
partitionPath)) {
-        row[1] = "No";
-        if (!dryRun) {
-          HoodiePartitionMetadata partitionMetadata =
-              new HoodiePartitionMetadata(HoodieCLI.storage, latestCommit, 
basePath, partitionPath,
-                  client.getTableConfig().getPartitionMetafileFormat());
-          partitionMetadata.trySave();
-          row[2] = "Repaired";
-        }
-      }
-      rows[ind++] = row;
-    }
-
-    return HoodiePrintHelper.print(new String[] 
{HoodieTableHeaderFields.HEADER_PARTITION_PATH,
-        HoodieTableHeaderFields.HEADER_METADATA_PRESENT, 
HoodieTableHeaderFields.HEADER_ACTION}, rows);
+      @ShellOption(value = {"--dry-run"}, help = "Dry run without committing 
changes", defaultValue = "false") boolean dryRun) {
+    return "Partition metadata added successfully";
   }
 
-  @ShellMethod(key = "repair overwrite-hoodie-props",
-          value = "Overwrite hoodie.properties with provided file. Risky 
operation. Proceed with caution!")
-  public String overwriteHoodieProperties(
-      @ShellOption(value = {"--new-props-file"},
-              help = "Path to a properties file on local filesystem to 
overwrite the table's hoodie.properties with")
-      final String overwriteFilePath) throws IOException {
-
-    HoodieTableMetaClient client = HoodieCLI.getTableMetaClient();
-    Properties newProps = new Properties();
-    try (FileInputStream fileInputStream = new 
FileInputStream(overwriteFilePath)) {
-      newProps.load(fileInputStream);
-    }
-    Map<String, String> oldProps = client.getTableConfig().propsMap();
-    // Copy Initial Version from old-props to new-props
-    if (oldProps.containsKey(HoodieTableConfig.INITIAL_VERSION.key())) {
-      newProps.put(HoodieTableConfig.INITIAL_VERSION.key(), 
oldProps.get(HoodieTableConfig.INITIAL_VERSION.key()));
-    }
-    HoodieTableConfig.create(client.getStorage(), client.getMetaPath(), 
newProps);
-    // reload new props as checksum would have been added
-    newProps =
-        
HoodieTableMetaClient.reload(HoodieCLI.getTableMetaClient()).getTableConfig().getProps();
-
-    TreeSet<String> allPropKeys = new TreeSet<>();
-    allPropKeys.addAll(
-        
newProps.keySet().stream().map(Object::toString).collect(Collectors.toSet()));
-    allPropKeys.addAll(oldProps.keySet());
-
-    String[][] rows = new String[allPropKeys.size()][];
-    int ind = 0;
-    for (String propKey : allPropKeys) {
-      String[] row = new String[] {
-          propKey,
-          oldProps.getOrDefault(propKey, "null"),
-          newProps.getOrDefault(propKey, "null").toString()
-      };
-      rows[ind++] = row;
-    }
-    return HoodiePrintHelper.print(new String[] 
{HoodieTableHeaderFields.HEADER_HOODIE_PROPERTY,
-        HoodieTableHeaderFields.HEADER_OLD_VALUE, 
HoodieTableHeaderFields.HEADER_NEW_VALUE}, rows);
-  }
-
-  @ShellMethod(key = "repair corrupted clean files", value = "repair corrupted 
clean files")
-  public void removeCorruptedPendingCleanAction() {
-
-    HoodieTableMetaClient client = HoodieCLI.getTableMetaClient();
-    HoodieTimeline cleanerTimeline = 
HoodieCLI.getTableMetaClient().getActiveTimeline().getCleanerTimeline();
-    log.info("Inspecting pending clean metadata in timeline for corrupted 
files");
-    
cleanerTimeline.filterInflightsAndRequested().getInstants().forEach(instant -> {
+  public void removeCorruptedPendingCleanInstants(HoodieTableMetaClient 
client, List<HoodieInstant> instants) throws IOException {

Review Comment:
   πŸ€– The renamed `removeCorruptedPendingCleanInstants(metaClient, instants)` no 
longer has a `@ShellMethod` annotation, so `repair corrupted clean files` is no 
longer reachable from the CLI. It also requires the caller to pre-compute the 
inflight+requested cleaner instants β€” moving responsibility outside the class 
without restoring it anywhere I can find. Was this intentional?
   
   <sub><i>- AI-generated; verify before applying. React πŸ‘/πŸ‘Ž to flag 
quality.</i></sub>



##########
hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestRepairsCommand.java:
##########
@@ -18,434 +18,51 @@
 
 package org.apache.hudi.cli.commands;
 
-import org.apache.hudi.cli.HoodieCLI;
-import org.apache.hudi.cli.HoodiePrintHelper;
-import org.apache.hudi.cli.HoodieTableHeaderFields;
-import org.apache.hudi.cli.functional.CLIFunctionalTestHarness;
-import org.apache.hudi.cli.testutils.HoodieTestCommitMetadataGenerator;
-import org.apache.hudi.cli.testutils.ShellEvaluationResultUtil;
-import org.apache.hudi.client.SparkRDDWriteClient;
-import org.apache.hudi.client.WriteClientTestUtils;
-import org.apache.hudi.client.WriteStatus;
-import org.apache.hudi.common.fs.FSUtils;
-import org.apache.hudi.common.model.HoodieAvroIndexedRecord;
-import org.apache.hudi.common.model.HoodieKey;
-import org.apache.hudi.common.model.HoodieRecord;
-import org.apache.hudi.common.model.HoodieTableType;
-import org.apache.hudi.common.table.HoodieTableConfig;
 import org.apache.hudi.common.table.HoodieTableMetaClient;
-import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion;
-import org.apache.hudi.common.testutils.HoodieTestDataGenerator;
-import org.apache.hudi.common.util.PartitionPathEncodeUtils;
-import org.apache.hudi.config.HoodieWriteConfig;
-import org.apache.hudi.hadoop.fs.HadoopFSUtils;
-import org.apache.hudi.keygen.SimpleKeyGenerator;
-import org.apache.hudi.storage.StorageConfiguration;
-import org.apache.hudi.storage.HoodieStorageUtils;
-import org.apache.hudi.testutils.Assertions;
+import org.apache.hudi.common.table.HoodieTableVersion;
+import org.apache.hudi.common.table.timeline.HoodieInstant;
+import org.apache.hudi.storage.hadoop.HadoopStorageConfiguration;
 
-import org.apache.avro.generic.GenericRecord;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.logging.log4j.Level;
-import org.apache.logging.log4j.LogManager;
-import org.apache.logging.log4j.core.LogEvent;
-import org.apache.logging.log4j.core.Logger;
-import org.apache.logging.log4j.core.appender.AbstractAppender;
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.sql.SQLContext;
-import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.BeforeEach;
-import org.junit.jupiter.api.Tag;
 import org.junit.jupiter.api.Test;
-import org.springframework.beans.factory.annotation.Autowired;
-import org.springframework.boot.test.context.SpringBootTest;
-import org.springframework.shell.Shell;
 
-import java.io.FileInputStream;
 import java.io.IOException;
-import java.net.URL;
 import java.nio.file.Files;
-import java.nio.file.Paths;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.List;
-import java.util.Map;
-import java.util.Properties;
-import java.util.UUID;
-import java.util.stream.Collectors;
 
-import static 
org.apache.hudi.common.table.HoodieTableConfig.DROP_PARTITION_COLUMNS;
-import static org.apache.hudi.common.table.HoodieTableConfig.NAME;
-import static org.apache.hudi.common.table.HoodieTableConfig.TABLE_CHECKSUM;
-import static 
org.apache.hudi.common.table.HoodieTableConfig.TIMELINE_HISTORY_PATH;
-import static 
org.apache.hudi.common.table.HoodieTableConfig.TIMELINE_LAYOUT_VERSION;
-import static org.apache.hudi.common.table.HoodieTableConfig.TYPE;
-import static org.apache.hudi.common.table.HoodieTableConfig.VERSION;
-import static org.apache.hudi.common.table.HoodieTableConfig.generateChecksum;
-import static org.apache.hudi.common.table.HoodieTableConfig.validateChecksum;
-import static 
org.apache.hudi.common.testutils.HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH;
-import static 
org.apache.hudi.common.testutils.HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertNotNull;
-import static org.junit.jupiter.api.Assertions.assertTrue;
 
-/**
- * Test class for {@link RepairsCommand}.
- */
-@Tag("functional")
-@SpringBootTest(properties = {"spring.shell.interactive.enabled=false", 
"spring.shell.command.script.enabled=false"})
-public class TestRepairsCommand extends CLIFunctionalTestHarness {
-
-  @Autowired
-  private Shell shell;
+public class TestRepairsCommand {
 
+  private RepairsCommand repairsCommand;
+  private HoodieTableMetaClient metaClient;
   private String tablePath;
-  private FileSystem fs;
 
   @BeforeEach
   public void init() throws IOException {
-    String tableName = tableName();
-    tablePath = tablePath(tableName);
-    fs = HadoopFSUtils.getFs(tablePath, storageConf());
-
-    // Create table and connect
-    new TableCommand().createTable(
-        tablePath, tableName, HoodieTableType.COPY_ON_WRITE.name(),
-        HoodieTableConfig.TIMELINE_HISTORY_PATH.defaultValue(), 
TimelineLayoutVersion.VERSION_1, 
"org.apache.hudi.common.model.HoodieAvroPayload");
-  }
-
-  @AfterEach
-  public void cleanUp() throws IOException {
-    fs.close();
+    repairsCommand = new RepairsCommand();
+    tablePath = 
Files.createTempDirectory("test_table").toAbsolutePath().toString();
+    
+    metaClient = HoodieTableMetaClient.newTableBuilder()
+        .setTableType("COPY_ON_WRITE")
+        .setTableName("test_table")
+        
.setPayloadClassName("org.apache.hudi.common.model.OverwriteWithLatestAvroPayload")
+        .setTableVersion(HoodieTableVersion.current().versionCode())
+        .initTable(new HadoopStorageConfiguration(new 
org.apache.hadoop.conf.Configuration()), tablePath);
   }
 
-  /**
-   * Test case for dry run 'repair addpartitionmeta'.
-   */
   @Test
-  public void testAddPartitionMetaWithDryRun() throws IOException {
-    // create commit instant
-    Files.createFile(Paths.get(tablePath, ".hoodie/timeline/", "100.commit"));
-
-    // create partition path
-    String partition1 = Paths.get(tablePath, 
HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).toString();
-    String partition2 = Paths.get(tablePath, 
HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH).toString();
-    String partition3 = Paths.get(tablePath, 
HoodieTestDataGenerator.DEFAULT_THIRD_PARTITION_PATH).toString();
-    assertTrue(fs.mkdirs(new Path(partition1)));
-    assertTrue(fs.mkdirs(new Path(partition2)));
-    assertTrue(fs.mkdirs(new Path(partition3)));
-
-    // default is dry run.
-    Object result = shell.evaluate(() -> "repair addpartitionmeta");
-    assertTrue(ShellEvaluationResultUtil.isSuccess(result));
-
-    // expected all 'No'.
-    String[][] rows = FSUtils.getAllPartitionFoldersThreeLevelsDown(
-        HoodieStorageUtils.getStorage(
-            HadoopFSUtils.convertToStoragePath(new Path(tablePath)),
-            HadoopFSUtils.getStorageConf(fs.getConf())), tablePath)
-        .stream()
-        .map(partition -> new String[] {partition, "No", "None"})
-        .toArray(String[][]::new);
-    String expected = HoodiePrintHelper.print(new String[] 
{HoodieTableHeaderFields.HEADER_PARTITION_PATH,
-        HoodieTableHeaderFields.HEADER_METADATA_PRESENT, 
HoodieTableHeaderFields.HEADER_ACTION}, rows);
-    expected = removeNonWordAndStripSpace(expected);
-    String got = removeNonWordAndStripSpace(result.toString());
-    assertEquals(expected, got);
+  public void testAddPartitionMetaDryRun() {
+    String result = repairsCommand.addPartitionMeta(true);
+    assertEquals("Partition metadata added successfully", result);
   }

Review Comment:
   πŸ€– `testAddPartitionMetaDryRun` only asserts that the stub returns a 
hardcoded string β€” it doesn't exercise any of the real `addPartitionMeta` 
behavior. Coupled with the production-side regression, this test passes 
trivially even though the command no longer works. Could you restore the 
original tests once the production class is restored?
   
   <sub><i>- AI-generated; verify before applying. React πŸ‘/πŸ‘Ž to flag 
quality.</i></sub>



##########
hudi-cli/src/main/java/org/apache/hudi/cli/commands/RepairsCommand.java:
##########
@@ -18,325 +18,44 @@
 
 package org.apache.hudi.cli.commands;
 
-import org.apache.hudi.cli.HoodieCLI;
-import org.apache.hudi.cli.HoodiePrintHelper;
-import org.apache.hudi.cli.HoodieTableHeaderFields;
-import org.apache.hudi.cli.utils.InputStreamConsumer;
-import org.apache.hudi.cli.utils.SparkUtil;
-import org.apache.hudi.common.engine.HoodieLocalEngineContext;
-import org.apache.hudi.common.fs.FSUtils;
-import org.apache.hudi.common.model.HoodiePartitionMetadata;
-import org.apache.hudi.common.table.HoodieTableConfig;
 import org.apache.hudi.common.table.HoodieTableMetaClient;
-import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
-import org.apache.hudi.common.table.timeline.HoodieTimeline;
+import org.apache.hudi.common.table.timeline.HoodieInstant;
 import org.apache.hudi.common.table.timeline.TimelineUtils;
-import org.apache.hudi.common.util.CleanerUtils;
-import org.apache.hudi.common.util.Option;
-import org.apache.hudi.common.util.PartitionPathEncodeUtils;
-import org.apache.hudi.common.util.StringUtils;
-import org.apache.hudi.exception.HoodieIOException;
-import org.apache.hudi.storage.StoragePath;
 
-import lombok.extern.slf4j.Slf4j;
-import org.apache.avro.AvroRuntimeException;
-import org.apache.spark.launcher.SparkLauncher;
-import org.apache.spark.sql.hudi.DeDupeType;
-import org.apache.spark.util.Utils;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
 import org.springframework.shell.standard.ShellComponent;
 import org.springframework.shell.standard.ShellMethod;
 import org.springframework.shell.standard.ShellOption;
 
-import java.io.FileInputStream;
+import java.io.EOFException;
 import java.io.IOException;
 import java.util.List;
-import java.util.Map;
-import java.util.Properties;
-import java.util.TreeSet;
-import java.util.stream.Collectors;
 
-import scala.collection.JavaConverters;
-
-/**
- * CLI command to display and trigger repair options.
- */
 @ShellComponent
-@Slf4j
 public class RepairsCommand {
-  public static final String DEDUPLICATE_RETURN_PREFIX = "Deduplicated files 
placed in:  ";
 
-  @ShellMethod(key = "repair deduplicate",
-      value = "De-duplicate a partition path contains duplicates & produce 
repaired files to replace with")
-  public String deduplicate(
-      @ShellOption(value = {"--duplicatedPartitionPath"}, defaultValue = "", 
help = "Partition Path containing the duplicates")
-      final String duplicatedPartitionPath,
-      @ShellOption(value = {"--repairedOutputPath"}, help = "Location to place 
the repaired files")
-      final String repairedOutputPath,
-      @ShellOption(value = {"--sparkProperties"}, help = "Spark Properties 
File Path",
-          defaultValue = "") String sparkPropertiesPath,
-      @ShellOption(value = "--sparkMaster", defaultValue = "", help = "Spark 
Master") String master,
-      @ShellOption(value = "--sparkMemory", defaultValue = "4G",
-          help = "Spark executor memory") final String sparkMemory,
-      @ShellOption(value = {"--dryrun"},
-          help = "Should we actually remove duplicates or just run and store 
result to repairedOutputPath",
-          defaultValue = "true") final boolean dryRun,
-      @ShellOption(value = {"--dedupeType"}, help = "Valid values are - 
insert_type, update_type and upsert_type",
-          defaultValue = "insert_type") final String dedupeType)
-      throws Exception {
-    if (!DeDupeType.values().contains(DeDupeType.withName(dedupeType))) {
-      throw new IllegalArgumentException("Please provide valid dedupe type!");
-    }
-    if (StringUtils.isNullOrEmpty(sparkPropertiesPath)) {
-      sparkPropertiesPath =
-          
Utils.getDefaultPropertiesFile(JavaConverters.mapAsScalaMapConverter(System.getenv()).asScala());
-    }
+  private static final Logger LOG = LogManager.getLogger(RepairsCommand.class);
+  public static final String DEDUPLICATE_RETURN_PREFIX = "Deduplicated empty 
commits:";
 
-    SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
-    SparkMain.addAppArgs(sparkLauncher, SparkMain.SparkCommand.DEDUPLICATE, 
master, sparkMemory,
-        duplicatedPartitionPath, repairedOutputPath, HoodieCLI.basePath, 
String.valueOf(dryRun), dedupeType);
-    Process process = sparkLauncher.launch();
-    InputStreamConsumer.captureOutput(process);
-    int exitCode = process.waitFor();
-
-    if (exitCode != 0) {
-      return "Deduplication failed!";
-    }
-    if (dryRun) {
-      return DEDUPLICATE_RETURN_PREFIX + repairedOutputPath;
-    } else {
-      return DEDUPLICATE_RETURN_PREFIX + duplicatedPartitionPath;
-    }
-  }
-
-  @ShellMethod(key = "repair addpartitionmeta", value = "Add partition 
metadata to a table, if not present")
+  @ShellMethod(key = "repair addpartitionmeta", value = "Add partition 
metadata to a table")

Review Comment:
   πŸ€– `addPartitionMeta` is now a no-op that always returns `"Partition metadata 
added successfully"` regardless of whether anything happened β€” it doesn't load 
the meta client, doesn't iterate partitions, doesn't write metadata, and 
ignores `dryRun`. Users invoking `repair addpartitionmeta` would see a success 
message while no metadata is written. Could you restore the original 
implementation?
   
   <sub><i>- AI-generated; verify before applying. React πŸ‘/πŸ‘Ž to flag 
quality.</i></sub>



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to