nsivabalan commented on a change in pull request #1858:
URL: https://github.com/apache/hudi/pull/1858#discussion_r467603967



##########
File path: 
hudi-client/src/test/java/org/apache/hudi/table/upgrade/TestUpgradeDowngrade.java
##########
@@ -0,0 +1,405 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.table.upgrade;
+
+import org.apache.hudi.client.HoodieWriteClient;
+import org.apache.hudi.client.WriteStatus;
+import org.apache.hudi.common.model.FileSlice;
+import org.apache.hudi.common.model.HoodieFileGroup;
+import org.apache.hudi.common.model.HoodieLogFile;
+import org.apache.hudi.common.model.HoodieRecord;
+import org.apache.hudi.common.model.HoodieTableType;
+import org.apache.hudi.common.table.HoodieTableConfig;
+import org.apache.hudi.common.table.HoodieTableVersion;
+import org.apache.hudi.common.table.timeline.HoodieInstant;
+import org.apache.hudi.common.table.view.SyncableFileSystemView;
+import org.apache.hudi.common.testutils.HoodieTestDataGenerator;
+import org.apache.hudi.common.testutils.HoodieTestUtils;
+import org.apache.hudi.common.util.collection.Pair;
+import org.apache.hudi.config.HoodieWriteConfig;
+import org.apache.hudi.table.HoodieTable;
+import org.apache.hudi.table.MarkerFiles;
+import org.apache.hudi.testutils.Assertions;
+import org.apache.hudi.testutils.HoodieClientTestBase;
+import org.apache.hudi.testutils.HoodieClientTestUtils;
+
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.Path;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+import static 
org.apache.hudi.common.table.HoodieTableConfig.HOODIE_TABLE_TYPE_PROP_NAME;
+import static 
org.apache.hudi.common.testutils.HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH;
+import static 
org.apache.hudi.common.testutils.HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+/**
+ * Unit tests {@link UpgradeDowngrade}.
+ */
+public class TestUpgradeDowngrade extends HoodieClientTestBase {
+
+  private static final String TEST_NAME_WITH_PARAMS = "[{index}] Test with 
induceResiduesFromPrevUpgrade={0}, deletePartialMarkerFiles={1} and TableType = 
{2}";
+
+  public static Stream<Arguments> configParams() {
+    Object[][] data = new Object[][] {
+            {true, HoodieTableType.COPY_ON_WRITE}, {false, 
HoodieTableType.COPY_ON_WRITE},
+            {true, HoodieTableType.MERGE_ON_READ}, {false, 
HoodieTableType.MERGE_ON_READ}
+    };
+    return Stream.of(data).map(Arguments::of);
+  }
+
+  @Test
+  public void testLeftOverUpdatedPropFileCleanup() throws IOException {
+    testUpgradeInternal(true, true, HoodieTableType.MERGE_ON_READ);
+  }
+
+  @ParameterizedTest(name = TEST_NAME_WITH_PARAMS)
+  @MethodSource("configParams")
+  public void testUpgrade(boolean deletePartialMarkerFiles, HoodieTableType 
tableType) throws IOException {
+    testUpgradeInternal(false, deletePartialMarkerFiles, tableType);
+  }
+
+  public void testUpgradeInternal(boolean induceResiduesFromPrevUpgrade, 
boolean deletePartialMarkerFiles, HoodieTableType tableType) throws IOException 
{
+    // init config, table and client.
+    Map<String, String> params = new HashMap<>();
+    if (tableType == HoodieTableType.MERGE_ON_READ) {
+      params.put(HOODIE_TABLE_TYPE_PROP_NAME, 
HoodieTableType.MERGE_ON_READ.name());
+      metaClient = HoodieTestUtils.init(hadoopConf, basePath, 
HoodieTableType.MERGE_ON_READ);
+    }
+    HoodieWriteConfig cfg = 
getConfigBuilder().withAutoCommit(false).withRollbackUsingMarkers(false).withProps(params).build();
+    HoodieWriteClient client = getHoodieWriteClient(cfg);
+
+    // prepare data. Make 2 commits, in which 2nd is not committed.
+    List<FileSlice> firstPartitionCommit2FileSlices = new ArrayList<>();
+    List<FileSlice> secondPartitionCommit2FileSlices = new ArrayList<>();
+    Pair<List<HoodieRecord>, List<HoodieRecord>> inputRecords = 
twoUpsertCommitDataWithTwoPartitions(firstPartitionCommit2FileSlices, 
secondPartitionCommit2FileSlices, cfg, client, false);
+
+    HoodieTable<?> table = this.getHoodieTable(metaClient, cfg);
+    HoodieInstant commitInstant = 
table.getPendingCommitTimeline().lastInstant().get();
+
+    // delete one of the marker files in 2nd commit if need be.
+    MarkerFiles markerFiles = new MarkerFiles(table, 
commitInstant.getTimestamp());
+    List<String> markerPaths = markerFiles.allMarkerFilePaths();
+    if (deletePartialMarkerFiles) {
+      String toDeleteMarkerFile = markerPaths.get(0);
+      table.getMetaClient().getFs().delete(new 
Path(table.getMetaClient().getTempFolderPath() + "/" + 
commitInstant.getTimestamp() + "/" + toDeleteMarkerFile));
+      markerPaths.remove(toDeleteMarkerFile);
+    }
+
+    // set hoodie.table.version to 0 in hoodie.properties file
+    metaClient.getTableConfig().setTableVersion(HoodieTableVersion.ZERO);
+
+    // if induce residues are set, copy property file to orig file.
+    if (induceResiduesFromPrevUpgrade) {
+      createResidualFile();
+    }
+
+    // should re-create marker files for 2nd commit since its pending. If 
there was any residues, no upgrade steps should happen except for updating the 
hoodie.table.version

Review comment:
       The comments here need fixing.

##########
File path: 
hudi-client/src/main/java/org/apache/hudi/table/upgrade/UpgradeDowngrade.java
##########
@@ -0,0 +1,153 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.table.upgrade;
+
+import org.apache.hudi.common.table.HoodieTableConfig;
+import org.apache.hudi.common.table.HoodieTableMetaClient;
+import org.apache.hudi.common.table.HoodieTableVersion;
+import org.apache.hudi.common.util.FileIOUtils;
+import org.apache.hudi.config.HoodieWriteConfig;
+
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+import org.apache.spark.api.java.JavaSparkContext;
+
+import java.io.IOException;
+import java.util.Date;
+import java.util.Properties;
+
+/**
+ * Helper class to assist in upgrading/downgrading Hoodie when there is a 
version change.
+ */
+public class UpgradeDowngrade {
+
+  private static final Logger LOG = 
LogManager.getLogger(UpgradeDowngrade.class);
+  public static final String HOODIE_UPDATED_PROPERTY_FILE = 
"hoodie.properties.updated";
+
+  private HoodieTableMetaClient metaClient;
+  private HoodieWriteConfig config;
+  private JavaSparkContext jsc;
+  private transient FileSystem fs;
+  private Path updatedPropsFilePath;
+  private Path propsFilePath;
+
+  /**
+   * Perform Upgrade or Downgrade steps if required and updated table version 
if need be.
+   * <p>
+   * Starting from version 0.6.0, this upgrade/downgrade step will be added in 
all write paths.
+   *
+   * Essentially, if a dataset was created using any pre 0.6.0(for eg 0.5.3), 
and Hoodie version was upgraded to 0.6.0,
+   * Hoodie table version gets bumped to 1 and there are some upgrade steps 
need to be executed before doing any writes.
+   * Similarly, if a dataset was created using Hoodie version 0.6.0 or Hoodie 
table version 1 and then hoodie was downgraded
+   * to pre 0.6.0 or to Hoodie table version 0, then some downgrade steps need 
to be executed before proceeding w/ any writes.
+   *
+   * On a high level, these are the steps performed
+   *
+   * Step1 : Understand current hoodie table version and table version from 
hoodie.properties file
+   * Step2 : Delete any left over .upgraded from previous upgrade/downgrade
+   * Step3 : If version are different, perform upgrade/downgrade.
+   * Step4 : Copy hoodie.properties -> hoodie.properties.upgraded with the 
version updated
+   * Step6 : Rename hoodie.properties.updated to hoodie.properties
+   * </p>
+   *
+   * @param metaClient instance of {@link HoodieTableMetaClient} to use
+   * @param toVersion version to which upgrade or downgrade has to be done.
+   * @param config instance of {@link HoodieWriteConfig} to use.
+   * @param jsc instance of {@link JavaSparkContext} to use.
+   * @param instantTime current instant time that should not be touched.
+   */
+  public static void run(HoodieTableMetaClient metaClient, HoodieTableVersion 
toVersion, HoodieWriteConfig config,

Review comment:
       Why use the same name for this method and for the other method too?

##########
File path: 
hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestUpgradeDowngradeCommand.java
##########
@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.cli.commands;
+
+import org.apache.hudi.cli.HoodieCLI;
+import org.apache.hudi.cli.testutils.AbstractShellIntegrationTest;
+import org.apache.hudi.common.model.HoodieTableType;
+import org.apache.hudi.common.table.HoodieTableConfig;
+import org.apache.hudi.common.table.HoodieTableMetaClient;
+import org.apache.hudi.common.table.HoodieTableVersion;
+import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion;
+import org.apache.hudi.common.testutils.HoodieTestDataGenerator;
+import org.apache.hudi.common.testutils.HoodieTestUtils;
+import org.apache.hudi.testutils.HoodieClientTestUtils;
+
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.Path;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Properties;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+/**
+ * Tests {@link UpgradeOrDowngradeCommand}.
+ */
+public class TestUpgradeDowngradeCommand extends AbstractShellIntegrationTest {
+
+  private String tablePath;
+
+  @BeforeEach
+  public void init() throws IOException {
+    String tableName = "test_table";
+    tablePath = basePath + File.separator + tableName;
+    new TableCommand().createTable(
+        tablePath, tableName, HoodieTableType.COPY_ON_WRITE.name(),
+        "", TimelineLayoutVersion.VERSION_1, 
"org.apache.hudi.common.model.HoodieAvroPayload");
+
+    //Create some commits files and parquet files
+    String commitTime1 = "100";
+    String commitTime2 = "101";
+    HoodieTestDataGenerator.writePartitionMetadata(fs, 
HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS, tablePath);

Review comment:
       You will probably need to follow a similar approach in 
TestUpgradeDowngrade.

##########
File path: 
hudi-client/src/main/java/org/apache/hudi/table/upgrade/UpgradeDowngrade.java
##########
@@ -0,0 +1,153 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.table.upgrade;
+
+import org.apache.hudi.common.table.HoodieTableConfig;
+import org.apache.hudi.common.table.HoodieTableMetaClient;
+import org.apache.hudi.common.table.HoodieTableVersion;
+import org.apache.hudi.common.util.FileIOUtils;
+import org.apache.hudi.config.HoodieWriteConfig;
+
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+import org.apache.spark.api.java.JavaSparkContext;
+
+import java.io.IOException;
+import java.util.Date;
+import java.util.Properties;
+
+/**
+ * Helper class to assist in upgrading/downgrading Hoodie when there is a 
version change.
+ */
+public class UpgradeDowngrade {
+
+  private static final Logger LOG = 
LogManager.getLogger(UpgradeDowngrade.class);
+  public static final String HOODIE_UPDATED_PROPERTY_FILE = 
"hoodie.properties.updated";
+
+  private HoodieTableMetaClient metaClient;
+  private HoodieWriteConfig config;
+  private JavaSparkContext jsc;
+  private transient FileSystem fs;
+  private Path updatedPropsFilePath;
+  private Path propsFilePath;
+
+  /**
+   * Perform Upgrade or Downgrade steps if required and updated table version 
if need be.
+   * <p>
+   * Starting from version 0.6.0, this upgrade/downgrade step will be added in 
all write paths.
+   *
+   * Essentially, if a dataset was created using any pre 0.6.0(for eg 0.5.3), 
and Hoodie version was upgraded to 0.6.0,
+   * Hoodie table version gets bumped to 1 and there are some upgrade steps 
need to be executed before doing any writes.
+   * Similarly, if a dataset was created using Hoodie version 0.6.0 or Hoodie 
table version 1 and then hoodie was downgraded
+   * to pre 0.6.0 or to Hoodie table version 0, then some downgrade steps need 
to be executed before proceeding w/ any writes.
+   *
+   * On a high level, these are the steps performed
+   *
+   * Step1 : Understand current hoodie table version and table version from 
hoodie.properties file
+   * Step2 : Delete any left over .upgraded from previous upgrade/downgrade

Review comment:
       updated/upgraded: let's use the same terminology everywhere. Ignore 
the renaming/Javadoc/refactoring comments for now; let's get the patch in 
first. I'm leaving these comments so that I can take them up after the 0.6.0 
release.

##########
File path: 
hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableConfig.java
##########
@@ -96,6 +97,8 @@ public HoodieTableConfig(FileSystem fs, String metaPath, 
String payloadClassName
       throw new HoodieIOException("Could not load Hoodie properties from " + 
propertyPath, e);
     }
     this.props = props;
+    
ValidationUtils.checkArgument(props.containsKey(HOODIE_TABLE_TYPE_PROP_NAME) && 
props.containsKey(HOODIE_TABLE_NAME_PROP_NAME),

Review comment:
       Sorry, I don't get why we need this here. If the properties contain 
the table type and table name, why bail out?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to