[GitHub] [hudi] nsivabalan commented on a change in pull request #3887: [HUDI-2648] Retry FileSystem action instead of failed directly.

2022-02-20 Thread GitBox


nsivabalan commented on a change in pull request #3887:
URL: https://github.com/apache/hudi/pull/3887#discussion_r810679925



##
File path: 
hudi-common/src/main/java/org/apache/hudi/common/util/RetryHelper.java
##
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.common.util;
+
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Random;
+import java.util.stream.Collectors;
+
+public class RetryHelper {
+  private static final Logger LOG = LogManager.getLogger(RetryHelper.class);
+  private CheckedFunction func;
+  private int num;
+  private long maxIntervalTime;

Review comment:
   can some of these be final ? 




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [hudi] nsivabalan commented on a change in pull request #3887: [HUDI-2648] Retry FileSystem action instead of failed directly.

2022-02-20 Thread GitBox


nsivabalan commented on a change in pull request #3887:
URL: https://github.com/apache/hudi/pull/3887#discussion_r810679685



##
File path: 
hudi-common/src/main/java/org/apache/hudi/common/fs/FileSystemRetryConfig.java
##
@@ -0,0 +1,142 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.common.fs;
+
+import org.apache.hudi.common.config.ConfigClassProperty;
+import org.apache.hudi.common.config.ConfigGroups;
+import org.apache.hudi.common.config.ConfigProperty;
+import org.apache.hudi.common.config.HoodieConfig;
+
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.Properties;
+
+/**
+ * The file system retry relevant config options.
+ */
+@ConfigClassProperty(name = "FileSystem Guard Configurations",
+groupName = ConfigGroups.Names.WRITE_CLIENT,
+description = "The filesystem retry related config options, to help 
deal with runtime exception like list/get/put/delete performance issues.")
+public class FileSystemRetryConfig  extends HoodieConfig {
+
+  public static final ConfigProperty FILESYSTEM_RETRY_ENABLE = 
ConfigProperty
+  .key("hoodie.filesystem.operation.retry.enable")
+  .defaultValue("false")
+  .sinceVersion("0.11.0")
+  .withDocumentation("Enabled to handle list/get/delete etc file system 
performance issue.");
+
+  public static final ConfigProperty INITIAL_RETRY_INTERVAL_MS = 
ConfigProperty
+  .key("hoodie.filesystem.operation.retry.initial_interval_ms")
+  .defaultValue(100L)
+  .sinceVersion("0.11.0")
+  .withDocumentation("Amount of time (in ms) to wait, before retry to do 
operations on storage.");
+
+  public static final ConfigProperty MAX_RETRY_INTERVAL_MS = 
ConfigProperty
+  .key("hoodie.filesystem.operation.retry.max_interval_ms")
+  .defaultValue(2000L)
+  .sinceVersion("0.11.0")
+  .withDocumentation("Maximum amount of time (in ms), to wait for next 
retry.");
+
+  public static final ConfigProperty MAX_RETRY_NUMBERS = 
ConfigProperty
+  .key("hoodie.filesystem.operation.retry.max_numbers")

Review comment:
   may be we can name this as "hoodie.filesystem.operation.retry.max.count"

##
File path: 
hudi-common/src/main/java/org/apache/hudi/common/util/RetryHelper.java
##
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.common.util;
+
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Random;
+import java.util.stream.Collectors;
+
+public class RetryHelper {
+  private static final Logger LOG = LogManager.getLogger(RetryHelper.class);
+  private CheckedFunction func;
+  private int num;
+  private long maxIntervalTime;

Review comment:
   can come of these be final ? 

##
File path: 
hudi-common/src/main/java/org/apache/hudi/common/util/RetryHelper.java
##
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain 

[GitHub] [hudi] nsivabalan commented on a change in pull request #3887: [HUDI-2648] Retry FileSystem action instead of failed directly.

2022-01-23 Thread GitBox


nsivabalan commented on a change in pull request #3887:
URL: https://github.com/apache/hudi/pull/3887#discussion_r790400915



##
File path: 
hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableMetaClient.java
##
@@ -253,7 +265,16 @@ public TimelineLayoutVersion getTimelineLayoutVersion() {
*/
   public HoodieWrapperFileSystem getFs() {
 if (fs == null) {
-  FileSystem fileSystem = FSUtils.getFs(metaPath, hadoopConf.newCopy());
+  FileSystem fileSystem;
+  if (fileSystemRetryConfig.isFileSystemActionRetryEnable()) {

Review comment:
   Guess, what vinoth refers to is, we should add a property named 
supportsRetry or something to each StorageScheme. So, only for those storage 
schemes where retries are supported, we can check the config 
(fileSystemRetryConfig.isFileSystemActionRetryEnable()) and then invoke our 
retry Helper. 
   
   @vinothchandar : can you please clarify on your above suggestion. 
   




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [hudi] nsivabalan commented on a change in pull request #3887: [HUDI-2648] Retry FileSystem action instead of failed directly.

2022-01-23 Thread GitBox


nsivabalan commented on a change in pull request #3887:
URL: https://github.com/apache/hudi/pull/3887#discussion_r790371241



##
File path: 
hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableMetaClient.java
##
@@ -253,7 +265,16 @@ public TimelineLayoutVersion getTimelineLayoutVersion() {
*/
   public HoodieWrapperFileSystem getFs() {
 if (fs == null) {
-  FileSystem fileSystem = FSUtils.getFs(metaPath, hadoopConf.newCopy());
+  FileSystem fileSystem;
+  if (fileSystemRetryConfig.isFileSystemActionRetryEnable()) {

Review comment:
   yeah, I am not sure if we can make it a property of StorageScheme. we 
should let users enable on a need basis. 
   




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [hudi] nsivabalan commented on a change in pull request #3887: [HUDI-2648] Retry FileSystem action instead of failed directly.

2022-01-23 Thread GitBox


nsivabalan commented on a change in pull request #3887:
URL: https://github.com/apache/hudi/pull/3887#discussion_r790371241



##
File path: 
hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableMetaClient.java
##
@@ -253,7 +265,16 @@ public TimelineLayoutVersion getTimelineLayoutVersion() {
*/
   public HoodieWrapperFileSystem getFs() {
 if (fs == null) {
-  FileSystem fileSystem = FSUtils.getFs(metaPath, hadoopConf.newCopy());
+  FileSystem fileSystem;
+  if (fileSystemRetryConfig.isFileSystemActionRetryEnable()) {

Review comment:
   we can disable by default and let interested users enable on their end.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [hudi] nsivabalan commented on a change in pull request #3887: [HUDI-2648] Retry FileSystem action instead of failed directly.

2021-12-20 Thread GitBox


nsivabalan commented on a change in pull request #3887:
URL: https://github.com/apache/hudi/pull/3887#discussion_r772698180



##
File path: 
hudi-common/src/main/java/org/apache/hudi/common/fs/FileSystemRetryConfig.java
##
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.common.fs;
+
+import org.apache.hudi.common.config.ConfigClassProperty;
+import org.apache.hudi.common.config.ConfigGroups;
+import org.apache.hudi.common.config.ConfigProperty;
+import org.apache.hudi.common.config.HoodieConfig;
+
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.Properties;
+
+/**
+ * The file system retry relevant config options.
+ */
+@ConfigClassProperty(name = "FileSystem Guard Configurations",
+groupName = ConfigGroups.Names.WRITE_CLIENT,
+description = "The filesystem retry related config options, to help 
deal with runtime exception like list/get/put/delete performance issues.")
+public class FileSystemRetryConfig  extends HoodieConfig {
+
+  public static final ConfigProperty FILESYSTEM_RETRY_ENABLE = 
ConfigProperty
+  .key("hoodie.filesystem.action.retry.enable")
+  .defaultValue("false")
+  .sinceVersion("0.10.0")

Review comment:
   we need to fix this to 0.11.0

##
File path: 
hudi-common/src/test/java/org/apache/hudi/common/fs/TestFSUtilsWithRetryWrapperEnable.java
##
@@ -0,0 +1,205 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version loop.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-loop.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.common.fs;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.util.Progressable;
+import org.apache.hudi.common.util.RetryHelper;
+
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.net.URI;
+import java.util.Arrays;
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+/**
+ * Tests file system utils with retry wrapper enable.
+ */
+public class TestFSUtilsWithRetryWrapperEnable extends TestFSUtils {
+
+  public RetryHelper retryHelper;
+  public static final String EXCEPTION_MESSAGE = "Fake runtime exception 
here.";
+
+  @Override
+  @BeforeEach
+  public void setUp() throws IOException {
+initMetaClient();
+FileSystemRetryConfig fileSystemRetryConfig = 
FileSystemRetryConfig.newBuilder().withFileSystemActionRetryEnabled(true).build();
+retryHelper = new RetryHelper<>()
+.tryMaxInterval(fileSystemRetryConfig.getMaxRetryIntervalMs())
+.tryNum(fileSystemRetryConfig.getMaxRetryNumbers())
+
.tryInitialInterval(fileSystemRetryConfig.getInitialRetryIntervalMs());
+FakeRemoteFileSystem fakeFs = new 
FakeRemoteFileSystem(FSUtils.getFs(metaClient.getMetaPath(), 
metaClient.getHadoopConf()), 1);
+FileSystem fileSystem = new HoodieRetryWrapperFileSystem(fakeFs, 
retryHelper);
+HoodieWrapperFileSystem fs = new HoodieWrapperFileSystem(fileSystem, new 
NoOpConsistencyGuard());
+

[GitHub] [hudi] nsivabalan commented on a change in pull request #3887: [HUDI-2648] Retry FileSystem action instead of failed directly.

2021-12-09 Thread GitBox


nsivabalan commented on a change in pull request #3887:
URL: https://github.com/apache/hudi/pull/3887#discussion_r766321890



##
File path: 
hudi-common/src/main/java/org/apache/hudi/common/fs/FileSystemGuardConfig.java
##
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.common.fs;
+
+import org.apache.hudi.common.config.ConfigClassProperty;
+import org.apache.hudi.common.config.ConfigGroups;
+import org.apache.hudi.common.config.ConfigProperty;
+import org.apache.hudi.common.config.HoodieConfig;
+
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.Properties;
+
+/**
+ * The consistency guard relevant config options.
+ */
+@ConfigClassProperty(name = "FileSystem Guard Configurations",
+groupName = ConfigGroups.Names.WRITE_CLIENT,
+description = "The filesystem guard related config options, to help 
deal with runtime exception like s3 list/get/put/delete performance issues.")
+public class FileSystemGuardConfig  extends HoodieConfig {

Review comment:
   do you think naming this "FileSystemRetryConfig" would be more 
appropriate? 




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [hudi] nsivabalan commented on a change in pull request #3887: [HUDI-2648] Retry FileSystem action instead of failed directly.

2021-11-22 Thread GitBox


nsivabalan commented on a change in pull request #3887:
URL: https://github.com/apache/hudi/pull/3887#discussion_r754700441



##
File path: 
hudi-common/src/main/java/org/apache/hudi/common/fs/FileSystemGuardConfig.java
##
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.common.fs;
+
+import org.apache.hudi.common.config.ConfigClassProperty;
+import org.apache.hudi.common.config.ConfigGroups;
+import org.apache.hudi.common.config.ConfigProperty;
+import org.apache.hudi.common.config.HoodieConfig;
+
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.Properties;
+
+/**
+ * The consistency guard relevant config options.
+ */
+@ConfigClassProperty(name = "FileSystem Guard Configurations",
+groupName = ConfigGroups.Names.WRITE_CLIENT,
+description = "The filesystem guard related config options, to help 
deal with runtime exception like s3 list/get/put/delete performance issues.")
+public class FileSystemGuardConfig  extends HoodieConfig {
+
+  public static final ConfigProperty FILESYSTEM_RETRY_ENABLE = 
ConfigProperty
+  .key("hoodie.filesystem.action.retry.enabled")
+  .defaultValue("false")
+  .sinceVersion("0.10.0")
+  .withDocumentation("Enabled to handle S3 list/get/delete etc file system 
performance issue.");
+
+  public static final ConfigProperty INITIAL_RETRY_INTERVAL_MS = 
ConfigProperty
+  .key("hoodie.filesystem.action.retry.initial_interval_ms")
+  .defaultValue(100L)
+  .sinceVersion("0.10.0")
+  .withDocumentation("Amount of time (in ms) to wait, before retry to do 
operations on storage.");
+
+  public static final ConfigProperty MAX_RETRY_INTERVAL_MS = 
ConfigProperty
+  .key("hoodie.filesystem.action.retry.max_interval_ms")
+  .defaultValue(2000L)
+  .sinceVersion("0.10.0")
+  .withDocumentation("Maximum amount of time (in ms), to wait for next 
retry.");
+
+  public static final ConfigProperty MAX_RETRY_NUMBERS = 
ConfigProperty
+  .key("hoodie.filesystem.action.retry.max_numbers")

Review comment:
   can you help me understand, why do we need both max retry number and max 
internal ms ? I thought either one is good enough. 
   So, either 100*4 = 4 retries w/ 100 ms delay. 
   or 2000/100 = 20 retries w/ 100 ms delay. 

##
File path: 
hudi-common/src/main/java/org/apache/hudi/common/util/RetryHelper.java
##
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.common.util;
+
+import org.apache.hudi.common.fs.HoodieWrapperFileSystem;
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+
+import java.io.IOException;
+import java.util.Random;
+
+public class RetryHelper {
+  private static final Logger LOG = LogManager.getLogger(RetryHelper.class);
+  private HoodieWrapperFileSystem.CheckedFunction func;
+  private int num;
+  private long maxIntervalTime;
+  private long initialIntervalTime = 100L;
+  private String taskInfo = "N/A";
+
+  public RetryHelper() {
+  }
+
+  public RetryHelper(String taskInfo) {
+this.taskInfo = taskInfo;
+  }
+
+  public RetryHelper tryWith(HoodieWrapperFileSystem.CheckedFunction func) {
+this.func = func;
+return this;
+  }
+
+  public RetryHelper tryNum(int num) {
+this.num = num;
+return this;
+  }
+
+  public RetryHelper tryTaskInfo(String taskInfo) {
+