[GitHub] [hudi] nsivabalan commented on a change in pull request #3887: [HUDI-2648] Retry FileSystem action instead of failed directly.
nsivabalan commented on a change in pull request #3887: URL: https://github.com/apache/hudi/pull/3887#discussion_r810679925 ## File path: hudi-common/src/main/java/org/apache/hudi/common/util/RetryHelper.java ## @@ -0,0 +1,129 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.common.util; + +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Random; +import java.util.stream.Collectors; + +public class RetryHelper { + private static final Logger LOG = LogManager.getLogger(RetryHelper.class); + private CheckedFunction func; + private int num; + private long maxIntervalTime; Review comment: can some of these be final ? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] nsivabalan commented on a change in pull request #3887: [HUDI-2648] Retry FileSystem action instead of failed directly.
nsivabalan commented on a change in pull request #3887: URL: https://github.com/apache/hudi/pull/3887#discussion_r810679685 ## File path: hudi-common/src/main/java/org/apache/hudi/common/fs/FileSystemRetryConfig.java ## @@ -0,0 +1,142 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.common.fs; + +import org.apache.hudi.common.config.ConfigClassProperty; +import org.apache.hudi.common.config.ConfigGroups; +import org.apache.hudi.common.config.ConfigProperty; +import org.apache.hudi.common.config.HoodieConfig; + +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.util.Properties; + +/** + * The file system retry relevant config options. + */ +@ConfigClassProperty(name = "FileSystem Guard Configurations", +groupName = ConfigGroups.Names.WRITE_CLIENT, +description = "The filesystem retry related config options, to help deal with runtime exception like list/get/put/delete performance issues.") +public class FileSystemRetryConfig extends HoodieConfig { + + public static final ConfigProperty FILESYSTEM_RETRY_ENABLE = ConfigProperty + .key("hoodie.filesystem.operation.retry.enable") + .defaultValue("false") + .sinceVersion("0.11.0") + .withDocumentation("Enabled to handle list/get/delete etc file system performance issue."); + + public static final ConfigProperty INITIAL_RETRY_INTERVAL_MS = ConfigProperty + .key("hoodie.filesystem.operation.retry.initial_interval_ms") + .defaultValue(100L) + .sinceVersion("0.11.0") + .withDocumentation("Amount of time (in ms) to wait, before retry to do operations on storage."); + + public static final ConfigProperty MAX_RETRY_INTERVAL_MS = ConfigProperty + .key("hoodie.filesystem.operation.retry.max_interval_ms") + .defaultValue(2000L) + .sinceVersion("0.11.0") + .withDocumentation("Maximum amount of time (in ms), to wait for next retry."); + + public static final ConfigProperty MAX_RETRY_NUMBERS = ConfigProperty + .key("hoodie.filesystem.operation.retry.max_numbers") Review comment: may be we can name this as "hoodie.filesystem.operation.retry.max.count" ## File path: hudi-common/src/main/java/org/apache/hudi/common/util/RetryHelper.java ## @@ -0,0 +1,129 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.common.util; + +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Random; +import java.util.stream.Collectors; + +public class RetryHelper { + private static final Logger LOG = LogManager.getLogger(RetryHelper.class); + private CheckedFunction func; + private int num; + private long maxIntervalTime; Review comment: can come of these be final ? ## File path: hudi-common/src/main/java/org/apache/hudi/common/util/RetryHelper.java ## @@ -0,0 +1,129 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain
[GitHub] [hudi] nsivabalan commented on a change in pull request #3887: [HUDI-2648] Retry FileSystem action instead of failed directly.
nsivabalan commented on a change in pull request #3887: URL: https://github.com/apache/hudi/pull/3887#discussion_r790400915 ## File path: hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableMetaClient.java ## @@ -253,7 +265,16 @@ public TimelineLayoutVersion getTimelineLayoutVersion() { */ public HoodieWrapperFileSystem getFs() { if (fs == null) { - FileSystem fileSystem = FSUtils.getFs(metaPath, hadoopConf.newCopy()); + FileSystem fileSystem; + if (fileSystemRetryConfig.isFileSystemActionRetryEnable()) { Review comment: Guess, what vinoth refers to is, we should add a property named supportsRetry or something to each StorageScheme. So, only for those storage schemes where retries are supported, we can check the config (fileSystemRetryConfig.isFileSystemActionRetryEnable()) and then invoke our retry Helper. @vinothchandar : can you please clarify on your above suggestion. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] nsivabalan commented on a change in pull request #3887: [HUDI-2648] Retry FileSystem action instead of failed directly.
nsivabalan commented on a change in pull request #3887: URL: https://github.com/apache/hudi/pull/3887#discussion_r790371241 ## File path: hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableMetaClient.java ## @@ -253,7 +265,16 @@ public TimelineLayoutVersion getTimelineLayoutVersion() { */ public HoodieWrapperFileSystem getFs() { if (fs == null) { - FileSystem fileSystem = FSUtils.getFs(metaPath, hadoopConf.newCopy()); + FileSystem fileSystem; + if (fileSystemRetryConfig.isFileSystemActionRetryEnable()) { Review comment: yeah, I am not sure if we can make it a property of StorageScheme. we should let users enable on a need basis. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] nsivabalan commented on a change in pull request #3887: [HUDI-2648] Retry FileSystem action instead of failed directly.
nsivabalan commented on a change in pull request #3887: URL: https://github.com/apache/hudi/pull/3887#discussion_r790371241 ## File path: hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableMetaClient.java ## @@ -253,7 +265,16 @@ public TimelineLayoutVersion getTimelineLayoutVersion() { */ public HoodieWrapperFileSystem getFs() { if (fs == null) { - FileSystem fileSystem = FSUtils.getFs(metaPath, hadoopConf.newCopy()); + FileSystem fileSystem; + if (fileSystemRetryConfig.isFileSystemActionRetryEnable()) { Review comment: we can disable by default and let interested users enable on their end. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] nsivabalan commented on a change in pull request #3887: [HUDI-2648] Retry FileSystem action instead of failed directly.
nsivabalan commented on a change in pull request #3887: URL: https://github.com/apache/hudi/pull/3887#discussion_r772698180 ## File path: hudi-common/src/main/java/org/apache/hudi/common/fs/FileSystemRetryConfig.java ## @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.common.fs; + +import org.apache.hudi.common.config.ConfigClassProperty; +import org.apache.hudi.common.config.ConfigGroups; +import org.apache.hudi.common.config.ConfigProperty; +import org.apache.hudi.common.config.HoodieConfig; + +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.util.Properties; + +/** + * The file system retry relevant config options. + */ +@ConfigClassProperty(name = "FileSystem Guard Configurations", +groupName = ConfigGroups.Names.WRITE_CLIENT, +description = "The filesystem retry related config options, to help deal with runtime exception like list/get/put/delete performance issues.") +public class FileSystemRetryConfig extends HoodieConfig { + + public static final ConfigProperty FILESYSTEM_RETRY_ENABLE = ConfigProperty + .key("hoodie.filesystem.action.retry.enable") + .defaultValue("false") + .sinceVersion("0.10.0") Review comment: we need to fix this to 0.11.0 ## File path: hudi-common/src/test/java/org/apache/hudi/common/fs/TestFSUtilsWithRetryWrapperEnable.java ## @@ -0,0 +1,205 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version loop.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-loop.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.common.fs; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocatedFileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RemoteIterator; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.util.Progressable; +import org.apache.hudi.common.util.RetryHelper; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.net.URI; +import java.util.Arrays; +import java.util.List; + +import static org.junit.jupiter.api.Assertions.assertThrows; + +/** + * Tests file system utils with retry wrapper enable. + */ +public class TestFSUtilsWithRetryWrapperEnable extends TestFSUtils { + + public RetryHelper retryHelper; + public static final String EXCEPTION_MESSAGE = "Fake runtime exception here."; + + @Override + @BeforeEach + public void setUp() throws IOException { +initMetaClient(); +FileSystemRetryConfig fileSystemRetryConfig = FileSystemRetryConfig.newBuilder().withFileSystemActionRetryEnabled(true).build(); +retryHelper = new RetryHelper<>() +.tryMaxInterval(fileSystemRetryConfig.getMaxRetryIntervalMs()) +.tryNum(fileSystemRetryConfig.getMaxRetryNumbers()) + .tryInitialInterval(fileSystemRetryConfig.getInitialRetryIntervalMs()); +FakeRemoteFileSystem fakeFs = new FakeRemoteFileSystem(FSUtils.getFs(metaClient.getMetaPath(), metaClient.getHadoopConf()), 1); +FileSystem fileSystem = new HoodieRetryWrapperFileSystem(fakeFs, retryHelper); +HoodieWrapperFileSystem fs = new HoodieWrapperFileSystem(fileSystem, new NoOpConsistencyGuard()); +
[GitHub] [hudi] nsivabalan commented on a change in pull request #3887: [HUDI-2648] Retry FileSystem action instead of failed directly.
nsivabalan commented on a change in pull request #3887: URL: https://github.com/apache/hudi/pull/3887#discussion_r766321890 ## File path: hudi-common/src/main/java/org/apache/hudi/common/fs/FileSystemGuardConfig.java ## @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.common.fs; + +import org.apache.hudi.common.config.ConfigClassProperty; +import org.apache.hudi.common.config.ConfigGroups; +import org.apache.hudi.common.config.ConfigProperty; +import org.apache.hudi.common.config.HoodieConfig; + +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.util.Properties; + +/** + * The consistency guard relevant config options. + */ +@ConfigClassProperty(name = "FileSystem Guard Configurations", +groupName = ConfigGroups.Names.WRITE_CLIENT, +description = "The filesystem guard related config options, to help deal with runtime exception like s3 list/get/put/delete performance issues.") +public class FileSystemGuardConfig extends HoodieConfig { Review comment: do you think naming this "FileSystemRetryConfig" would be more appropriate? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [hudi] nsivabalan commented on a change in pull request #3887: [HUDI-2648] Retry FileSystem action instead of failed directly.
nsivabalan commented on a change in pull request #3887: URL: https://github.com/apache/hudi/pull/3887#discussion_r754700441 ## File path: hudi-common/src/main/java/org/apache/hudi/common/fs/FileSystemGuardConfig.java ## @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.common.fs; + +import org.apache.hudi.common.config.ConfigClassProperty; +import org.apache.hudi.common.config.ConfigGroups; +import org.apache.hudi.common.config.ConfigProperty; +import org.apache.hudi.common.config.HoodieConfig; + +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.util.Properties; + +/** + * The consistency guard relevant config options. + */ +@ConfigClassProperty(name = "FileSystem Guard Configurations", +groupName = ConfigGroups.Names.WRITE_CLIENT, +description = "The filesystem guard related config options, to help deal with runtime exception like s3 list/get/put/delete performance issues.") +public class FileSystemGuardConfig extends HoodieConfig { + + public static final ConfigProperty FILESYSTEM_RETRY_ENABLE = ConfigProperty + .key("hoodie.filesystem.action.retry.enabled") + .defaultValue("false") + .sinceVersion("0.10.0") + .withDocumentation("Enabled to handle S3 list/get/delete etc file system performance issue."); + + public static final ConfigProperty INITIAL_RETRY_INTERVAL_MS = ConfigProperty + .key("hoodie.filesystem.action.retry.initial_interval_ms") + .defaultValue(100L) + .sinceVersion("0.10.0") + .withDocumentation("Amount of time (in ms) to wait, before retry to do operations on storage."); + + public static final ConfigProperty MAX_RETRY_INTERVAL_MS = ConfigProperty + .key("hoodie.filesystem.action.retry.max_interval_ms") + .defaultValue(2000L) + .sinceVersion("0.10.0") + .withDocumentation("Maximum amount of time (in ms), to wait for next retry."); + + public static final ConfigProperty MAX_RETRY_NUMBERS = ConfigProperty + .key("hoodie.filesystem.action.retry.max_numbers") Review comment: can you help me understand, why do we need both max retry number and max internal ms ? I thought either one is good enough. So, either 100*4 = 4 retries w/ 100 ms delay. or 2000/100 = 20 retries w/ 100 ms delay. ## File path: hudi-common/src/main/java/org/apache/hudi/common/util/RetryHelper.java ## @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.common.util; + +import org.apache.hudi.common.fs.HoodieWrapperFileSystem; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; + +import java.io.IOException; +import java.util.Random; + +public class RetryHelper { + private static final Logger LOG = LogManager.getLogger(RetryHelper.class); + private HoodieWrapperFileSystem.CheckedFunction func; + private int num; + private long maxIntervalTime; + private long initialIntervalTime = 100L; + private String taskInfo = "N/A"; + + public RetryHelper() { + } + + public RetryHelper(String taskInfo) { +this.taskInfo = taskInfo; + } + + public RetryHelper tryWith(HoodieWrapperFileSystem.CheckedFunction func) { +this.func = func; +return this; + } + + public RetryHelper tryNum(int num) { +this.num = num; +return this; + } + + public RetryHelper tryTaskInfo(String taskInfo) { +