From Michael Blow <[email protected]>:
Michael Blow has uploaded this change for review. (
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20487?usp=email )
Change subject: [NO ISSUE][HYR][CLOUD] Ensure limited retries on interrupt
......................................................................
[NO ISSUE][HYR][CLOUD] Ensure limited retries on interrupt
Change-Id: I01f24240af54bbdbd5c5946ac57fe5fc01484d15
---
M
asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/bulk/DeleteBulkCloudOperation.java
M
hyracks-fullstack/hyracks/hyracks-cloud/src/main/java/org/apache/hyracks/cloud/util/CloudRetryableRequestUtil.java
2 files changed, 34 insertions(+), 20 deletions(-)
git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb
refs/changes/87/20487/1
diff --git
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/bulk/DeleteBulkCloudOperation.java
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/bulk/DeleteBulkCloudOperation.java
index fcb7bda..d0122ed 100644
---
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/bulk/DeleteBulkCloudOperation.java
+++
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/bulk/DeleteBulkCloudOperation.java
@@ -55,9 +55,7 @@
return 0;
}
- if (LOGGER.isDebugEnabled()) {
- LOGGER.debug("Bulk deleting: local: {}, cloud: {}",
fileReferences, paths);
- }
+ LOGGER.debug("Bulk deleting: local: {}, cloud: {}", fileReferences,
paths);
CloudRetryableRequestUtil.run(() -> cloudClient.deleteObjects(bucket,
paths));
// Bulk delete locally as well
super.performOperation();
diff --git
a/hyracks-fullstack/hyracks/hyracks-cloud/src/main/java/org/apache/hyracks/cloud/util/CloudRetryableRequestUtil.java
b/hyracks-fullstack/hyracks/hyracks-cloud/src/main/java/org/apache/hyracks/cloud/util/CloudRetryableRequestUtil.java
index e011900..f4880ac 100644
---
a/hyracks-fullstack/hyracks/hyracks-cloud/src/main/java/org/apache/hyracks/cloud/util/CloudRetryableRequestUtil.java
+++
b/hyracks-fullstack/hyracks/hyracks-cloud/src/main/java/org/apache/hyracks/cloud/util/CloudRetryableRequestUtil.java
@@ -48,18 +48,25 @@
* @see System#setProperty(String, String)
*/
public static final String CLOUD_UNSTABLE_MODE = "cloud.unstable.mode";
- private static final int STABLE_NUMBER_OF_RETRIES = 10;
- private static final long STABLE_MAX_DELAY_BETWEEN_RETRIES_IN_MILLIS =
10_000;
private static final int UNSTABLE_NUMBER_OF_RETRIES = 100;
- private static final int UNSTABLE_MAX_DELAY_BETWEEN_RETRIES_IN_MILLIS = 0;
private static final Logger LOGGER = LogManager.getLogger();
- private static final int NUMBER_OF_RETRIES = getNumberOfRetries();
- private static final long MAX_DELAY_BETWEEN_RETRIES =
getMaxDelayBetweenRetries();
+ private static final int NUMBER_OF_RETRIES;
+ private static final long MAX_DELAY_BETWEEN_RETRIES_MILLIS;
private static final ICloudRetryPredicate RETRY_ALWAYS_PREDICATE = e ->
true;
private static final ICloudBeforeRetryRequest NO_OP_BEFORE_RETRY = () -> {
};
+ static {
+ if (Boolean.getBoolean(CLOUD_UNSTABLE_MODE)) {
+ MAX_DELAY_BETWEEN_RETRIES_MILLIS = 0;
+ NUMBER_OF_RETRIES = UNSTABLE_NUMBER_OF_RETRIES;
+ } else {
+ MAX_DELAY_BETWEEN_RETRIES_MILLIS = 10_000;
+ NUMBER_OF_RETRIES = 10;
+ }
+ }
+
private CloudRetryableRequestUtil() {
}
@@ -106,19 +113,37 @@
public static <T> T run(ICloudReturnableRequest<T> request,
ICloudBeforeRetryRequest retry)
throws HyracksDataException {
boolean interrupted = Thread.interrupted();
+ int attempt = 1;
+ IRetryPolicy retryPolicy = null;
try {
while (true) {
try {
return doRun(request, retry, RETRY_ALWAYS_PREDICATE);
} catch (Throwable e) {
// First, clear the interrupted flag
- interrupted |= Thread.interrupted();
- if (ExceptionUtils.causedByInterrupt(e)) {
+ if (Thread.interrupted() ||
ExceptionUtils.causedByInterrupt(e)) {
+ // TODO(mblow): it seems we should consider sharing
the retry state with doRun- for now, we
+ // maintain an outer retry count with the
same specs, which in the most
+ // pathological case can result in
NUMBER_OF_RETRIES^2 attempts
interrupted = true;
} else {
// The cause isn't an interruption, rethrow
throw e;
}
+ if (retryPolicy == null) {
+ retryPolicy = new
ExponentialRetryPolicy(NUMBER_OF_RETRIES, MAX_DELAY_BETWEEN_RETRIES_MILLIS);
+ }
+ do {
+ try {
+ if (!retryPolicy.retry(e)) {
+ throw HyracksDataException.create(e);
+ }
+ } catch (InterruptedException interruptedEx) {
+ // We were interrupted during the wait, so count
that as another interrupt
+ continue;
+ }
+ break;
+ } while (true);
retry.beforeRetry();
LOGGER.warn("Ignored interrupting
ICloudReturnableRequest", e);
}
@@ -189,7 +214,7 @@
throw HyracksDataException.create(e);
}
if (retryPolicy == null) {
- retryPolicy = new
ExponentialRetryPolicy(NUMBER_OF_RETRIES, MAX_DELAY_BETWEEN_RETRIES);
+ retryPolicy = new
ExponentialRetryPolicy(NUMBER_OF_RETRIES, MAX_DELAY_BETWEEN_RETRIES_MILLIS);
}
if (ExceptionUtils.causedByInterrupt(e) &&
!Thread.currentThread().isInterrupted()) {
LOGGER.warn("Lost suppressed interrupt during
ICloudReturnableRequest", e);
@@ -219,13 +244,4 @@
doRun(asReturnableRequest(request), retry, shouldRetry);
}
- private static int getNumberOfRetries() {
- boolean unstable = Boolean.getBoolean(CLOUD_UNSTABLE_MODE);
- return unstable ? UNSTABLE_NUMBER_OF_RETRIES :
STABLE_NUMBER_OF_RETRIES;
- }
-
- private static long getMaxDelayBetweenRetries() {
- boolean unstable = Boolean.getBoolean(CLOUD_UNSTABLE_MODE);
- return unstable ? UNSTABLE_MAX_DELAY_BETWEEN_RETRIES_IN_MILLIS :
STABLE_MAX_DELAY_BETWEEN_RETRIES_IN_MILLIS;
- }
}
--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20487?usp=email
To unsubscribe, or for help writing mail filters, visit
https://asterix-gerrit.ics.uci.edu/settings?usp=email
Gerrit-MessageType: newchange
Gerrit-Project: asterixdb
Gerrit-Branch: phoenix
Gerrit-Change-Id: I01f24240af54bbdbd5c5946ac57fe5fc01484d15
Gerrit-Change-Number: 20487
Gerrit-PatchSet: 1
Gerrit-Owner: Michael Blow <[email protected]>