>From Ritik Raj <[email protected]>: Ritik Raj has uploaded this change for review. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/21213?usp=email )
Change subject: [ASTERIXDB-3768][CLOUD] Add per-link S3 checksumBehavior parameter ...................................................................... [ASTERIXDB-3768][CLOUD] Add per-link S3 checksumBehavior parameter - user model changes: yes - storage format changes: no - interface changes: no ext-ref: MB-71732 Change-Id: Id8fd6405c4aec2a79f469259339d4584612e2479 --- A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/checksum-behavior/invalid-value/external_dataset.000.ddl.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/checksum-behavior/invalid-value/external_dataset.099.ddl.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/checksum-behavior/valid-value/external_dataset.000.ddl.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/checksum-behavior/valid-value/external_dataset.099.ddl.sqlpp M asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml M asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3ClientConfig.java M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/aws/s3/S3Constants.java M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/aws/s3/S3Utils.java 8 files changed, 180 insertions(+), 13 deletions(-) git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb refs/changes/13/21213/1 diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/checksum-behavior/invalid-value/external_dataset.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/checksum-behavior/invalid-value/external_dataset.000.ddl.sqlpp new file mode 100644 index 0000000..b078f00 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/checksum-behavior/invalid-value/external_dataset.000.ddl.sqlpp @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +drop dataverse test if exists; +create dataverse test; +use test; + +drop type test if exists; +create type test as open { +}; + +drop dataset test if exists; +create external dataset test(test) using S3 ( + ("accessKeyId"="dummyAccessKey"), + ("secretAccessKey"="dummySecretKey"), + ("region"="us-west-1"), + ("checksumBehavior"="invalid-checksum-value"), + ("serviceEndpoint"="http://127.0.0.1:8001"), + ("container"="playground"), + ("definition"="json-data/reviews/single-line/json"), + ("format"="json") +); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/checksum-behavior/invalid-value/external_dataset.099.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/checksum-behavior/invalid-value/external_dataset.099.ddl.sqlpp new file mode 100644 index 0000000..dc10acd --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/checksum-behavior/invalid-value/external_dataset.099.ddl.sqlpp @@ -0,0 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +drop dataverse test if exists; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/checksum-behavior/valid-value/external_dataset.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/checksum-behavior/valid-value/external_dataset.000.ddl.sqlpp new file mode 100644 index 0000000..5629b16 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/checksum-behavior/valid-value/external_dataset.000.ddl.sqlpp @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +drop dataverse test if exists; +create dataverse test; +use test; + +drop type test if exists; +create type test as open { +}; + +drop dataset test if exists; +create external dataset test(test) using S3 ( + ("accessKeyId"="dummyAccessKey"), + ("secretAccessKey"="dummySecretKey"), + ("region"="us-west-1"), + ("checksumBehavior"="when_required"), + ("serviceEndpoint"="http://127.0.0.1:8001"), + ("container"="playground"), + ("definition"="json-data/reviews/single-line/json"), + ("format"="json") +); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/checksum-behavior/valid-value/external_dataset.099.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/checksum-behavior/valid-value/external_dataset.099.ddl.sqlpp new file mode 100644 index 0000000..dc10acd --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/checksum-behavior/valid-value/external_dataset.099.ddl.sqlpp @@ -0,0 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +drop dataverse test if exists; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml index 355cfd7..2e4987a 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml +++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml @@ -1290,6 +1290,17 @@ </compilation-unit> </test-case> <test-case FilePath="external-dataset/s3"> + <compilation-unit name="checksum-behavior/valid-value"> + <output-dir compare="Text">checksum-behavior/valid-value</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="external-dataset/s3"> + <compilation-unit name="checksum-behavior/invalid-value"> + <output-dir compare="Text">checksum-behavior/invalid-value</output-dir> + <expected-error>ASX1173: Invalid value for parameter 'checksumBehavior', allowed value(s): when_required, when_supported, auto</expected-error> + </compilation-unit> + </test-case> + <test-case FilePath="external-dataset/s3"> <compilation-unit name="anonymous_no_auth"> <output-dir compare="Text">anonymous_no_auth</output-dir> <expected-error>ASX3119: Parameter 'secretAccessKey' is required if 'accessKeyId' is provided</expected-error> diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3ClientConfig.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3ClientConfig.java index 94b6f5a..7952911 100644 --- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3ClientConfig.java +++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3ClientConfig.java @@ -23,7 +23,9 @@ import java.util.Map; import java.util.Objects; +import org.apache.asterix.common.exceptions.CompilationException; import org.apache.asterix.external.util.aws.AwsConstants; +import org.apache.asterix.external.util.aws.s3.S3Utils; import org.apache.hyracks.cloud.io.ICloudProperties; import org.apache.hyracks.cloud.io.S3ChecksumBehavior; @@ -129,19 +131,23 @@ } } - public static S3ClientConfig of(Map<String, String> configuration, int writeBufferSize) { + public static S3ClientConfig of(Map<String, String> configuration, int writeBufferSize) + throws CompilationException { String endPoint = configuration.getOrDefault(AwsConstants.SERVICE_END_POINT_FIELD_NAME, ""); // Disabled long profilerLogInterval = 0; - // Dummy values; + // Dummy values String region = ""; String prefix = ""; Collection<String> certificates = Collections.emptyList(); boolean anonymousAuth = false; + S3ChecksumBehavior checksumBehavior = S3Utils.resolveChecksumBehavior(configuration, endPoint); + return new S3ClientConfig(region, endPoint, prefix, anonymousAuth, certificates, profilerLogInterval, - writeBufferSize, S3ParallelDownloaderClientType.ASYNC, false); + writeBufferSize, 1, 0, 0, 0, false, false, 0, 0, -1, S3ParallelDownloaderClientType.ASYNC, false, "", + "", checksumBehavior); } public String getRegion() { diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/aws/s3/S3Constants.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/aws/s3/S3Constants.java index 2e5d213..9ea1f35 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/aws/s3/S3Constants.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/aws/s3/S3Constants.java @@ -27,6 +27,7 @@ public static final int MAX_KEY_LENGTH_IN_BYTES = 1024; public static final String PATH_STYLE_ADDRESSING_FIELD_NAME = "pathStyleAddressing"; + public static final String CHECKSUM_BEHAVIOR_FIELD_NAME = "checksumBehavior"; /* * Hadoop-AWS diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/aws/s3/S3Utils.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/aws/s3/S3Utils.java index 677d786..686523b 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/aws/s3/S3Utils.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/aws/s3/S3Utils.java @@ -43,6 +43,7 @@ import static org.apache.asterix.external.util.aws.AwsUtils.getCrossRegion; import static org.apache.asterix.external.util.aws.AwsUtils.getPathStyleAddressing; import static org.apache.asterix.external.util.aws.AwsUtils.getRegion; +import static org.apache.asterix.external.util.aws.s3.S3Constants.CHECKSUM_BEHAVIOR_FIELD_NAME; import static org.apache.asterix.external.util.aws.s3.S3Constants.FILES; import static org.apache.asterix.external.util.aws.s3.S3Constants.FOLDERS; import static org.apache.asterix.external.util.aws.s3.S3Constants.HADOOP_ACCESS_KEY_ID; @@ -107,7 +108,6 @@ import org.apache.hyracks.api.exceptions.IWarningCollector; import org.apache.hyracks.api.exceptions.SourceLocation; import org.apache.hyracks.api.exceptions.Warning; -import org.apache.hyracks.cloud.io.ICloudProperties; import org.apache.hyracks.cloud.io.S3ChecksumBehavior; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -136,6 +136,7 @@ public class S3Utils { private static final Logger LOGGER = LogManager.getLogger(); + static final String CHECKSUM_BEHAVIOR_ALLOWED_VALUES = "when_required, when_supported, auto"; private static final class StaticTrustManagersProvider implements TlsTrustManagersProvider { private final TrustManager[] trustManagers; @@ -187,15 +188,7 @@ } else if (certificates != null && !certificates.isBlank()) { builder.httpClient(createHttpClient(certificates)); } - ICloudProperties cloudProperties = appCtx.getCloudProperties(); - S3ChecksumBehavior checksumBehavior; - if (cloudProperties != null) { - checksumBehavior = cloudProperties.getS3ChecksumBehavior(); - } else { - // No cloud properties (external data context): default based on whether a custom endpoint is in use - checksumBehavior = S3ChecksumBehavior.defaultForEndpoint(serviceEndpoint); - } - applyChecksumBehavior(builder, checksumBehavior); + applyChecksumBehavior(builder, resolveChecksumBehavior(configuration, serviceEndpoint)); awsClients.setConsumingClient(builder.build()); return awsClients; @@ -737,6 +730,46 @@ return errorCode.equals(ERROR_INTERNAL_ERROR) || errorCode.equals(ERROR_SLOW_DOWN); } + /** + * Resolves the effective checksum behavior for external data (link) operations using a two-level priority chain: + * <ol> + * <li>Per-link {@code checksumBehavior} parameter in the configuration map (explicit override)</li> + * <li>Endpoint-based default: {@link S3ChecksumBehavior#AUTO} for native AWS S3, + * {@link S3ChecksumBehavior#WHEN_REQUIRED} for S3-compatible storage</li> + * </ol> + * Cloud properties are intentionally not consulted here — this method is for external data (link) operations + * only, not for blob storage. + */ + public static S3ChecksumBehavior resolveChecksumBehavior(Map<String, String> configuration, String serviceEndpoint) + throws CompilationException { + String perLink = configuration.get(CHECKSUM_BEHAVIOR_FIELD_NAME); + if (perLink != null) { + try { + return S3ChecksumBehavior.fromString(perLink); + } catch (IllegalArgumentException e) { + throw new CompilationException(INVALID_PARAM_VALUE_ALLOWED_VALUE, CHECKSUM_BEHAVIOR_FIELD_NAME, + CHECKSUM_BEHAVIOR_ALLOWED_VALUES); + } + } + return S3ChecksumBehavior.defaultForEndpoint(serviceEndpoint); + } + + /** + * Validates a {@code checksumBehavior} string value. + * Throws {@link CompilationException} if the value is non-null and not a recognized enum value. + */ + public static void validateChecksumBehavior(String value) throws CompilationException { + if (value == null) { + return; + } + try { + S3ChecksumBehavior.fromString(value); + } catch (IllegalArgumentException e) { + throw new CompilationException(INVALID_PARAM_VALUE_ALLOWED_VALUE, CHECKSUM_BEHAVIOR_FIELD_NAME, + CHECKSUM_BEHAVIOR_ALLOWED_VALUES); + } + } + public static boolean validateAndGetPathStyleAddressing(String pathStyleAddressing, String endpoint) throws CompilationException { if (pathStyleAddressing == null) { -- To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/21213?usp=email To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings?usp=email Gerrit-MessageType: newchange Gerrit-Project: asterixdb Gerrit-Branch: lumina Gerrit-Change-Id: Id8fd6405c4aec2a79f469259339d4584612e2479 Gerrit-Change-Number: 21213 Gerrit-PatchSet: 1 Gerrit-Owner: Ritik Raj <[email protected]>
