>From Ritik Raj <[email protected]>:

Ritik Raj has uploaded this change for review. ( 
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/21213?usp=email )


Change subject: [ASTERIXDB-3768][CLOUD] Add per-link S3 checksumBehavior 
parameter
......................................................................

[ASTERIXDB-3768][CLOUD] Add per-link S3 checksumBehavior parameter

- user model changes: yes
- storage format changes: no
- interface changes: no

ext-ref: MB-71732
Change-Id: Id8fd6405c4aec2a79f469259339d4584612e2479
---
A 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/checksum-behavior/invalid-value/external_dataset.000.ddl.sqlpp
A 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/checksum-behavior/invalid-value/external_dataset.099.ddl.sqlpp
A 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/checksum-behavior/valid-value/external_dataset.000.ddl.sqlpp
A 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/checksum-behavior/valid-value/external_dataset.099.ddl.sqlpp
M 
asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
M 
asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3ClientConfig.java
M 
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/aws/s3/S3Constants.java
M 
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/aws/s3/S3Utils.java
8 files changed, 180 insertions(+), 13 deletions(-)



  git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb 
refs/changes/13/21213/1

diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/checksum-behavior/invalid-value/external_dataset.000.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/checksum-behavior/invalid-value/external_dataset.000.ddl.sqlpp
new file mode 100644
index 0000000..b078f00
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/checksum-behavior/invalid-value/external_dataset.000.ddl.sqlpp
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use test;
+
+drop type test if exists;
+create type test as open {
+};
+
+drop dataset test if exists;
+create external dataset test(test) using S3 (
+    ("accessKeyId"="dummyAccessKey"),
+    ("secretAccessKey"="dummySecretKey"),
+    ("region"="us-west-1"),
+    ("checksumBehavior"="invalid-checksum-value"),
+    ("serviceEndpoint"="http://127.0.0.1:8001";),
+    ("container"="playground"),
+    ("definition"="json-data/reviews/single-line/json"),
+    ("format"="json")
+);
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/checksum-behavior/invalid-value/external_dataset.099.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/checksum-behavior/invalid-value/external_dataset.099.ddl.sqlpp
new file mode 100644
index 0000000..dc10acd
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/checksum-behavior/invalid-value/external_dataset.099.ddl.sqlpp
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+drop dataverse test if exists;
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/checksum-behavior/valid-value/external_dataset.000.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/checksum-behavior/valid-value/external_dataset.000.ddl.sqlpp
new file mode 100644
index 0000000..5629b16
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/checksum-behavior/valid-value/external_dataset.000.ddl.sqlpp
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use test;
+
+drop type test if exists;
+create type test as open {
+};
+
+drop dataset test if exists;
+create external dataset test(test) using S3 (
+    ("accessKeyId"="dummyAccessKey"),
+    ("secretAccessKey"="dummySecretKey"),
+    ("region"="us-west-1"),
+    ("checksumBehavior"="when_required"),
+    ("serviceEndpoint"="http://127.0.0.1:8001";),
+    ("container"="playground"),
+    ("definition"="json-data/reviews/single-line/json"),
+    ("format"="json")
+);
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/checksum-behavior/valid-value/external_dataset.099.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/checksum-behavior/valid-value/external_dataset.099.ddl.sqlpp
new file mode 100644
index 0000000..dc10acd
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/checksum-behavior/valid-value/external_dataset.099.ddl.sqlpp
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+drop dataverse test if exists;
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
index 355cfd7..2e4987a 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
@@ -1290,6 +1290,17 @@
       </compilation-unit>
     </test-case>
     <test-case FilePath="external-dataset/s3">
+      <compilation-unit name="checksum-behavior/valid-value">
+        <output-dir compare="Text">checksum-behavior/valid-value</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="external-dataset/s3">
+      <compilation-unit name="checksum-behavior/invalid-value">
+        <output-dir compare="Text">checksum-behavior/invalid-value</output-dir>
+        <expected-error>ASX1173: Invalid value for parameter 
'checksumBehavior', allowed value(s): when_required, when_supported, 
auto</expected-error>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="external-dataset/s3">
       <compilation-unit name="anonymous_no_auth">
         <output-dir compare="Text">anonymous_no_auth</output-dir>
         <expected-error>ASX3119: Parameter 'secretAccessKey' is required if 
'accessKeyId' is provided</expected-error>
diff --git 
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3ClientConfig.java
 
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3ClientConfig.java
index 94b6f5a..7952911 100644
--- 
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3ClientConfig.java
+++ 
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3ClientConfig.java
@@ -23,7 +23,9 @@
 import java.util.Map;
 import java.util.Objects;

+import org.apache.asterix.common.exceptions.CompilationException;
 import org.apache.asterix.external.util.aws.AwsConstants;
+import org.apache.asterix.external.util.aws.s3.S3Utils;
 import org.apache.hyracks.cloud.io.ICloudProperties;
 import org.apache.hyracks.cloud.io.S3ChecksumBehavior;

@@ -129,19 +131,23 @@
         }
     }

-    public static S3ClientConfig of(Map<String, String> configuration, int 
writeBufferSize) {
+    public static S3ClientConfig of(Map<String, String> configuration, int 
writeBufferSize)
+            throws CompilationException {
         String endPoint = 
configuration.getOrDefault(AwsConstants.SERVICE_END_POINT_FIELD_NAME, "");
         // Disabled
         long profilerLogInterval = 0;

-        // Dummy values;
+        // Dummy values
         String region = "";
         String prefix = "";
         Collection<String> certificates = Collections.emptyList();
         boolean anonymousAuth = false;

+        S3ChecksumBehavior checksumBehavior = 
S3Utils.resolveChecksumBehavior(configuration, endPoint);
+
         return new S3ClientConfig(region, endPoint, prefix, anonymousAuth, 
certificates, profilerLogInterval,
-                writeBufferSize, S3ParallelDownloaderClientType.ASYNC, false);
+                writeBufferSize, 1, 0, 0, 0, false, false, 0, 0, -1, 
S3ParallelDownloaderClientType.ASYNC, false, "",
+                "", checksumBehavior);
     }

     public String getRegion() {
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/aws/s3/S3Constants.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/aws/s3/S3Constants.java
index 2e5d213..9ea1f35 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/aws/s3/S3Constants.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/aws/s3/S3Constants.java
@@ -27,6 +27,7 @@
     public static final int MAX_KEY_LENGTH_IN_BYTES = 1024;

     public static final String PATH_STYLE_ADDRESSING_FIELD_NAME = 
"pathStyleAddressing";
+    public static final String CHECKSUM_BEHAVIOR_FIELD_NAME = 
"checksumBehavior";

     /*
      * Hadoop-AWS
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/aws/s3/S3Utils.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/aws/s3/S3Utils.java
index 677d786..686523b 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/aws/s3/S3Utils.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/aws/s3/S3Utils.java
@@ -43,6 +43,7 @@
 import static org.apache.asterix.external.util.aws.AwsUtils.getCrossRegion;
 import static 
org.apache.asterix.external.util.aws.AwsUtils.getPathStyleAddressing;
 import static org.apache.asterix.external.util.aws.AwsUtils.getRegion;
+import static 
org.apache.asterix.external.util.aws.s3.S3Constants.CHECKSUM_BEHAVIOR_FIELD_NAME;
 import static org.apache.asterix.external.util.aws.s3.S3Constants.FILES;
 import static org.apache.asterix.external.util.aws.s3.S3Constants.FOLDERS;
 import static 
org.apache.asterix.external.util.aws.s3.S3Constants.HADOOP_ACCESS_KEY_ID;
@@ -107,7 +108,6 @@
 import org.apache.hyracks.api.exceptions.IWarningCollector;
 import org.apache.hyracks.api.exceptions.SourceLocation;
 import org.apache.hyracks.api.exceptions.Warning;
-import org.apache.hyracks.cloud.io.ICloudProperties;
 import org.apache.hyracks.cloud.io.S3ChecksumBehavior;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
@@ -136,6 +136,7 @@

 public class S3Utils {
     private static final Logger LOGGER = LogManager.getLogger();
+    static final String CHECKSUM_BEHAVIOR_ALLOWED_VALUES = "when_required, 
when_supported, auto";

     private static final class StaticTrustManagersProvider implements 
TlsTrustManagersProvider {
         private final TrustManager[] trustManagers;
@@ -187,15 +188,7 @@
         } else if (certificates != null && !certificates.isBlank()) {
             builder.httpClient(createHttpClient(certificates));
         }
-        ICloudProperties cloudProperties = appCtx.getCloudProperties();
-        S3ChecksumBehavior checksumBehavior;
-        if (cloudProperties != null) {
-            checksumBehavior = cloudProperties.getS3ChecksumBehavior();
-        } else {
-            // No cloud properties (external data context): default based on 
whether a custom endpoint is in use
-            checksumBehavior = 
S3ChecksumBehavior.defaultForEndpoint(serviceEndpoint);
-        }
-        applyChecksumBehavior(builder, checksumBehavior);
+        applyChecksumBehavior(builder, resolveChecksumBehavior(configuration, 
serviceEndpoint));
         awsClients.setConsumingClient(builder.build());
         return awsClients;

@@ -737,6 +730,46 @@
         return errorCode.equals(ERROR_INTERNAL_ERROR) || 
errorCode.equals(ERROR_SLOW_DOWN);
     }

+    /**
+     * Resolves the effective checksum behavior for external data (link) 
operations using a two-level priority chain:
+     * <ol>
+     *   <li>Per-link {@code checksumBehavior} parameter in the configuration 
map (explicit override)</li>
+     *   <li>Endpoint-based default: {@link S3ChecksumBehavior#AUTO} for 
native AWS S3,
+     *       {@link S3ChecksumBehavior#WHEN_REQUIRED} for S3-compatible 
storage</li>
+     * </ol>
+     * Cloud properties are intentionally not consulted here — this method is 
for external data (link) operations
+     * only, not for blob storage.
+     */
+    public static S3ChecksumBehavior resolveChecksumBehavior(Map<String, 
String> configuration, String serviceEndpoint)
+            throws CompilationException {
+        String perLink = configuration.get(CHECKSUM_BEHAVIOR_FIELD_NAME);
+        if (perLink != null) {
+            try {
+                return S3ChecksumBehavior.fromString(perLink);
+            } catch (IllegalArgumentException e) {
+                throw new 
CompilationException(INVALID_PARAM_VALUE_ALLOWED_VALUE, 
CHECKSUM_BEHAVIOR_FIELD_NAME,
+                        CHECKSUM_BEHAVIOR_ALLOWED_VALUES);
+            }
+        }
+        return S3ChecksumBehavior.defaultForEndpoint(serviceEndpoint);
+    }
+
+    /**
+     * Validates a {@code checksumBehavior} string value.
+     * Throws {@link CompilationException} if the value is non-null and not a 
recognized enum value.
+     */
+    public static void validateChecksumBehavior(String value) throws 
CompilationException {
+        if (value == null) {
+            return;
+        }
+        try {
+            S3ChecksumBehavior.fromString(value);
+        } catch (IllegalArgumentException e) {
+            throw new CompilationException(INVALID_PARAM_VALUE_ALLOWED_VALUE, 
CHECKSUM_BEHAVIOR_FIELD_NAME,
+                    CHECKSUM_BEHAVIOR_ALLOWED_VALUES);
+        }
+    }
+
     public static boolean validateAndGetPathStyleAddressing(String 
pathStyleAddressing, String endpoint)
             throws CompilationException {
         if (pathStyleAddressing == null) {

--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/21213?usp=email
To unsubscribe, or for help writing mail filters, visit 
https://asterix-gerrit.ics.uci.edu/settings?usp=email

Gerrit-MessageType: newchange
Gerrit-Project: asterixdb
Gerrit-Branch: lumina
Gerrit-Change-Id: Id8fd6405c4aec2a79f469259339d4584612e2479
Gerrit-Change-Number: 21213
Gerrit-PatchSet: 1
Gerrit-Owner: Ritik Raj <[email protected]>

Reply via email to