amit-jain commented on code in PR #2558:
URL: https://github.com/apache/jackrabbit-oak/pull/2558#discussion_r2415694505
##########
oak-blob-cloud/src/main/java/org/apache/jackrabbit/oak/blob/cloud/s3/S3RequestDecorator.java:
##########
@@ -101,109 +117,96 @@ public GetObjectRequest decorate(final GetObjectRequest request) {
* Set encryption in {@link PutObjectRequest}
*/
public PutObjectRequest decorate(PutObjectRequest request) {
- ObjectMetadata metadata = request.getMetadata() == null
- ? new ObjectMetadata()
- : request.getMetadata();
- switch (getDataEncryption()) {
+ PutObjectRequest.Builder builder = request.toBuilder();
+
+ DataEncryption encryption = getDataEncryption();
+
+ switch (encryption) {
case SSE_S3:
-            metadata.setSSEAlgorithm(ObjectMetadata.AES_256_SERVER_SIDE_ENCRYPTION);
+ builder.serverSideEncryption(ServerSideEncryption.AES256);
break;
case SSE_KMS:
- metadata.setSSEAlgorithm(SSEAlgorithm.KMS.getAlgorithm());
- /*Set*/
- request.withSSEAwsKeyManagementParams(sseParams);
+ builder.serverSideEncryption(ServerSideEncryption.AWS_KMS);
+ if (sseKmsKey != null) {
+ builder.ssekmsKeyId(sseKmsKey);
+ }
break;
case SSE_C:
- request.withSSECustomerKey(sseCustomerKey);
+            builder.sseCustomerAlgorithm(ServerSideEncryption.AES256.toString());
+ if (sseCustomerKey != null) {
Review Comment:
what happens when sseCustomerKey == null?
Will any request work?
##########
oak-blob-cloud/src/main/java/org/apache/jackrabbit/oak/blob/cloud/s3/S3RequestDecorator.java:
##########
@@ -101,109 +117,96 @@ public GetObjectRequest decorate(final GetObjectRequest request) {
* Set encryption in {@link PutObjectRequest}
*/
public PutObjectRequest decorate(PutObjectRequest request) {
- ObjectMetadata metadata = request.getMetadata() == null
- ? new ObjectMetadata()
- : request.getMetadata();
- switch (getDataEncryption()) {
+ PutObjectRequest.Builder builder = request.toBuilder();
+
+ DataEncryption encryption = getDataEncryption();
+
+ switch (encryption) {
case SSE_S3:
-            metadata.setSSEAlgorithm(ObjectMetadata.AES_256_SERVER_SIDE_ENCRYPTION);
+ builder.serverSideEncryption(ServerSideEncryption.AES256);
break;
case SSE_KMS:
- metadata.setSSEAlgorithm(SSEAlgorithm.KMS.getAlgorithm());
- /*Set*/
- request.withSSEAwsKeyManagementParams(sseParams);
+ builder.serverSideEncryption(ServerSideEncryption.AWS_KMS);
+ if (sseKmsKey != null) {
Review Comment:
what happens when sseKmsKey == null?
##########
oak-blob-cloud/src/main/java/org/apache/jackrabbit/oak/blob/cloud/s3/S3RequestDecorator.java:
##########
@@ -101,109 +117,96 @@ public GetObjectRequest decorate(final GetObjectRequest request) {
* Set encryption in {@link PutObjectRequest}
*/
public PutObjectRequest decorate(PutObjectRequest request) {
- ObjectMetadata metadata = request.getMetadata() == null
- ? new ObjectMetadata()
- : request.getMetadata();
- switch (getDataEncryption()) {
+ PutObjectRequest.Builder builder = request.toBuilder();
+
+ DataEncryption encryption = getDataEncryption();
+
+ switch (encryption) {
case SSE_S3:
-            metadata.setSSEAlgorithm(ObjectMetadata.AES_256_SERVER_SIDE_ENCRYPTION);
+ builder.serverSideEncryption(ServerSideEncryption.AES256);
break;
case SSE_KMS:
- metadata.setSSEAlgorithm(SSEAlgorithm.KMS.getAlgorithm());
- /*Set*/
- request.withSSEAwsKeyManagementParams(sseParams);
+ builder.serverSideEncryption(ServerSideEncryption.AWS_KMS);
+ if (sseKmsKey != null) {
+ builder.ssekmsKeyId(sseKmsKey);
+ }
break;
case SSE_C:
- request.withSSECustomerKey(sseCustomerKey);
+            builder.sseCustomerAlgorithm(ServerSideEncryption.AES256.toString());
+            if (sseCustomerKey != null) {
+                builder.sseCustomerKey(sseCustomerKey);
+                builder.sseCustomerKeyMD5(Utils.calculateMD5(sseCustomerKey));
+ }
break;
case NONE:
break;
}
- request.setMetadata(metadata);
- return request;
+ return builder.build();
}
/**
* Set encryption in {@link CopyObjectRequest}
*/
public CopyObjectRequest decorate(CopyObjectRequest request) {
- ObjectMetadata metadata = request.getNewObjectMetadata() == null
- ? new ObjectMetadata()
- : request.getNewObjectMetadata();;
+
+ CopyObjectRequest.Builder builder = request.toBuilder();
+
switch (getDataEncryption()) {
case SSE_S3:
-            metadata.setSSEAlgorithm(ObjectMetadata.AES_256_SERVER_SIDE_ENCRYPTION);
+ builder.serverSideEncryption(ServerSideEncryption.AES256);
break;
case SSE_KMS:
- metadata.setSSEAlgorithm(SSEAlgorithm.KMS.getAlgorithm());
- request.withSSEAwsKeyManagementParams(sseParams);
+ builder.serverSideEncryption(ServerSideEncryption.AWS_KMS);
+ if (sseKmsKey != null) {
Review Comment:
Q: same as above
##########
oak-blob-cloud/src/main/java/org/apache/jackrabbit/oak/blob/cloud/s3/S3RequestDecorator.java:
##########
@@ -101,109 +117,96 @@ public GetObjectRequest decorate(final GetObjectRequest request) {
* Set encryption in {@link PutObjectRequest}
*/
public PutObjectRequest decorate(PutObjectRequest request) {
- ObjectMetadata metadata = request.getMetadata() == null
- ? new ObjectMetadata()
- : request.getMetadata();
- switch (getDataEncryption()) {
+ PutObjectRequest.Builder builder = request.toBuilder();
+
+ DataEncryption encryption = getDataEncryption();
+
+ switch (encryption) {
case SSE_S3:
-            metadata.setSSEAlgorithm(ObjectMetadata.AES_256_SERVER_SIDE_ENCRYPTION);
+ builder.serverSideEncryption(ServerSideEncryption.AES256);
break;
case SSE_KMS:
- metadata.setSSEAlgorithm(SSEAlgorithm.KMS.getAlgorithm());
- /*Set*/
- request.withSSEAwsKeyManagementParams(sseParams);
+ builder.serverSideEncryption(ServerSideEncryption.AWS_KMS);
+ if (sseKmsKey != null) {
+ builder.ssekmsKeyId(sseKmsKey);
+ }
break;
case SSE_C:
- request.withSSECustomerKey(sseCustomerKey);
+            builder.sseCustomerAlgorithm(ServerSideEncryption.AES256.toString());
+            if (sseCustomerKey != null) {
+                builder.sseCustomerKey(sseCustomerKey);
+                builder.sseCustomerKeyMD5(Utils.calculateMD5(sseCustomerKey));
+ }
break;
case NONE:
break;
}
- request.setMetadata(metadata);
- return request;
+ return builder.build();
}
/**
* Set encryption in {@link CopyObjectRequest}
*/
public CopyObjectRequest decorate(CopyObjectRequest request) {
- ObjectMetadata metadata = request.getNewObjectMetadata() == null
- ? new ObjectMetadata()
- : request.getNewObjectMetadata();;
+
+ CopyObjectRequest.Builder builder = request.toBuilder();
+
switch (getDataEncryption()) {
case SSE_S3:
-            metadata.setSSEAlgorithm(ObjectMetadata.AES_256_SERVER_SIDE_ENCRYPTION);
+ builder.serverSideEncryption(ServerSideEncryption.AES256);
break;
case SSE_KMS:
- metadata.setSSEAlgorithm(SSEAlgorithm.KMS.getAlgorithm());
- request.withSSEAwsKeyManagementParams(sseParams);
+ builder.serverSideEncryption(ServerSideEncryption.AWS_KMS);
+ if (sseKmsKey != null) {
+ // sseParams is typically a KMS Key ID string in SDK 2.x.
+ builder.ssekmsKeyId(sseKmsKey);
+ }
break;
case SSE_C:
-                metadata.setSSEAlgorithm(AES256.getAlgorithm());
-                request.withSourceSSECustomerKey(sseCustomerKey).withDestinationSSECustomerKey(sseCustomerKey);
+            builder.sseCustomerAlgorithm(ServerSideEncryption.AES256.toString());
+            builder.copySourceSSECustomerAlgorithm(ServerSideEncryption.AES256.toString());
+ if (sseCustomerKey != null) {
Review Comment:
Q: same as above?
##########
oak-blob-cloud/src/main/java/org/apache/jackrabbit/oak/blob/cloud/s3/S3RequestDecorator.java:
##########
@@ -101,109 +117,96 @@ public GetObjectRequest decorate(final GetObjectRequest request) {
* Set encryption in {@link PutObjectRequest}
*/
public PutObjectRequest decorate(PutObjectRequest request) {
- ObjectMetadata metadata = request.getMetadata() == null
- ? new ObjectMetadata()
- : request.getMetadata();
- switch (getDataEncryption()) {
+ PutObjectRequest.Builder builder = request.toBuilder();
+
+ DataEncryption encryption = getDataEncryption();
+
+ switch (encryption) {
case SSE_S3:
-            metadata.setSSEAlgorithm(ObjectMetadata.AES_256_SERVER_SIDE_ENCRYPTION);
+ builder.serverSideEncryption(ServerSideEncryption.AES256);
break;
case SSE_KMS:
- metadata.setSSEAlgorithm(SSEAlgorithm.KMS.getAlgorithm());
- /*Set*/
- request.withSSEAwsKeyManagementParams(sseParams);
+ builder.serverSideEncryption(ServerSideEncryption.AWS_KMS);
+ if (sseKmsKey != null) {
+ builder.ssekmsKeyId(sseKmsKey);
+ }
break;
case SSE_C:
- request.withSSECustomerKey(sseCustomerKey);
+            builder.sseCustomerAlgorithm(ServerSideEncryption.AES256.toString());
+            if (sseCustomerKey != null) {
+                builder.sseCustomerKey(sseCustomerKey);
+                builder.sseCustomerKeyMD5(Utils.calculateMD5(sseCustomerKey));
+ }
break;
case NONE:
break;
}
- request.setMetadata(metadata);
- return request;
+ return builder.build();
}
/**
* Set encryption in {@link CopyObjectRequest}
*/
public CopyObjectRequest decorate(CopyObjectRequest request) {
- ObjectMetadata metadata = request.getNewObjectMetadata() == null
- ? new ObjectMetadata()
- : request.getNewObjectMetadata();;
+
+ CopyObjectRequest.Builder builder = request.toBuilder();
+
switch (getDataEncryption()) {
case SSE_S3:
-            metadata.setSSEAlgorithm(ObjectMetadata.AES_256_SERVER_SIDE_ENCRYPTION);
+ builder.serverSideEncryption(ServerSideEncryption.AES256);
break;
case SSE_KMS:
- metadata.setSSEAlgorithm(SSEAlgorithm.KMS.getAlgorithm());
- request.withSSEAwsKeyManagementParams(sseParams);
+ builder.serverSideEncryption(ServerSideEncryption.AWS_KMS);
+ if (sseKmsKey != null) {
+ // sseParams is typically a KMS Key ID string in SDK 2.x.
+ builder.ssekmsKeyId(sseKmsKey);
+ }
break;
case SSE_C:
-                metadata.setSSEAlgorithm(AES256.getAlgorithm());
-                request.withSourceSSECustomerKey(sseCustomerKey).withDestinationSSECustomerKey(sseCustomerKey);
+            builder.sseCustomerAlgorithm(ServerSideEncryption.AES256.toString());
+            builder.copySourceSSECustomerAlgorithm(ServerSideEncryption.AES256.toString());
+            if (sseCustomerKey != null) {
+                builder.sseCustomerKey(sseCustomerKey)
+                    .copySourceSSECustomerKey(sseCustomerKey)
+                    .copySourceSSECustomerKeyMD5(Utils.calculateMD5(sseCustomerKey))
+                    .sseCustomerKeyMD5(Utils.calculateMD5(sseCustomerKey));
+ }
break;
case NONE:
break;
}
- request.setNewObjectMetadata(metadata);
- return request;
+ return builder.build();
}
-    public InitiateMultipartUploadRequest decorate(InitiateMultipartUploadRequest request) {
- ObjectMetadata metadata = request.getObjectMetadata() == null
- ? new ObjectMetadata()
- : request.getObjectMetadata();;
+    public CreateMultipartUploadRequest decorate(CreateMultipartUploadRequest request) {
+
+ CreateMultipartUploadRequest.Builder builder = request.toBuilder();
+
switch (getDataEncryption()) {
case SSE_S3:
-            metadata.setSSEAlgorithm(ObjectMetadata.AES_256_SERVER_SIDE_ENCRYPTION);
+ builder.serverSideEncryption(ServerSideEncryption.AES256);
break;
case SSE_KMS:
- metadata.setSSEAlgorithm(SSEAlgorithm.KMS.getAlgorithm());
- request.withSSEAwsKeyManagementParams(sseParams);
+ builder.serverSideEncryption(ServerSideEncryption.AWS_KMS);
+ if (sseKmsKey != null) {
+ builder.ssekmsKeyId(sseKmsKey);
+ }
break;
case SSE_C:
- request.withSSECustomerKey(sseCustomerKey);
+            builder.sseCustomerAlgorithm(ServerSideEncryption.AES256.toString());
+ if (sseCustomerKey != null) {
Review Comment:
Q: same as above.
The point is: if the request cannot work without the key, why not throw in init itself?
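A fail-fast sketch of what I mean (the property key here is a made-up placeholder and the field names are assumptions; only the SSE_C check is the actual suggestion — for SSE_KMS a null key id can be legitimate, since S3 then falls back to the account's default aws/s3 key):
```java
// Sketch: validate the encryption configuration once, at construction time,
// instead of building requests that S3 will later reject.
public S3RequestDecorator(Properties props) {
    this.dataEncryption = Utils.getDataEncryption(props);
    this.sseCustomerKey = props.getProperty("sseCustomerKey"); // placeholder key name
    if (dataEncryption == DataEncryption.SSE_C && sseCustomerKey == null) {
        // With SSE_C the request would carry only the algorithm header and no
        // key material, which S3 rejects, so fail here instead.
        throw new IllegalArgumentException("SSE_C configured but no customer key provided");
    }
}
```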
##########
oak-blob-cloud/src/main/java/org/apache/jackrabbit/oak/blob/cloud/s3/S3Backend.java:
##########
@@ -316,57 +306,72 @@ void setBinaryTransferAccelerationEnabled(boolean enabled) {
* method uses parallel concurrent connections to upload.
*/
@Override
- public void write(DataIdentifier identifier, File file)
- throws DataStoreException {
+    public void write(DataIdentifier identifier, File file) throws DataStoreException {
String key = getKeyName(identifier);
- ObjectMetadata objectMetaData = null;
long start = System.currentTimeMillis();
        ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader();
try {
- Thread.currentThread().setContextClassLoader(
- getClass().getClassLoader());
+            Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
// check if the same record already exists
+ HeadObjectResponse headMeta = null;
try {
-                objectMetaData = s3service.getObjectMetadata(s3ReqDecorator.decorate(new GetObjectMetadataRequest(bucket, key)));
- } catch (AmazonServiceException ase) {
-                if (!(ase.getStatusCode() == 404 || ase.getStatusCode() == 403)) {
- throw ase;
+ headMeta = s3Client.headObject(s3ReqDecorator.decorate(
+ HeadObjectRequest.builder()
+ .bucket(bucket)
+ .key(key)
+ .build()));
+ } catch (S3Exception se) {
+                if (!(se instanceof NoSuchKeyException || se.statusCode() == 404 || se.statusCode() == 403)) {
+ throw se;
}
}
- if (objectMetaData != null) {
- long l = objectMetaData.getContentLength();
+ if (headMeta != null) {
+ long l = headMeta.contentLength();
if (l != file.length()) {
- throw new DataStoreException("Collision: " + key
-                        + " new length: " + file.length() + " old length: " + l);
+                    throw new DataStoreException("Collision: " + key + " new length: " + file.length() + " old length: " + l);
}
- LOG.debug("[{}]'s exists, lastmodified = [{}]", key,
- objectMetaData.getLastModified().getTime());
- CopyObjectRequest copReq = new CopyObjectRequest(bucket, key,
- bucket, key);
-                LOG.warn("Object MetaData before copy: {}", objectMetaData.getRawMetadata());
+                LOG.debug("[{}]'s exists, lastmodified = [{}]", key, headMeta.lastModified().toEpochMilli());
+ CopyObjectRequest copyReq =
+ CopyObjectRequest.builder()
+ .sourceBucket(bucket)
+ .sourceKey(key)
+ .destinationBucket(bucket)
+ .destinationKey(key)
+ .metadataDirective(MetadataDirective.REPLACE)
+ .build();
+                LOG.warn("Object MetaData before copy: {}", headMeta.metadata());
                if (Objects.equals(RemoteStorageMode.S3, properties.get(S3Constants.MODE))) {
Review Comment:
Can MetadataDirective.COPY not be used here? If it can, I think this block is not needed.
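For reference, a sketch of what that would look like. One caveat worth verifying: S3 can reject a self-copy that changes nothing about the object, which may be why REPLACE plus rebuilt metadata is used here.
```java
// Sketch: COPY carries the existing metadata over unchanged, so the
// mode-specific rebuilding of metadata below would become unnecessary.
CopyObjectRequest copyReq = CopyObjectRequest.builder()
        .sourceBucket(bucket)
        .sourceKey(key)
        .destinationBucket(bucket)
        .destinationKey(key)
        .metadataDirective(MetadataDirective.COPY)
        .build();
```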
##########
oak-blob-cloud/src/main/java/org/apache/jackrabbit/oak/blob/cloud/s3/S3Backend.java:
##########
@@ -469,11 +463,20 @@ public void deleteRecord(DataIdentifier identifier)
@Override
public void close() {
// backend is closing. abort all mulitpart uploads from start.
- if(s3service.doesBucketExist(bucket)) {
- tmx.abortMultipartUploads(bucket, startTime);
+ try {
+ if(Utils.bucketExists(s3Client, bucket)) {
+ // List and abort multipart uploads initiated before startTime
+ Utils.abortMultipartUpload(bucket, startTime, s3Client);
+ }
+ } finally {
+ if (tmx != null) {
+ tmx.close();
+ }
+ if (s3AsyncClient != null) {
+ s3AsyncClient.close();
+ }
+ s3Client.close();
Review Comment:
As the code stands we should also close the s3PresignService (or the s3Utils backing it),
because it creates a new s3Client in some cases; with the suggested changes it may still
be necessary.
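e.g. a sketch of the close ordering (the s3PresignService field name follows the description above and is an assumption):
```java
} finally {
    if (tmx != null) {
        tmx.close();
    }
    if (s3AsyncClient != null) {
        s3AsyncClient.close();
    }
    // also release the presign service and any client it created internally
    if (s3PresignService != null) {
        s3PresignService.close();
    }
    s3Client.close();
}
```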
##########
oak-blob-cloud/src/main/java/org/apache/jackrabbit/oak/blob/cloud/s3/S3Backend.java:
##########
@@ -391,21 +395,10 @@ public boolean exists(DataIdentifier identifier) throws DataStoreException {
        ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader();
try {
Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
-            ObjectMetadata objectMetaData = s3service.getObjectMetadata(s3ReqDecorator.decorate(new GetObjectMetadataRequest(bucket, key)));
- if (objectMetaData != null) {
- LOG.trace("exists [{}]: [true] took [{}] ms.",
- identifier, (System.currentTimeMillis() - start) );
- return true;
- }
- return false;
- } catch (AmazonServiceException e) {
- if (e.getStatusCode() == 404 || e.getStatusCode() == 403) {
- LOG.debug("exists [{}]: [false] took [{}] ms.",
- identifier, (System.currentTimeMillis() - start) );
- return false;
- }
- throw new DataStoreException(
-                "Error occured to getObjectMetadata for key [" + identifier.toString() + "]", e);
+ return Utils.objectExists(s3Client, bucket, key, s3ReqDecorator);
Review Comment:
It is a little problematic that a core method has been moved to the Utils class, which
originally handled only connection setup. If tests require it, it should be duplicated
in DataStoreUtils instead.
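e.g. a test-side sketch that could live in DataStoreUtils (names are illustrative):
```java
// Sketch: duplicate the existence probe for tests so the backend keeps its own.
public static boolean objectExists(S3Client s3, String bucket, String key) {
    try {
        s3.headObject(b -> b.bucket(bucket).key(key));
        return true;
    } catch (S3Exception e) {
        // treat "not found" and "forbidden" as absent, as the backend does
        if (e.statusCode() == 404 || e.statusCode() == 403) {
            return false;
        }
        throw e;
    }
}
```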
##########
oak-blob-cloud/src/main/java/org/apache/jackrabbit/oak/blob/cloud/s3/S3Backend.java:
##########
@@ -1346,34 +1359,51 @@ private static String getIdentifierName(String key) {
return key.substring(0, 4) + key.substring(5);
}
+ private void setRemoteStorageMode() {
Review Comment:
misplaced? Should it not be in the Utils?
##########
oak-blob-cloud/src/main/java/org/apache/jackrabbit/oak/blob/cloud/s3/Utils.java:
##########
@@ -67,70 +102,84 @@ public final class Utils {
public static final String AWSDOTCOM = "amazonaws.com";
public static final String S3 = "s3";
+ public static final String S3_ACCELERATION = "s3-accelerate";
public static final String DOT = ".";
public static final String DASH = "-";
/**
- * private constructor so that class cannot initialized from outside.
+ * private constructor so that class cannot initialize from outside.
*/
private Utils() {
}
/**
- * Create AmazonS3Client from properties.
- *
- * @param prop properties to configure @link {@link AmazonS3Client}
- * @return {@link AmazonS3Client}
+ * Create S3Client from properties.
+ *
+ * @param prop properties to configure @link {@link S3Client}
+ * @param accReq boolean indicating whether to accelerate requests
+ * @return {@link S3Client}
*/
- public static AmazonS3Client openService(final Properties prop) {
- String accessKey = prop.getProperty(S3Constants.ACCESS_KEY);
- String secretKey = prop.getProperty(S3Constants.SECRET_KEY);
- AmazonS3Client s3service = null;
- if (StringUtils.isEmpty(accessKey)
- || StringUtils.isEmpty(secretKey)) {
- LOG.info("Configuring Amazon Client from environment");
- s3service = new AmazonS3Client(getClientConfiguration(prop));
- } else {
- LOG.info("Configuring Amazon Client from property file.");
- AWSCredentials credentials = new BasicAWSCredentials(accessKey,
- secretKey);
- s3service = new AmazonS3Client(credentials,
- getClientConfiguration(prop));
- }
- String region = prop.getProperty(S3Constants.S3_REGION);
- String endpoint = null;
- String propEndPoint = prop.getProperty(S3Constants.S3_END_POINT);
- if ((propEndPoint != null) & !"".equals(propEndPoint)) {
- endpoint = propEndPoint;
- } else {
- if (StringUtils.isEmpty(region)) {
-                com.amazonaws.regions.Region s3Region = Regions.getCurrentRegion();
- if (s3Region != null) {
- region = s3Region.getName();
- } else {
- throw new AmazonClientException(
- "parameter ["
- + S3Constants.S3_REGION
-                            + "] not configured and cannot be derived from environment");
- }
- }
- if (DEFAULT_AWS_BUCKET_REGION.equals(region)) {
- endpoint = S3 + DOT + AWSDOTCOM;
- } else {
- endpoint = S3 + DOT + region + DOT + AWSDOTCOM;
- }
- }
- /*
- * setting endpoint to remove latency of redirection. If endpoint is
- * not set, invocation first goes us standard region, which
- * redirects it to correct location.
- */
- s3service.setEndpoint(endpoint);
- LOG.info("S3 service endpoint [{}] ", endpoint);
- return s3service;
+ public static S3Client openService(final Properties prop, boolean accReq) {
+
+ S3ClientBuilder builder = S3Client.builder();
+
+ builder.credentialsProvider(getAwsCredentials(prop));
+ builder.overrideConfiguration(getClientConfiguration(prop));
+ builder.httpClient(getSdkHttpClient(prop));
+
+ String region = getRegion(prop);
+ builder.endpointOverride(getEndPointUri(prop, accReq, region));
+
+ // region is mandatory even with endpointOverride
+ builder.region(Region.of(region));
+ // to enable cross region bucket access
+ builder.crossRegionAccessEnabled(true);
Review Comment:
Do we need another property for this?
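e.g. a sketch with a hypothetical property key:
```java
// Sketch: gate cross-region access behind a property instead of hard-coding
// true; S3_CROSS_REGION_ACCESS is a hypothetical constant, not an existing one.
boolean crossRegion = Boolean.parseBoolean(
        prop.getProperty(S3Constants.S3_CROSS_REGION_ACCESS, "false"));
builder.crossRegionAccessEnabled(crossRegion);
```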
##########
oak-blob-cloud/src/main/java/org/apache/jackrabbit/oak/blob/cloud/s3/Utils.java:
##########
@@ -146,46 +195,71 @@ public static AmazonS3Client openService(final Properties prop) {
* @param bucketName The name of the bucket to check.
* @return True if the bucket exists; false otherwise.
*/
-    public static boolean waitForBucket(@NotNull final AmazonS3 s3Client, @NotNull final String bucketName) {
+    public static boolean waitForBucket(@NotNull final S3Client s3Client, final String bucketName) {
int tries = 0;
boolean bucketExists = false;
- while (20 > tries++) {
- bucketExists = s3Client.doesBucketExistV2(bucketName);
- if (bucketExists) break;
+ while (tries < 20) {
+ tries++;
try {
- Thread.sleep(100 * tries);
+                s3Client.headBucket(headReq -> headReq.bucket(bucketName).build());
+ bucketExists = true;
+ break;
+ } catch (NoSuchBucketException e) {
+ // Bucket does not exist, wait and retry
+ try {
+ Thread.sleep(100L * tries);
+ } catch (InterruptedException ie) {}
+ } catch (Exception e) {
+ // For other exceptions, optionally handle or break
+ break;
}
- catch (InterruptedException e) { }
}
return bucketExists;
}
/**
* Delete S3 bucket. This method first deletes all objects from bucket and
* then delete empty bucket.
- *
+ *
* @param bucketName the bucket name.
*/
    public static void deleteBucket(final String bucketName) throws IOException {
Properties prop = readConfig(DEFAULT_CONFIG_FILE);
- AmazonS3 s3service = openService(prop);
- ObjectListing prevObjectListing = s3service.listObjects(bucketName);
- while (true) {
-            for (S3ObjectSummary s3ObjSumm : prevObjectListing.getObjectSummaries()) {
- s3service.deleteObject(bucketName, s3ObjSumm.getKey());
+ S3Client s3Client = openService(prop, false);
+
+ deleteBucketObjects(bucketName, prop, s3Client);
+
+ // Delete the actual bucket
+ s3Client.deleteBucket(delReq -> delReq.bucket(bucketName).build());
+ }
+
+    public static void deleteBucketAndAbortMultipartUploads(final String bucket, final Date date, final Properties props) {
+ try (S3Client s3service = Utils.openService(props, false)) {
+ if (!Utils.bucketExists(s3service, bucket)) {
+ LOG.info("bucket [{}] doesn't exists", bucket);
+ return;
}
- if (!prevObjectListing.isTruncated()) {
- break;
+
+ for (int i = 0; i < 4; i++) {
+ abortMultipartUpload(bucket, date, s3service);
}
-            prevObjectListing = s3service.listNextBatchOfObjects(prevObjectListing);
+ // Delete objects in the bucket with pagination
+ deleteBucketObjects(bucket, props, s3service);
+ // Delete the bucket
+            s3service.deleteBucket(delBucket -> delBucket.bucket(bucket).build());
+ LOG.info("bucket [ {} ] cleaned", bucket);
}
- s3service.deleteBucket(bucketName);
+ }
+
+    public static void deleteBucketObjects(final String bucket, final Properties props, final S3Client s3service) {
+        ListObjectsV2Iterable listResponses = s3service.listObjectsV2Paginator(builder -> builder.bucket(bucket).build());
+ deleteBucketObjects(bucket, props, s3service, listResponses);
Review Comment:
I don't see a reason why the logic cannot be inlined; is anybody calling this
method directly?
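e.g. inlined at the call site (sketch):
```java
// Sketch: build the paginator where it is used and call the 4-arg variant.
ListObjectsV2Iterable listResponses =
        s3service.listObjectsV2Paginator(builder -> builder.bucket(bucket));
deleteBucketObjects(bucket, props, s3service, listResponses);
```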
##########
oak-blob-cloud/src/main/java/org/apache/jackrabbit/oak/blob/cloud/s3/Utils.java:
##########
@@ -67,70 +102,84 @@ public final class Utils {
public static final String AWSDOTCOM = "amazonaws.com";
public static final String S3 = "s3";
+ public static final String S3_ACCELERATION = "s3-accelerate";
public static final String DOT = ".";
public static final String DASH = "-";
/**
- * private constructor so that class cannot initialized from outside.
+ * private constructor so that class cannot initialize from outside.
*/
private Utils() {
}
/**
- * Create AmazonS3Client from properties.
- *
- * @param prop properties to configure @link {@link AmazonS3Client}
- * @return {@link AmazonS3Client}
+ * Create S3Client from properties.
+ *
+ * @param prop properties to configure @link {@link S3Client}
+ * @param accReq boolean indicating whether to accelerate requests
+ * @return {@link S3Client}
*/
- public static AmazonS3Client openService(final Properties prop) {
- String accessKey = prop.getProperty(S3Constants.ACCESS_KEY);
- String secretKey = prop.getProperty(S3Constants.SECRET_KEY);
- AmazonS3Client s3service = null;
- if (StringUtils.isEmpty(accessKey)
- || StringUtils.isEmpty(secretKey)) {
- LOG.info("Configuring Amazon Client from environment");
- s3service = new AmazonS3Client(getClientConfiguration(prop));
- } else {
- LOG.info("Configuring Amazon Client from property file.");
- AWSCredentials credentials = new BasicAWSCredentials(accessKey,
- secretKey);
- s3service = new AmazonS3Client(credentials,
- getClientConfiguration(prop));
- }
- String region = prop.getProperty(S3Constants.S3_REGION);
- String endpoint = null;
- String propEndPoint = prop.getProperty(S3Constants.S3_END_POINT);
- if ((propEndPoint != null) & !"".equals(propEndPoint)) {
- endpoint = propEndPoint;
- } else {
- if (StringUtils.isEmpty(region)) {
-                com.amazonaws.regions.Region s3Region = Regions.getCurrentRegion();
- if (s3Region != null) {
- region = s3Region.getName();
- } else {
- throw new AmazonClientException(
- "parameter ["
- + S3Constants.S3_REGION
-                            + "] not configured and cannot be derived from environment");
- }
- }
- if (DEFAULT_AWS_BUCKET_REGION.equals(region)) {
- endpoint = S3 + DOT + AWSDOTCOM;
- } else {
- endpoint = S3 + DOT + region + DOT + AWSDOTCOM;
- }
- }
- /*
- * setting endpoint to remove latency of redirection. If endpoint is
- * not set, invocation first goes us standard region, which
- * redirects it to correct location.
- */
- s3service.setEndpoint(endpoint);
- LOG.info("S3 service endpoint [{}] ", endpoint);
- return s3service;
+ public static S3Client openService(final Properties prop, boolean accReq) {
Review Comment:
Why is the accelerate request a separate parameter instead of being read from the properties?
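e.g. a sketch of deriving it from the properties (reusing the presigned-acceleration key here is an assumption; a dedicated key may be cleaner):
```java
// Sketch: keep a single-argument signature and read the flag from prop.
public static S3Client openService(final Properties prop) {
    boolean accReq = "true".equals(
            prop.getProperty(S3Constants.PRESIGNED_URI_ENABLE_ACCELERATION));
    return openService(prop, accReq);
}
```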
##########
oak-blob-cloud/src/main/java/org/apache/jackrabbit/oak/blob/cloud/s3/S3Backend.java:
##########
@@ -236,18 +223,17 @@ public void init() throws DataStoreException {
}
        String enablePresignedAccelerationStr = properties.getProperty(S3Constants.PRESIGNED_URI_ENABLE_ACCELERATION);
-        setBinaryTransferAccelerationEnabled(enablePresignedAccelerationStr != null && "true".equals(enablePresignedAccelerationStr));
+        setBinaryTransferAccelerationEnabled("true".equals(enablePresignedAccelerationStr));
Review Comment:
Why can't this block be moved to the start of init, with the accelerate property added,
so the rest of init proceeds as normal? That way we avoid opening a different service
object again.
##########
oak-blob-cloud/src/main/java/org/apache/jackrabbit/oak/blob/cloud/s3/Utils.java:
##########
@@ -216,54 +290,384 @@ public static Properties readConfig(String fileName) throws IOException {
return prop;
}
- private static void deleteIfPossible(final File file) {
- boolean deleted = file.delete();
- if (!deleted) {
- LOG.warn("Could not delete " + file.getAbsolutePath());
+ public static Map<String, Object> asMap(Properties props) {
+ Map<String, Object> map = new HashMap<>();
+ for (Object key : props.keySet()) {
+ map.put((String)key, props.get(key));
}
+ return map;
}
-    private static ClientConfiguration getClientConfiguration(Properties prop) {
-        int connectionTimeOut = Integer.parseInt(prop.getProperty(S3Constants.S3_CONN_TIMEOUT));
-        int socketTimeOut = Integer.parseInt(prop.getProperty(S3Constants.S3_SOCK_TIMEOUT));
-        int maxConnections = Integer.parseInt(prop.getProperty(S3Constants.S3_MAX_CONNS));
+    /**
+     * Determines the data encryption type to use for S3 operations based on the provided properties.
+     * <p>
+     * If the property {@code S3_ENCRYPTION} is set, returns the corresponding {@link DataEncryption} enum value.
+     * If the property is not set or is invalid, returns {@link DataEncryption#NONE}.
+ *
+ * @param props the properties containing S3 configuration
+ * @return the {@link DataEncryption} type to use
+ */
+ public static DataEncryption getDataEncryption(final Properties props) {
+ final String encryptionType = props.getProperty(S3_ENCRYPTION);
+ DataEncryption encryption = DataEncryption.NONE;
+ if (encryptionType != null) {
+ encryption = DataEncryption.valueOf(encryptionType);
+ }
+ return encryption;
+ }
+
+ /**
+ * Determines the AWS region to use based on the provided properties.
+ * <p>
+ * The method attempts to extract the region in the following order:
+ * <ol>
+     *   <li>If the S3 endpoint property is set, tries to parse the region from the endpoint URL.</li>
+     *   <li>If not found, uses the S3 region property.</li>
+     *   <li>If the region property is empty, falls back to the default region from the environment.</li>
+ * <li>If the region is "us-standard", returns "us-east-1".</li>
+ * </ol>
+ *
+ * @param prop the properties containing S3 configuration
+ * @return the AWS region as a string
+ */
+ static String getRegion(final Properties prop) {
+
+ String region = null;
+
+ if (Objects.nonNull(prop.getProperty(S3Constants.S3_END_POINT))) {
+            region = getRegionFromEndpoint(prop.getProperty(S3Constants.S3_END_POINT));
+ }
+
+ if (Objects.nonNull(region)) {
+ return region;
+ }
+
+ region = prop.getProperty(S3Constants.S3_REGION);
+
+ if (region.isEmpty()) {
+ region = Utils.getDefaultRegion();
+ }
+
+ if (Objects.equals(DEFAULT_AWS_BUCKET_REGION, region)) {
+ return US_EAST_1_AWS_BUCKET_REGION;
+ }
+ return region;
+ }
+
+ /**
+ * Extracts the AWS region from a given S3 endpoint URL.
+ * <p>
+ * Supports both path-style and virtual-hosted-style S3 endpoints, e.g.:
+ * <ul>
+ * <li>https://s3.eu-west-1.amazonaws.com</li>
+ * <li>https://bucket.s3.eu-west-1.amazonaws.com</li>
+ * <li>https://s3.amazonaws.com (returns us-east-1)</li>
+ * </ul>
+ * If the region cannot be determined, returns null.
+ *
+ * @param endpoint the S3 endpoint URL as a string
+ * @return the AWS region string, or null if not found
+ */
+ static String getRegionFromEndpoint(final String endpoint) {
+ try {
+ URI uri = URI.create(endpoint);
+ String host = uri.getHost();
+
+            // Pattern for standard S3 endpoints: s3.region.amazonaws.com or bucket.s3.region.amazonaws.com
+            Pattern pattern = Pattern.compile("s3[.-]([a-z0-9-]+)\\.amazonaws\\.com");
+ Matcher matcher = pattern.matcher(host);
+
+ if (matcher.find()) {
+ return matcher.group(1);
+ }
+
+ // Handle us-east-1 special case (s3.amazonaws.com)
+ // Handle virtual-hosted-style URLs: bucket.s3.amazonaws.com
+            if (host.equals("s3.amazonaws.com") || host.endsWith(".s3.amazonaws.com")) {
+ return Region.US_EAST_1.id();
+ }
+
+ LOG.warn("Cannot parse region from endpoint: {}", endpoint);
+
+ return null;
+
+ } catch (Exception e) {
+ LOG.error("Invalid endpoint format: {}", endpoint);
+ return null;
+ }
+ }
+
+    static void deleteBucketObjects(final String bucket, final Properties props, final S3Client s3service,
+                                    final ListObjectsV2Iterable listResponses) {
+ for (ListObjectsV2Response listRes : listResponses) {
+ List<ObjectIdentifier> deleteList = new ArrayList<>();
+ List<String> keysToDelete = new ArrayList<>();
+ for (S3Object s3Obj : listRes.contents()) {
+                deleteList.add(ObjectIdentifier.builder().key(s3Obj.key()).build());
+ keysToDelete.add(s3Obj.key());
+ }
+
+ if (!deleteList.isEmpty()) {
+ RemoteStorageMode mode = getMode(props);
+ if (mode == RemoteStorageMode.S3) {
+ s3service.deleteObjects(delReq ->
+ delReq.bucket(bucket)
+ .delete(delObj ->
+ delObj.objects(deleteList)
+ .build())
+ .build());
+ } else {
+ // Delete objects one by one
+                    keysToDelete.forEach(key -> s3service.deleteObject(delObj -> delObj
+ .bucket(bucket)
+ .key(key)
+ .build()));
+
+ }
Review Comment:
Won't this work?
```suggestion
for (ListObjectsV2Response listRes : listResponses) {
List<ObjectIdentifier> deleteList = new ArrayList<>();
for (S3Object s3Obj : listRes.contents()) {
deleteList.add(ObjectIdentifier.builder().key(s3Obj.key()).build());
}
if (!deleteList.isEmpty()) {
RemoteStorageMode mode = getMode(props);
if (mode == RemoteStorageMode.S3) {
s3service.deleteObjects(delReq ->
delReq.bucket(bucket)
.delete(delObj ->
delObj.objects(deleteList)));
} else {
// Delete objects one by one
deleteList.forEach(obj ->
s3service.deleteObject(delReq ->
delReq.bucket(bucket).key(obj.key())));
}
}
```
##########
oak-blob-cloud/src/main/java/org/apache/jackrabbit/oak/blob/cloud/s3/S3Backend.java:
##########
@@ -1075,80 +1110,52 @@ private URI createPresignedURI(DataIdentifier identifier,
* Returns an iterator over the S3 objects
* @param <T>
*/
- class RecordsIterator<T> extends AbstractIterator<T> {
- ObjectListing prevObjectListing;
- Queue<S3ObjectSummary> queue;
- long size;
- Function<S3ObjectSummary, T> transformer;
-
- public RecordsIterator (Function<S3ObjectSummary, T> transformer) {
- queue = new LinkedList<>();
- this.transformer = transformer;
- }
-
- @Override
- protected T computeNext() {
- if (queue.isEmpty()) {
- loadBatch();
- }
-
-            while (queue.isEmpty() && prevObjectListing.getNextMarker() != null) {
-                LOG.debug("Queue is empty, but there is more data in the S3 bucket");
- loadBatch();
- }
+ public class RecordsIterator<T> implements Iterator<T> {
+ private final Iterator<T> objectIterator;
+ private T nextElement = null;
+ private boolean computedNext = false;
- if (!queue.isEmpty()) {
- return transformer.apply(queue.remove());
- }
-
- return endOfData();
- }
+ public RecordsIterator(Function<S3Object, T> transformer) {
Review Comment:
Looks to me like it is completely unnecessary now with the new SDK.
Maybe we can add something like below and in both use cases invoke it with the
correct transformer.
```java
ListObjectsV2Iterable listing = s3Client.listObjectsV2Paginator(builder -> {
    builder.bucket(bucket);
    if (properties.containsKey(S3Constants.MAX_KEYS)) {
        builder.maxKeys(Integer.valueOf(properties.getProperty(S3Constants.MAX_KEYS)));
    }
});
return IteratorUtils.transform(
        IteratorUtils.filter(
                listing.contents().iterator(),
                s3Object -> !s3Object.key().startsWith(META_KEY_PREFIX)
        ),
        transformer
);
```
##########
oak-blob-cloud/src/main/java/org/apache/jackrabbit/oak/blob/cloud/s3/S3Backend.java:
##########
@@ -624,29 +633,13 @@ public void deleteAllMetadataRecords(String prefix) {
        ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader();
try {
- Thread.currentThread().setContextClassLoader(
- getClass().getClassLoader());
+            Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
-            ListObjectsRequest listObjectsRequest =
-                new ListObjectsRequest().withBucketName(bucket).withPrefix(addMetaKeyPrefix(prefix));
-            ObjectListing metaList = s3service.listObjects(listObjectsRequest);
-            List<DeleteObjectsRequest.KeyVersion> deleteList = new ArrayList<DeleteObjectsRequest.KeyVersion>();
- List<String> keysToDelete = new ArrayList<>();
- for (S3ObjectSummary s3ObjSumm : metaList.getObjectSummaries()) {
-                deleteList.add(new DeleteObjectsRequest.KeyVersion(s3ObjSumm.getKey()));
- keysToDelete.add(s3ObjSumm.getKey());
- }
- if (!deleteList.isEmpty()) {
-                RemoteStorageMode mode = (RemoteStorageMode) properties.getOrDefault(S3Constants.MODE, RemoteStorageMode.S3);
-                if (mode == RemoteStorageMode.S3) {
-                    DeleteObjectsRequest delObjsReq = new DeleteObjectsRequest(bucket);
- delObjsReq.setKeys(deleteList);
- s3service.deleteObjects(delObjsReq);
- } else {
-                    // GCP does not support bulk delete operations, hence we need to delete each object individually
-                    keysToDelete.forEach(key -> s3service.deleteObject(bucket, key));
- }
- }
+            ListObjectsV2Iterable metaList = s3Client.listObjectsV2Paginator(builder -> builder
+ .bucket(bucket)
+ .prefix(addMetaKeyPrefix(prefix))
+ .build());
+ Utils.deleteBucketObjects(bucket, properties, s3Client, metaList);
Review Comment:
same case as Utils.objectExists, for other invocations as well
##########
oak-blob-cloud/src/main/java/org/apache/jackrabbit/oak/blob/cloud/s3/S3Backend.java:
##########
@@ -496,39 +499,41 @@ public void setProperties(Properties properties) {
setRemoteStorageMode();
}
- private void setRemoteStorageMode() {
-        String s3EndPoint = properties.getProperty(S3Constants.S3_END_POINT, "");
- if (s3EndPoint.contains("googleapis")) {
- if (properties.get(S3Constants.MODE) == RemoteStorageMode.S3) {
-                LOG.warn("Mismatch between remote storage mode and s3EndPoint, overriding mode to GCP");
- }
- properties.put(S3Constants.MODE, RemoteStorageMode.GCP);
- return;
- }
- // default mode is S3
- properties.put(S3Constants.MODE, RemoteStorageMode.S3);
- }
-
@Override
public void addMetadataRecord(final InputStream input, final String name)
throws DataStoreException {
checkArgument(input != null, "input should not be null");
checkArgument(!StringUtils.isEmpty(name), "name should not be empty");
ClassLoader contextClassLoader =
Thread.currentThread().getContextClassLoader();
+        // Executor required to handle reading from the InputStream on a separate thread so the main upload is not blocked.
+ final ExecutorService executor = Executors.newSingleThreadExecutor();
Review Comment:
Can we set up a pool at the beginning and close it in close()?
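e.g. a sketch (field name and pool size are assumptions):
```java
// Sketch: one shared executor created in init() and shut down in close(),
// instead of a new single-thread executor per addMetadataRecord call.
private ExecutorService metadataExecutor;

// in init():
metadataExecutor = Executors.newFixedThreadPool(4);

// in close():
if (metadataExecutor != null) {
    metadataExecutor.shutdown();
}
```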
##########
oak-blob-cloud/src/main/java/org/apache/jackrabbit/oak/blob/cloud/s3/Utils.java:
##########
@@ -216,54 +290,384 @@ public static Properties readConfig(String fileName) throws IOException {
return prop;
}
- private static void deleteIfPossible(final File file) {
- boolean deleted = file.delete();
- if (!deleted) {
- LOG.warn("Could not delete " + file.getAbsolutePath());
+ public static Map<String, Object> asMap(Properties props) {
+ Map<String, Object> map = new HashMap<>();
+ for (Object key : props.keySet()) {
+ map.put((String)key, props.get(key));
}
+ return map;
}
-    private static ClientConfiguration getClientConfiguration(Properties prop) {
-        int connectionTimeOut = Integer.parseInt(prop.getProperty(S3Constants.S3_CONN_TIMEOUT));
-        int socketTimeOut = Integer.parseInt(prop.getProperty(S3Constants.S3_SOCK_TIMEOUT));
-        int maxConnections = Integer.parseInt(prop.getProperty(S3Constants.S3_MAX_CONNS));
+ /**
+     * Determines the data encryption type to use for S3 operations based on the provided properties.
+     * <p>
+     * If the property {@code S3_ENCRYPTION} is set, returns the corresponding {@link DataEncryption} enum value.
+     * If the property is not set or is invalid, returns {@link DataEncryption#NONE}.
+ *
+ * @param props the properties containing S3 configuration
+ * @return the {@link DataEncryption} type to use
+ */
+ public static DataEncryption getDataEncryption(final Properties props) {
+ final String encryptionType = props.getProperty(S3_ENCRYPTION);
+ DataEncryption encryption = DataEncryption.NONE;
+ if (encryptionType != null) {
+ encryption = DataEncryption.valueOf(encryptionType);
+ }
+ return encryption;
+ }
+
+ /**
+ * Determines the AWS region to use based on the provided properties.
+ * <p>
+ * The method attempts to extract the region in the following order:
+ * <ol>
+     *   <li>If the S3 endpoint property is set, tries to parse the region from the endpoint URL.</li>
+     *   <li>If not found, uses the S3 region property.</li>
+     *   <li>If the region property is empty, falls back to the default region from the environment.</li>
+ * <li>If the region is "us-standard", returns "us-east-1".</li>
+ * </ol>
+ *
+ * @param prop the properties containing S3 configuration
+ * @return the AWS region as a string
+ */
+ static String getRegion(final Properties prop) {
+
+ String region = null;
+
+ if (Objects.nonNull(prop.getProperty(S3Constants.S3_END_POINT))) {
+            region = getRegionFromEndpoint(prop.getProperty(S3Constants.S3_END_POINT));
+ }
+
+ if (Objects.nonNull(region)) {
+ return region;
+ }
+
+ region = prop.getProperty(S3Constants.S3_REGION);
+
+ if (region.isEmpty()) {
+ region = Utils.getDefaultRegion();
+ }
+
+ if (Objects.equals(DEFAULT_AWS_BUCKET_REGION, region)) {
+ return US_EAST_1_AWS_BUCKET_REGION;
+ }
+ return region;
+ }
+
+ /**
+ * Extracts the AWS region from a given S3 endpoint URL.
+ * <p>
+ * Supports both path-style and virtual-hosted-style S3 endpoints, e.g.:
+ * <ul>
+ * <li>https://s3.eu-west-1.amazonaws.com</li>
+ * <li>https://bucket.s3.eu-west-1.amazonaws.com</li>
+ * <li>https://s3.amazonaws.com (returns us-east-1)</li>
+ * </ul>
+ * If the region cannot be determined, returns null.
+ *
+ * @param endpoint the S3 endpoint URL as a string
+ * @return the AWS region string, or null if not found
+ */
+ static String getRegionFromEndpoint(final String endpoint) {
+ try {
+ URI uri = URI.create(endpoint);
+ String host = uri.getHost();
+
+            // Pattern for standard S3 endpoints: s3.region.amazonaws.com or bucket.s3.region.amazonaws.com
+            Pattern pattern = Pattern.compile("s3[.-]([a-z0-9-]+)\\.amazonaws\\.com");
+ Matcher matcher = pattern.matcher(host);
+
+ if (matcher.find()) {
+ return matcher.group(1);
+ }
+
+ // Handle us-east-1 special case (s3.amazonaws.com)
+ // Handle virtual-hosted-style URLs: bucket.s3.amazonaws.com
+            if (host.equals("s3.amazonaws.com") || host.endsWith(".s3.amazonaws.com")) {
+ return Region.US_EAST_1.id();
+ }
+
+ LOG.warn("Cannot parse region from endpoint: {}", endpoint);
+
+ return null;
+
+ } catch (Exception e) {
+ LOG.error("Invalid endpoint format: {}", endpoint);
+ return null;
+ }
+ }
+
+    static void deleteBucketObjects(final String bucket, final Properties props, final S3Client s3service,
+                                    final ListObjectsV2Iterable listResponses) {
+ for (ListObjectsV2Response listRes : listResponses) {
+ List<ObjectIdentifier> deleteList = new ArrayList<>();
+ List<String> keysToDelete = new ArrayList<>();
+ for (S3Object s3Obj : listRes.contents()) {
+                deleteList.add(ObjectIdentifier.builder().key(s3Obj.key()).build());
+ keysToDelete.add(s3Obj.key());
+ }
+
+ if (!deleteList.isEmpty()) {
+ RemoteStorageMode mode = getMode(props);
+ if (mode == RemoteStorageMode.S3) {
+ s3service.deleteObjects(delReq ->
+ delReq.bucket(bucket)
+ .delete(delObj ->
+ delObj.objects(deleteList)
+ .build())
+ .build());
+ } else {
+ // Delete objects one by one
+                    keysToDelete.forEach(key -> s3service.deleteObject(delObj -> delObj
+ .bucket(bucket)
+ .key(key)
+ .build()));
+
+ }
+ }
+ }
+ }
+
+    static void abortMultipartUpload(String bucket, Date date, S3Client s3Client) {
+        ListMultipartUploadsIterable multiUploadsResponse = s3Client.listMultipartUploadsPaginator(listReq ->
+            listReq.bucket(bucket).build());
+        for (MultipartUpload multipartUpload : multiUploadsResponse.uploads()) {
+ Instant initiated = multipartUpload.initiated();
+ if (initiated.isBefore(date.toInstant())) {
+ s3Client.abortMultipartUpload(abortReq -> abortReq
+ .bucket(bucket)
+ .key(multipartUpload.key())
+ .uploadId(multipartUpload.uploadId())
+ .build());
+ }
+ }
+ }
+
+ /**
+     * Checks if the specified Amazon S3 bucket exists and is accessible with the provided S3 client.
+     * <p>
+     * This method attempts to perform a {@code headBucket} request. If the bucket exists and is accessible,
+     * it returns {@code true}. If the bucket does not exist, it returns {@code false}.
+     * Other unexpected exceptions (such as permission errors or network issues) will propagate.
+ * </p>
+ *
+ * @param s3Client the {@link S3Client} to use for the request
+ * @param bucketName the name of the S3 bucket to check
+     * @return {@code true} if the bucket exists and is accessible; {@code false} if the bucket does not exist
+     * @throws NullPointerException if {@code s3Client} or {@code bucketName} is null
+     * @throws S3Exception if an AWS error other than {@link NoSuchBucketException} occurs
+ */
+    static boolean bucketExists(final S3Client s3Client, final String bucketName) {
+ try {
+ s3Client.headBucket(request -> request.bucket(bucketName));
+ return true;
+ }
+ catch (NoSuchBucketException exception) {
+ return false;
+ }
+ }
+
+ /**
+ * Checks if a specific object exists in the given S3 bucket.
+ * <p>
+     * Performs a {@code headObject} request using the provided {@link S3RequestDecorator}.
+     * Returns {@code true} if the object exists, {@code false} if it does not (404 or 403).
+ * Other {@link S3Exception}s are propagated.
+ * </p>
+ *
+ * @param s3Client the {@link S3Client} to use for the request
+ * @param bucket the S3 bucket name
+ * @param key the object key
+ * @param s3ReqDecorator decorator for the {@link HeadObjectRequest}
+     * @return {@code true} if the object exists, {@code false} if not found or forbidden
+ * @throws S3Exception for AWS errors other than 404 or 403
+ */
+    static boolean objectExists(final S3Client s3Client, final String bucket, final String key, S3RequestDecorator s3ReqDecorator) {
+        try {
+            s3Client.headObject(s3ReqDecorator.decorate(HeadObjectRequest.builder().bucket(bucket).key(key).build()));
+
+ LOG.debug("Object {} exists in bucket {}", key, bucket);
+ return true;
+ } catch (S3Exception e) {
+ if (e.statusCode() == 404 || e.statusCode() == 403) {
+ LOG.info("Object {} doesn't exists in bucket {}", key, bucket);
+ return false;
+ } else {
+ throw e;
+ }
+ }
+ }
+
+ /**
+     * Constructs the S3 endpoint URI based on the provided properties, acceleration flag, and region.
+ * <p>
+ * The method determines the endpoint as follows:
+ * <ol>
+ * <li>If the S3 endpoint property is set, uses it directly.</li>
+     *   <li>Otherwise, constructs the endpoint using the region and acceleration flag.</li>
+     *   <li>The protocol is taken from the properties or defaults to "https".</li>
+ * </ol>
+ *
+ * @param prop the properties containing S3 configuration
+ * @param accReq whether to use S3 acceleration endpoint
+ * @param region the AWS region
+ * @return the constructed S3 endpoint URI
+ */
+ @NotNull
+ static URI getEndPointUri(Properties prop, boolean accReq, String region) {
Review Comment:
As above: why can't the accelerate request be part of the props?
##########
oak-run-commons/src/main/java/org/apache/jackrabbit/oak/fixture/DataStoreUtils.java:
##########
@@ -112,40 +108,11 @@ public static void cleanup(File storeDir, Map<String, ?> config, String bucket)
}
}
-    public static void deleteBucket(String bucket, Map<String, ?> map, Date date) throws Exception {
-        log.info("cleaning bucket [" + bucket + "]");
+    public static void deleteBucket(String bucket, Map<String, ?> map, Date date) {
+ log.info("cleaning bucket [ {} ]", bucket);
Properties props = new Properties();
props.putAll(map);
- AmazonS3Client s3service = Utils.openService(props);
- TransferManager tmx = new TransferManager(s3service);
- if (s3service.doesBucketExist(bucket)) {
- for (int i = 0; i < 4; i++) {
- tmx.abortMultipartUploads(bucket, date);
-            ObjectListing prevObjectListing = s3service.listObjects(bucket);
- while (prevObjectListing != null) {
-                List<DeleteObjectsRequest.KeyVersion>
-                    deleteList = new ArrayList<DeleteObjectsRequest.KeyVersion>();
-                for (S3ObjectSummary s3ObjSumm : prevObjectListing.getObjectSummaries()) {
- deleteList.add(new DeleteObjectsRequest.KeyVersion(
- s3ObjSumm.getKey()));
- }
- if (deleteList.size() > 0) {
-                    DeleteObjectsRequest delObjsReq = new DeleteObjectsRequest(
- bucket);
- delObjsReq.setKeys(deleteList);
- s3service.deleteObjects(delObjsReq);
- }
- if (!prevObjectListing.isTruncated()) break;
-                    prevObjectListing = s3service.listNextBatchOfObjects(prevObjectListing);
- }
- }
- s3service.deleteBucket(bucket);
- log.info("bucket [ " + bucket + "] cleaned");
- } else {
- log.info("bucket [" + bucket + "] doesn't exists");
- }
- tmx.shutdownNow();
- s3service.shutdown();
+ Utils.deleteBucketAndAbortMultipartUploads(bucket, date, props);
Review Comment:
I think Utils is not a good place for a core CRUD kind of method.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]