HoustonPutman commented on a change in pull request #120:
URL: https://github.com/apache/solr/pull/120#discussion_r685457939



##########
File path: solr/contrib/s3-repository/src/java/org/apache/solr/s3/S3StorageClient.java
##########
@@ -0,0 +1,568 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.s3;
+
+import com.amazonaws.AmazonClientException;
+import com.amazonaws.AmazonServiceException;
+import com.amazonaws.ClientConfiguration;
+import com.amazonaws.Protocol;
+import com.amazonaws.client.builder.AwsClientBuilder;
+import com.amazonaws.services.s3.AmazonS3;
+import com.amazonaws.services.s3.AmazonS3ClientBuilder;
+import com.amazonaws.services.s3.model.DeleteObjectsRequest;
+import com.amazonaws.services.s3.model.DeleteObjectsRequest.KeyVersion;
+import com.amazonaws.services.s3.model.DeleteObjectsResult;
+import com.amazonaws.services.s3.model.ListObjectsRequest;
+import com.amazonaws.services.s3.model.ObjectListing;
+import com.amazonaws.services.s3.model.ObjectMetadata;
+import com.amazonaws.services.s3.model.PutObjectRequest;
+import com.amazonaws.services.s3.model.S3Object;
+import com.amazonaws.services.s3.model.S3ObjectSummary;
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.Lists;
+import org.apache.commons.io.input.ClosedInputStream;
+import org.apache.solr.common.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.Closeable;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.lang.invoke.MethodHandles;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Comparator;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+
+/**
+ * Creates an {@link AmazonS3} client for communicating with AWS S3. Utilizes the default credential provider chain;
+ * reference the <a href="https://docs.aws.amazon.com/sdk-for-java/v1/developer-guide/credentials.html">AWS SDK docs</a> for
+ * details on where this client will fetch credentials from, and the order of precedence.
+ */
+class S3StorageClient {
+
+    private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+    static final String BLOB_FILE_PATH_DELIMITER = "/";
+
+    // S3 has a hard limit of 1000 keys per batch delete request
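+    // so larger deletions must be split into batches of at most this size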
+    private static final int MAX_KEYS_PER_BATCH_DELETE = 1000;
+
+    // Metadata name used to identify flag directory entries in S3
+    private static final String BLOB_DIR_CONTENT_TYPE = "application/x-directory";
+
+    // Error messages returned by S3 for a key not found.
+    private static final Set<String> NOT_FOUND_CODES = Set.of("NoSuchKey", 
"404 Not Found");
+
+    private final AmazonS3 s3Client;
+
+    /**
+     * The S3 bucket where we write all of our blobs.
+     */
+    private final String bucketName;
+
+    S3StorageClient(String bucketName, String region, String proxyHost, int proxyPort, String endpoint) {
+        this(createInternalClient(region, proxyHost, proxyPort, endpoint), bucketName);
+    }
+
+    @VisibleForTesting
+    S3StorageClient(AmazonS3 s3Client, String bucketName) {
+        this.s3Client = s3Client;
+        this.bucketName = bucketName;
+    }
+
+    private static AmazonS3 createInternalClient(String region, String proxyHost, int proxyPort, String endpoint) {
+        ClientConfiguration clientConfig = new ClientConfiguration()
+            .withProtocol(Protocol.HTTPS);
+
+        // If configured, add proxy
+        if (!StringUtils.isEmpty(proxyHost)) {
+            clientConfig.setProxyHost(proxyHost);
+            if (proxyPort > 0) {
+                clientConfig.setProxyPort(proxyPort);
+            }
+        }
+
+        /*
+         * Default s3 client builder loads credentials from disk and handles token refreshes
+         */
+        AmazonS3ClientBuilder clientBuilder = AmazonS3ClientBuilder.standard()
+            .enablePathStyleAccess()
+            .withClientConfiguration(clientConfig);
+
+        if (!StringUtils.isEmpty(endpoint)) {
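+            // A custom endpoint (e.g. a local S3-compatible store) overrides the
+            // region-derived endpoint; the region is still used to sign requests.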
+            clientBuilder.setEndpointConfiguration(
+                new AwsClientBuilder.EndpointConfiguration(endpoint, region)
+            );
+        } else {
+            clientBuilder.setRegion(region);
+        }
+
+        return clientBuilder.build();
+    }
+
+    /**
+     * Create Directory in S3 Blob Store.
+     *
+     * @param path Directory Path in Blob Store.
+     */
+    void createDirectory(String path) throws S3Exception {
+        path = sanitizedDirPath(path);
+
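+        // S3 has no real directory hierarchy: a "directory" is a zero-length marker
+        // object, so any missing parent directories are created recursively first.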
+        if (!parentDirectoryExist(path)) {
+            createDirectory(getParentDirectory(path));
+            //TODO see https://issues.apache.org/jira/browse/SOLR-15359
+//            throw new BlobException("Parent directory doesn't exist, path=" 
+ path);
+        }
+
+        ObjectMetadata objectMetadata = new ObjectMetadata();
+        objectMetadata.setContentType(BLOB_DIR_CONTENT_TYPE);
+        objectMetadata.setContentLength(0);
+
+        // Create empty blob object with header
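+        // (ClosedInputStream yields EOF immediately, so the object body is zero bytes)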
+        final InputStream im = ClosedInputStream.CLOSED_INPUT_STREAM;
+
+        try {
+            PutObjectRequest putRequest = new PutObjectRequest(bucketName, path, im, objectMetadata);
+            s3Client.putObject(putRequest);
+        } catch (AmazonClientException ase) {
+            throw handleAmazonException(ase);
+        }
+    }
+
+    /**
+     * Delete files from S3 Blob Store. Deletion order is not guaranteed.
+     *
+     * @param paths Paths to files or blobs.
+     */
+    void delete(Collection<String> paths) throws S3Exception {
+        Set<String> entries = new HashSet<>();
+        for (String path : paths) {
+            entries.add(sanitizedFilePath(path));
+        }
+
+        deleteBlobs(entries);
+    }
+
+    /**
+     * Delete a directory and all of its files and sub-directories from S3.
+     *
+     * @param path Path to directory in S3.
+     */
+    void deleteDirectory(String path) throws S3Exception {
+        path = sanitizedDirPath(path);
+
+        Set<String> entries = new HashSet<>();
+        if (pathExists(path)) {
+            entries.add(path);
+        }
+
+        // Get all the files and subdirectories
+        entries.addAll(listAll(path));
+
+        deleteObjects(entries);
+    }
+
+    /**
+     * List all the files and sub-directories directly under the given path.
+     *
+     * @param path Path to directory in S3.
+     * @return Files and sub-directories in path.
+     */
+    String[] listDir(String path) throws S3Exception {
+        path = sanitizedDirPath(path);
+
+        String prefix = path;
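+        // Listing with '/' as the delimiter makes S3 roll keys below the first level
+        // up into common prefixes, so only the direct children of 'path' come back.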
+        ListObjectsRequest listRequest = new ListObjectsRequest()
+            .withBucketName(bucketName)
+            .withPrefix(prefix)
+            .withDelimiter(BLOB_FILE_PATH_DELIMITER);
+
+        List<String> entries = new ArrayList<>();
+        try {
+            ObjectListing objectListing = s3Client.listObjects(listRequest);
+
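+            // Page through the listing; S3 returns at most 1,000 keys per response.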
+            while (true) {
+                List<String> files = objectListing.getObjectSummaries().stream()
+                        .map(S3ObjectSummary::getKey)
+                        .collect(Collectors.toList());
+                files.addAll(objectListing.getCommonPrefixes());
+                // This filtering is needed only for S3mock. Real S3 does not ignore the trailing '/' in the prefix.
+                files = files.stream()
+                    .filter(s -> s.startsWith(prefix))
+                    .map(s -> s.substring(prefix.length()))
+                    .filter(s -> !s.isEmpty())
+                    .filter(s -> {
+                        int slashIndex = s.indexOf(BLOB_FILE_PATH_DELIMITER);
+                        return slashIndex == -1 || slashIndex == s.length() - 
1;
+                    })
+                    .map(s -> {

Review comment:
       OK, I tested it out locally and it worked for me (backed up twice, listed the backups, deleted one of them, and restored the remaining one). This is all incremental; maybe the non-incremental path breaks... let me try that.

       Would you mind sharing your error?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


