bpahuja commented on code in PR #6407:
URL: https://github.com/apache/hadoop/pull/6407#discussion_r1456958842


##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ObjectStorageClassFilter.java:
##########
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a;
+
+import org.apache.hadoop.thirdparty.com.google.common.collect.Sets;
+import java.util.Set;
+import java.util.function.Function;
+import software.amazon.awssdk.services.s3.model.ObjectStorageClass;
+import software.amazon.awssdk.services.s3.model.S3Object;
+
+
+/**
+ * S3ObjectStorageClassFilter filters S3 objects based on the
+ * fs.s3a.glacier.read.restored.objects configuration set in S3AFileSystem.
+ * The config can take 3 values:
+ * READ_ALL: Retains the current default behavior of not taking the storage
+ * class retrieved from S3 into account, so existing customers see no change
+ * in behavior.
+ * SKIP_ALL_GLACIER: Ignores any S3 objects tagged with a Glacier storage
+ * class and retrieves all others.
+ * READ_RESTORED_GLACIER_OBJECTS: Checks the restore status of each Glacier
+ * object; restored objects are read like normal S3 objects, while the rest
+ * are ignored, as their data has not been retrieved from S3 Glacier.
+ */
+public enum S3ObjectStorageClassFilter {
+  READ_ALL(o -> true),
+  SKIP_ALL_GLACIER(S3ObjectStorageClassFilter::isNotGlacierObject),
+  READ_RESTORED_GLACIER_OBJECTS(S3ObjectStorageClassFilter::isCompletedRestoredObject);
+
+  private static final Set<ObjectStorageClass> GLACIER_STORAGE_CLASSES =
+      Sets.newHashSet(ObjectStorageClass.GLACIER, ObjectStorageClass.DEEP_ARCHIVE);

Review Comment:
   GLACIER_IR objects are instantly available, so no failure is observed; S3A will be able to access them.
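   
   For readers of this thread, a minimal sketch of the two predicate helpers referenced by the enum constants above (their bodies fall outside this diff hunk, so the code below is an assumption based on the AWS SDK v2 listing API, not necessarily the PR's exact implementation):
   
   ```java
   // Sketch only: assumed bodies for the helpers named in the enum above.
   // Relies on the GLACIER_STORAGE_CLASSES set defined in this class.
   private static boolean isNotGlacierObject(S3Object object) {
     // Accept any object whose storage class is neither GLACIER nor DEEP_ARCHIVE.
     return !GLACIER_STORAGE_CLASSES.contains(object.storageClass());
   }
   
   private static boolean isCompletedRestoredObject(S3Object object) {
     if (!isNotGlacierObject(object)) {
       // Glacier-class object: readable only once its restore has completed.
       // restoreStatus() can be null if restore status was not returned by S3.
       return object.restoreStatus() != null
           && !object.restoreStatus().isRestoreInProgress();
     }
     // Non-Glacier objects are always readable.
     return true;
   }
   ```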



##########
hadoop-common-project/hadoop-common/src/main/resources/core-default.xml:
##########
@@ -2191,6 +2191,18 @@
   </description>
 </property>
 
+<property>

Review Comment:
   Sure, will do.
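   
   For reference, the new core-default.xml entry might look roughly like this (a sketch only: the property name and allowed values come from the Javadoc in this PR, while the default and the description wording are assumptions):
   
   ```xml
   <!-- Sketch: default value and description text are illustrative,
        not the PR's exact wording. -->
   <property>
     <name>fs.s3a.glacier.read.restored.objects</name>
     <value>READ_ALL</value>
     <description>
       Controls how S3A handles objects stored in Glacier storage classes.
       READ_ALL: ignore storage classes (the current default behavior).
       SKIP_ALL_GLACIER: skip any object in a Glacier storage class.
       READ_RESTORED_GLACIER_OBJECTS: read Glacier objects only if their
       restore has completed; skip those still archived or mid-restore.
     </description>
   </property>
   ```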



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

