steveloughran commented on code in PR #6407:
URL: https://github.com/apache/hadoop/pull/6407#discussion_r1530919362


##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/S3ObjectStorageClassFilter.java:
##########
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.api;
+
+import java.util.Set;
+import java.util.function.Function;
+
+import software.amazon.awssdk.services.s3.model.ObjectStorageClass;
+import software.amazon.awssdk.services.s3.model.S3Object;
+
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.thirdparty.com.google.common.collect.Sets;
+
+
+/**
+ * <pre>
+ * {@link S3ObjectStorageClassFilter} will filter the S3 files based on the
+ * {@code fs.s3a.glacier.read.restored.objects} configuration set in {@link S3AFileSystem}
+ * The config can have 3 values:
+ * {@code READ_ALL}: Retrieval of Glacier files will fail with InvalidObjectStateException:
+ * The operation is not valid for the object's storage class.
+ * {@code SKIP_ALL_GLACIER}: If this value is set then this will ignore any S3 Objects which are
+ * tagged with Glacier storage classes and retrieve the others.
+ * {@code READ_RESTORED_GLACIER_OBJECTS}: If this value is set then restored status of the Glacier
+ * object will be checked, if restored the objects would be read like normal S3 objects
+ * else they will be ignored as the objects would not have been retrieved from the S3 Glacier.
+ * </pre>
+ */
+public enum S3ObjectStorageClassFilter {
+  READ_ALL(o -> true),
+  SKIP_ALL_GLACIER(S3ObjectStorageClassFilter::isNotGlacierObject),
+  READ_RESTORED_GLACIER_OBJECTS(S3ObjectStorageClassFilter::isCompletedRestoredObject);
+
+  private static final Set<ObjectStorageClass> GLACIER_STORAGE_CLASSES = Sets.newHashSet(
+      ObjectStorageClass.GLACIER, ObjectStorageClass.DEEP_ARCHIVE);
+
+  private final Function<S3Object, Boolean> filter;
+
+  S3ObjectStorageClassFilter(Function<S3Object, Boolean> filter) {

Review Comment:
   should this be private, or is it implicit with enums?
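   (For reference, a minimal sketch of the language rule, with an illustrative enum name: enum constructors are implicitly private, so the modifier is optional, and `public`/`protected` are compile errors.)
   ```java
   enum Example {
     A("a");
     private final String value;
     Example(String value) { this.value = value; }  // implicitly private
   }
   ```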



##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/S3ObjectStorageClassFilter.java:
##########
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.api;
+
+import java.util.Set;
+import java.util.function.Function;
+
+import software.amazon.awssdk.services.s3.model.ObjectStorageClass;
+import software.amazon.awssdk.services.s3.model.S3Object;
+
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.thirdparty.com.google.common.collect.Sets;

Review Comment:
   could you use org.apache.hadoop.util.Sets here? It's part of our attempt to
   isolate ourselves better from guava changes and the pain that causes downstream.
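   A sketch of the suggested swap, assuming org.apache.hadoop.util.Sets exposes
   the same newHashSet factory as the shaded guava class:
   ```java
   // before: import org.apache.hadoop.thirdparty.com.google.common.collect.Sets;
   import org.apache.hadoop.util.Sets;

   private static final Set<ObjectStorageClass> GLACIER_STORAGE_CLASSES =
       Sets.newHashSet(ObjectStorageClass.GLACIER, ObjectStorageClass.DEEP_ARCHIVE);
   ```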



##########
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/list/ITestS3AReadRestoredGlacierObjects.java:
##########
@@ -0,0 +1,193 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.list;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collection;
+
+import org.assertj.core.api.Assertions;
+import org.junit.Assume;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import software.amazon.awssdk.services.s3.S3Client;
+import software.amazon.awssdk.services.s3.model.RestoreObjectRequest;
+import software.amazon.awssdk.services.s3.model.S3Object;
+import software.amazon.awssdk.services.s3.model.Tier;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.contract.ContractTestUtils;
+import org.apache.hadoop.fs.contract.s3a.S3AContract;
+import org.apache.hadoop.fs.s3a.AbstractS3ATestBase;
+import org.apache.hadoop.fs.s3a.S3ListRequest;
+import org.apache.hadoop.fs.s3a.api.S3ObjectStorageClassFilter;
+import org.apache.hadoop.fs.store.audit.AuditSpan;
+import org.apache.hadoop.test.LambdaTestUtils;
+
+import static org.apache.hadoop.fs.s3a.Constants.READ_RESTORED_GLACIER_OBJECTS;
+import static org.apache.hadoop.fs.s3a.Constants.STORAGE_CLASS;
+import static org.apache.hadoop.fs.s3a.Constants.STORAGE_CLASS_DEEP_ARCHIVE;
+import static org.apache.hadoop.fs.s3a.Constants.STORAGE_CLASS_GLACIER;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.disableFilesystemCaching;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.skipIfStorageClassTestsDisabled;
+import static org.apache.hadoop.fs.statistics.StoreStatisticNames.OBJECT_LIST_REQUEST;
+
+
+/**
+ * Tests of various cases related to Glacier/Deep Archive Storage class.
+ */
+@RunWith(Parameterized.class)
+public class ITestS3AReadRestoredGlacierObjects extends AbstractS3ATestBase {
+
+  enum Type { GLACIER_AND_DEEP_ARCHIVE, GLACIER }
+
+  @Parameterized.Parameters(name = "storage-class-{1}")
+  public static Collection<Object[]> data(){
+    return Arrays.asList(new Object[][] {
+        {Type.GLACIER_AND_DEEP_ARCHIVE, STORAGE_CLASS_GLACIER},
+        {Type.GLACIER_AND_DEEP_ARCHIVE, STORAGE_CLASS_DEEP_ARCHIVE},
+        {Type.GLACIER, STORAGE_CLASS_GLACIER}
+    });
+  }
+
+  private final int maxRetries = 100;
+  private final int retryDelayMs = 5000;
+
+  private final Type type;
+  private final String glacierClass;
+
+  public ITestS3AReadRestoredGlacierObjects(Type type, String glacierClass) {
+    this.type = type;
+    this.glacierClass = glacierClass;
+  }
+
+  private FileSystem createFiles(String s3ObjectStorageClassFilter) throws Throwable {
+    Configuration conf = this.createConfiguration();
+    conf.set(READ_RESTORED_GLACIER_OBJECTS, s3ObjectStorageClassFilter);
+    // Create Glacier objects:Storage Class:DEEP_ARCHIVE/GLACIER
+    conf.set(STORAGE_CLASS, glacierClass);
+    S3AContract contract = (S3AContract) createContract(conf);
+    contract.init();
+
+    FileSystem fs = contract.getTestFileSystem();
+    Path path = new Path(methodPath(), "glaciated");
+    ContractTestUtils.touch(fs, path);
+    return fs;
+  }
+
+  @Override
+  protected Configuration createConfiguration() {
+    Configuration newConf = super.createConfiguration();
+    skipIfStorageClassTestsDisabled(newConf);
+    disableFilesystemCaching(newConf);
+    removeBaseAndBucketOverrides(newConf, STORAGE_CLASS, READ_RESTORED_GLACIER_OBJECTS);
+    return newConf;
+  }
+
+  @Test
+  public void testConfigWithInvalidValue() throws Throwable {
+    Assume.assumeTrue(type == Type.GLACIER_AND_DEEP_ARCHIVE);
+    String invalidValue = "ABCDE";
+    try (FileSystem fs = createFiles(invalidValue)) {
+      Assertions.assertThat(
+              fs.listStatus(methodPath()))
+          .describedAs("FileStatus List of %s", methodPath()).isNotEmpty();
+    }
+  }
+
+  @Test
+  public void testIgnoreGlacierObject() throws Throwable {
+    Assume.assumeTrue(type == Type.GLACIER_AND_DEEP_ARCHIVE);
+    try (FileSystem fs = createFiles(S3ObjectStorageClassFilter.SKIP_ALL_GLACIER.name())) {
+      Assertions.assertThat(
+          fs.listStatus(methodPath()))
+        .describedAs("FileStatus List of %s", methodPath()).isEmpty();
+    }
+  }
+
+  @Test
+  public void testIgnoreRestoringGlacierObject() throws Throwable {
+    Assume.assumeTrue(type == Type.GLACIER_AND_DEEP_ARCHIVE);
+    try (FileSystem fs = createFiles(S3ObjectStorageClassFilter.READ_RESTORED_GLACIER_OBJECTS.name())) {
+      Assertions.assertThat(
+              fs.listStatus(
+                  methodPath()))
+          .describedAs("FileStatus List of %s", methodPath()).isEmpty();
+    }
+  }
+
+  @Test
+  public void testRestoredGlacierObject() throws Throwable {
+    // Skipping this test for Deep Archive as expedited retrieval is not supported
+    Assume.assumeTrue(type == Type.GLACIER);
+    try (FileSystem fs = createFiles(S3ObjectStorageClassFilter.READ_RESTORED_GLACIER_OBJECTS.name())) {
+      restoreGlacierObject(getFilePrefixForListObjects(), 2);
+      Assertions.assertThat(
+              fs.listStatus(
+                  methodPath()))
+          .describedAs("FileStatus List of %s", methodPath()).isNotEmpty();
+    }
+  }
+
+  @Test
+  public void testReadAllObjects() throws Throwable {
+    Assume.assumeTrue(type == Type.GLACIER_AND_DEEP_ARCHIVE);
+    try (FileSystem fs = createFiles(S3ObjectStorageClassFilter.READ_ALL.name())) {
+      Assertions.assertThat(
+              fs.listStatus(methodPath()))
+          .describedAs("FileStatus List of %s", methodPath()).isNotEmpty();
+    }
+  }
+
+
+  private void restoreGlacierObject(String glacierObjectKey, int expirationDays) throws Exception {
+    try (AuditSpan auditSpan = getSpanSource().createSpan(OBJECT_LIST_REQUEST, "", "").activate()) {
+

Review Comment:
   so there's actually a span() method in the test superclass, and no need to
   worry about closing it; but you've learned more about audit spans...
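   A sketch of the simpler form, assuming span() in the test superclass creates
   and activates an audit span whose lifecycle the base class manages:
   ```java
   private void restoreGlacierObject(String glacierObjectKey, int expirationDays)
       throws Exception {
     span();  // activated for the test; no try-with-resources/close needed
     // ... issue the RestoreObjectRequest against the S3 client as before ...
   }
   ```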



##########
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/list/ITestS3AReadRestoredGlacierObjects.java:
##########
@@ -0,0 +1,193 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.list;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collection;
+
+import org.assertj.core.api.Assertions;
+import org.junit.Assume;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import software.amazon.awssdk.services.s3.S3Client;
+import software.amazon.awssdk.services.s3.model.RestoreObjectRequest;
+import software.amazon.awssdk.services.s3.model.S3Object;
+import software.amazon.awssdk.services.s3.model.Tier;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.contract.ContractTestUtils;
+import org.apache.hadoop.fs.contract.s3a.S3AContract;
+import org.apache.hadoop.fs.s3a.AbstractS3ATestBase;
+import org.apache.hadoop.fs.s3a.S3ListRequest;
+import org.apache.hadoop.fs.s3a.api.S3ObjectStorageClassFilter;
+import org.apache.hadoop.fs.store.audit.AuditSpan;
+import org.apache.hadoop.test.LambdaTestUtils;
+
+import static org.apache.hadoop.fs.s3a.Constants.READ_RESTORED_GLACIER_OBJECTS;
+import static org.apache.hadoop.fs.s3a.Constants.STORAGE_CLASS;
+import static org.apache.hadoop.fs.s3a.Constants.STORAGE_CLASS_DEEP_ARCHIVE;
+import static org.apache.hadoop.fs.s3a.Constants.STORAGE_CLASS_GLACIER;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.disableFilesystemCaching;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.skipIfStorageClassTestsDisabled;
+import static org.apache.hadoop.fs.statistics.StoreStatisticNames.OBJECT_LIST_REQUEST;
+
+
+/**
+ * Tests of various cases related to Glacier/Deep Archive Storage class.
+ */
+@RunWith(Parameterized.class)
+public class ITestS3AReadRestoredGlacierObjects extends AbstractS3ATestBase {
+
+  enum Type { GLACIER_AND_DEEP_ARCHIVE, GLACIER }
+
+  @Parameterized.Parameters(name = "storage-class-{1}")
+  public static Collection<Object[]> data(){
+    return Arrays.asList(new Object[][] {
+        {Type.GLACIER_AND_DEEP_ARCHIVE, STORAGE_CLASS_GLACIER},
+        {Type.GLACIER_AND_DEEP_ARCHIVE, STORAGE_CLASS_DEEP_ARCHIVE},
+        {Type.GLACIER, STORAGE_CLASS_GLACIER}
+    });
+  }
+
+  private final int maxRetries = 100;
+  private final int retryDelayMs = 5000;
+
+  private final Type type;
+  private final String glacierClass;
+
+  public ITestS3AReadRestoredGlacierObjects(Type type, String glacierClass) {
+    this.type = type;
+    this.glacierClass = glacierClass;
+  }
+
+  private FileSystem createFiles(String s3ObjectStorageClassFilter) throws Throwable {
+    Configuration conf = this.createConfiguration();
+    conf.set(READ_RESTORED_GLACIER_OBJECTS, s3ObjectStorageClassFilter);
+    // Create Glacier objects:Storage Class:DEEP_ARCHIVE/GLACIER
+    conf.set(STORAGE_CLASS, glacierClass);
+    S3AContract contract = (S3AContract) createContract(conf);

Review Comment:
   you don't need to do this. just do
   ```java
   S3AFileSystem fs = new S3AFileSystem();
   // uri of the test bucket, e.g. from getFileSystem().getUri()
   fs.initialize(uri, conf);
   ```
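   (If instantiated directly like this, the test should close the filesystem
   itself, e.g. in try-with-resources, as it never enters the FileSystem cache.)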
   



##########
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/list/ITestS3AReadRestoredGlacierObjects.java:
##########
@@ -0,0 +1,181 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.list;
+
+import static org.apache.hadoop.fs.s3a.Constants.READ_RESTORED_GLACIER_OBJECTS;
+import static org.apache.hadoop.fs.s3a.Constants.STORAGE_CLASS;
+import static org.apache.hadoop.fs.s3a.Constants.STORAGE_CLASS_DEEP_ARCHIVE;
+import static org.apache.hadoop.fs.s3a.Constants.STORAGE_CLASS_GLACIER;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.disableFilesystemCaching;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.skipIfStorageClassTestsDisabled;
+import static org.apache.hadoop.fs.statistics.StoreStatisticNames.OBJECT_LIST_REQUEST;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collection;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.contract.ContractTestUtils;
+import org.apache.hadoop.fs.contract.s3a.S3AContract;
+import org.apache.hadoop.fs.s3a.AbstractS3ATestBase;
+import org.apache.hadoop.fs.s3a.S3ListRequest;
+import org.apache.hadoop.fs.s3a.S3ObjectStorageClassFilter;
+import org.apache.hadoop.fs.store.audit.AuditSpan;
+import org.apache.hadoop.test.LambdaTestUtils;
+import org.assertj.core.api.Assertions;
+import org.junit.Assume;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import software.amazon.awssdk.services.s3.S3Client;
+import software.amazon.awssdk.services.s3.model.RestoreObjectRequest;
+import software.amazon.awssdk.services.s3.model.S3Object;
+import software.amazon.awssdk.services.s3.model.Tier;
+
+/**
+ * Tests of various cases related to Glacier/Deep Archive Storage class.
+ */
+@RunWith(Parameterized.class)
+public class ITestS3AReadRestoredGlacierObjects extends AbstractS3ATestBase {
+
+  enum Type { GLACIER_AND_DEEP_ARCHIVE, GLACIER }
+
+  @Parameterized.Parameters(name = "storage-class-{1}")
+  public static Collection<Object[]> data(){
+    return Arrays.asList(new Object[][] {
+        {Type.GLACIER_AND_DEEP_ARCHIVE, STORAGE_CLASS_GLACIER},
+        {Type.GLACIER_AND_DEEP_ARCHIVE, STORAGE_CLASS_DEEP_ARCHIVE},
+        {Type.GLACIER, STORAGE_CLASS_GLACIER}
+    });
+  }
+
+  private final int maxRetries = 100;
+  private final int retryDelayMs = 5000;
+
+  private final Type type;
+  private final String glacierClass;
+
+  public ITestS3AReadRestoredGlacierObjects(Type type, String glacierClass) {
+    this.type = type;
+    this.glacierClass = glacierClass;
+  }
+
+  private FileSystem createFiles(String s3ObjectStorageClassFilter) throws Throwable {
+    Configuration conf = this.createConfiguration();

Review Comment:
   sorry I meant "remove the `this.` prefix"; just use createConfiguration() directly
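   i.e.
   ```java
   Configuration conf = createConfiguration();
   ```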



##########
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/list/ITestS3AReadRestoredGlacierObjects.java:
##########
@@ -0,0 +1,193 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.list;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collection;
+
+import org.assertj.core.api.Assertions;
+import org.junit.Assume;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import software.amazon.awssdk.services.s3.S3Client;
+import software.amazon.awssdk.services.s3.model.RestoreObjectRequest;
+import software.amazon.awssdk.services.s3.model.S3Object;
+import software.amazon.awssdk.services.s3.model.Tier;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.contract.ContractTestUtils;
+import org.apache.hadoop.fs.contract.s3a.S3AContract;
+import org.apache.hadoop.fs.s3a.AbstractS3ATestBase;
+import org.apache.hadoop.fs.s3a.S3ListRequest;
+import org.apache.hadoop.fs.s3a.api.S3ObjectStorageClassFilter;
+import org.apache.hadoop.fs.store.audit.AuditSpan;
+import org.apache.hadoop.test.LambdaTestUtils;
+
+import static org.apache.hadoop.fs.s3a.Constants.READ_RESTORED_GLACIER_OBJECTS;
+import static org.apache.hadoop.fs.s3a.Constants.STORAGE_CLASS;
+import static org.apache.hadoop.fs.s3a.Constants.STORAGE_CLASS_DEEP_ARCHIVE;
+import static org.apache.hadoop.fs.s3a.Constants.STORAGE_CLASS_GLACIER;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.disableFilesystemCaching;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.skipIfStorageClassTestsDisabled;
+import static org.apache.hadoop.fs.statistics.StoreStatisticNames.OBJECT_LIST_REQUEST;
+
+
+/**
+ * Tests of various cases related to Glacier/Deep Archive Storage class.
+ */
+@RunWith(Parameterized.class)
+public class ITestS3AReadRestoredGlacierObjects extends AbstractS3ATestBase {
+
+  enum Type { GLACIER_AND_DEEP_ARCHIVE, GLACIER }
+
+  @Parameterized.Parameters(name = "storage-class-{1}")
+  public static Collection<Object[]> data(){
+    return Arrays.asList(new Object[][] {
+        {Type.GLACIER_AND_DEEP_ARCHIVE, STORAGE_CLASS_GLACIER},
+        {Type.GLACIER_AND_DEEP_ARCHIVE, STORAGE_CLASS_DEEP_ARCHIVE},
+        {Type.GLACIER, STORAGE_CLASS_GLACIER}
+    });
+  }
+
+  private final int maxRetries = 100;
+  private final int retryDelayMs = 5000;
+
+  private final Type type;
+  private final String glacierClass;
+
+  public ITestS3AReadRestoredGlacierObjects(Type type, String glacierClass) {
+    this.type = type;
+    this.glacierClass = glacierClass;
+  }
+
+  private FileSystem createFiles(String s3ObjectStorageClassFilter) throws Throwable {
+    Configuration conf = this.createConfiguration();
+    conf.set(READ_RESTORED_GLACIER_OBJECTS, s3ObjectStorageClassFilter);
+    // Create Glacier objects:Storage Class:DEEP_ARCHIVE/GLACIER
+    conf.set(STORAGE_CLASS, glacierClass);
+    S3AContract contract = (S3AContract) createContract(conf);
+    contract.init();
+
+    FileSystem fs = contract.getTestFileSystem();
+    Path path = new Path(methodPath(), "glaciated");
+    ContractTestUtils.touch(fs, path);
+    return fs;
+  }
+
+  @Override
+  protected Configuration createConfiguration() {
+    Configuration newConf = super.createConfiguration();
+    skipIfStorageClassTestsDisabled(newConf);
+    disableFilesystemCaching(newConf);
+    removeBaseAndBucketOverrides(newConf, STORAGE_CLASS, READ_RESTORED_GLACIER_OBJECTS);
+    return newConf;
+  }
+
+  @Test
+  public void testConfigWithInvalidValue() throws Throwable {
+    Assume.assumeTrue(type == Type.GLACIER_AND_DEEP_ARCHIVE);
+    String invalidValue = "ABCDE";
+    try (FileSystem fs = createFiles(invalidValue)) {
+      Assertions.assertThat(
+              fs.listStatus(methodPath()))
+          .describedAs("FileStatus List of %s", methodPath()).isNotEmpty();
+    }
+  }
+
+  @Test
+  public void testIgnoreGlacierObject() throws Throwable {
+    Assume.assumeTrue(type == Type.GLACIER_AND_DEEP_ARCHIVE);
+    try (FileSystem fs = createFiles(S3ObjectStorageClassFilter.SKIP_ALL_GLACIER.name())) {
+      Assertions.assertThat(
+          fs.listStatus(methodPath()))
+        .describedAs("FileStatus List of %s", methodPath()).isEmpty();

Review Comment:
   can you put the .isEmpty() on the line below; we like to try and split them up as they're very complex to read.
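   i.e. something like:
   ```java
   Assertions.assertThat(fs.listStatus(methodPath()))
       .describedAs("FileStatus List of %s", methodPath())
       .isEmpty();
   ```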



##########
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/S3ObjectStorageClassFilter.java:
##########
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.api;
+
+import java.util.Set;
+import java.util.function.Function;
+
+import software.amazon.awssdk.services.s3.model.ObjectStorageClass;
+import software.amazon.awssdk.services.s3.model.S3Object;
+
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.thirdparty.com.google.common.collect.Sets;
+
+
+/**
+ * <pre>
+ * {@link S3ObjectStorageClassFilter} will filter the S3 files based on the
+ * {@code fs.s3a.glacier.read.restored.objects} configuration set in {@link S3AFileSystem}
+ * The config can have 3 values:
+ * {@code READ_ALL}: Retrieval of Glacier files will fail with InvalidObjectStateException:
+ * The operation is not valid for the object's storage class.
+ * {@code SKIP_ALL_GLACIER}: If this value is set then this will ignore any S3 Objects which are
+ * tagged with Glacier storage classes and retrieve the others.
+ * {@code READ_RESTORED_GLACIER_OBJECTS}: If this value is set then restored status of the Glacier
+ * object will be checked, if restored the objects would be read like normal S3 objects
+ * else they will be ignored as the objects would not have been retrieved from the S3 Glacier.
+ * </pre>
+ */
+public enum S3ObjectStorageClassFilter {
+  READ_ALL(o -> true),
+  SKIP_ALL_GLACIER(S3ObjectStorageClassFilter::isNotGlacierObject),
+  READ_RESTORED_GLACIER_OBJECTS(S3ObjectStorageClassFilter::isCompletedRestoredObject);
+
+  private static final Set<ObjectStorageClass> GLACIER_STORAGE_CLASSES = Sets.newHashSet(
+      ObjectStorageClass.GLACIER, ObjectStorageClass.DEEP_ARCHIVE);
+
+  private final Function<S3Object, Boolean> filter;

Review Comment:
   nit: add a javadoc here and in the getter
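   e.g., with illustrative wording:
   ```java
   /**
    * Filter to apply to each S3Object during listing;
    * returns true if the object is to be included.
    */
   private final Function<S3Object, Boolean> filter;
   ```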



##########
hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md:
##########
@@ -1308,8 +1308,34 @@ The switch to turn S3A auditing on or off.
     Should auditing of S3A requests be enabled?
   </description>
 </property>
+```
+## <a name="glacier"></a> Glacier Object Support
+
+[Amazon S3 Glacier (S3 Glacier)](https://docs.aws.amazon.com/amazonglacier/latest/dev/introduction.html) is a secure and durable service for low-cost data archiving and
+long-term backup.
+With S3 Glacier, you can store your data cost effectively for months, years, or even decades.

Review Comment:
   replace "you" with something like "it is possible to store data more 
cost-effectively";  this is the docs for the connector, not marketing...



##########
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/list/ITestS3AReadRestoredGlacierObjects.java:
##########
@@ -0,0 +1,193 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.list;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collection;
+
+import org.assertj.core.api.Assertions;
+import org.junit.Assume;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import software.amazon.awssdk.services.s3.S3Client;
+import software.amazon.awssdk.services.s3.model.RestoreObjectRequest;
+import software.amazon.awssdk.services.s3.model.S3Object;
+import software.amazon.awssdk.services.s3.model.Tier;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.contract.ContractTestUtils;
+import org.apache.hadoop.fs.contract.s3a.S3AContract;
+import org.apache.hadoop.fs.s3a.AbstractS3ATestBase;
+import org.apache.hadoop.fs.s3a.S3ListRequest;
+import org.apache.hadoop.fs.s3a.api.S3ObjectStorageClassFilter;
+import org.apache.hadoop.fs.store.audit.AuditSpan;
+import org.apache.hadoop.test.LambdaTestUtils;
+
+import static org.apache.hadoop.fs.s3a.Constants.READ_RESTORED_GLACIER_OBJECTS;
+import static org.apache.hadoop.fs.s3a.Constants.STORAGE_CLASS;
+import static org.apache.hadoop.fs.s3a.Constants.STORAGE_CLASS_DEEP_ARCHIVE;
+import static org.apache.hadoop.fs.s3a.Constants.STORAGE_CLASS_GLACIER;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.disableFilesystemCaching;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.skipIfStorageClassTestsDisabled;
+import static org.apache.hadoop.fs.statistics.StoreStatisticNames.OBJECT_LIST_REQUEST;
+
+
+/**
+ * Tests of various cases related to Glacier/Deep Archive Storage class.
+ */
+@RunWith(Parameterized.class)
+public class ITestS3AReadRestoredGlacierObjects extends AbstractS3ATestBase {
+
+  enum Type { GLACIER_AND_DEEP_ARCHIVE, GLACIER }
+
+  @Parameterized.Parameters(name = "storage-class-{1}")
+  public static Collection<Object[]> data(){
+    return Arrays.asList(new Object[][] {
+        {Type.GLACIER_AND_DEEP_ARCHIVE, STORAGE_CLASS_GLACIER},
+        {Type.GLACIER_AND_DEEP_ARCHIVE, STORAGE_CLASS_DEEP_ARCHIVE},
+        {Type.GLACIER, STORAGE_CLASS_GLACIER}
+    });
+  }
+
+  private final int maxRetries = 100;

Review Comment:
   these are effectively constants, so make them static final and upper case.
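   i.e.
   ```java
   private static final int MAX_RETRIES = 100;
   private static final int RETRY_DELAY_MS = 5000;
   ```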
   



##########
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/list/ITestS3AReadRestoredGlacierObjects.java:
##########
@@ -0,0 +1,193 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.list;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collection;
+
+import org.assertj.core.api.Assertions;
+import org.junit.Assume;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import software.amazon.awssdk.services.s3.S3Client;
+import software.amazon.awssdk.services.s3.model.RestoreObjectRequest;
+import software.amazon.awssdk.services.s3.model.S3Object;
+import software.amazon.awssdk.services.s3.model.Tier;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.contract.ContractTestUtils;
+import org.apache.hadoop.fs.contract.s3a.S3AContract;
+import org.apache.hadoop.fs.s3a.AbstractS3ATestBase;
+import org.apache.hadoop.fs.s3a.S3ListRequest;
+import org.apache.hadoop.fs.s3a.api.S3ObjectStorageClassFilter;
+import org.apache.hadoop.fs.store.audit.AuditSpan;
+import org.apache.hadoop.test.LambdaTestUtils;
+
+import static org.apache.hadoop.fs.s3a.Constants.READ_RESTORED_GLACIER_OBJECTS;
+import static org.apache.hadoop.fs.s3a.Constants.STORAGE_CLASS;
+import static org.apache.hadoop.fs.s3a.Constants.STORAGE_CLASS_DEEP_ARCHIVE;
+import static org.apache.hadoop.fs.s3a.Constants.STORAGE_CLASS_GLACIER;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.disableFilesystemCaching;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.skipIfStorageClassTestsDisabled;
+import static org.apache.hadoop.fs.statistics.StoreStatisticNames.OBJECT_LIST_REQUEST;
+
+
+/**
+ * Tests of various cases related to Glacier/Deep Archive Storage class.
+ */
+@RunWith(Parameterized.class)
+public class ITestS3AReadRestoredGlacierObjects extends AbstractS3ATestBase {
+
+  enum Type { GLACIER_AND_DEEP_ARCHIVE, GLACIER }
+
+  @Parameterized.Parameters(name = "storage-class-{1}")
+  public static Collection<Object[]> data(){
+    return Arrays.asList(new Object[][] {
+        {Type.GLACIER_AND_DEEP_ARCHIVE, STORAGE_CLASS_GLACIER},
+        {Type.GLACIER_AND_DEEP_ARCHIVE, STORAGE_CLASS_DEEP_ARCHIVE},
+        {Type.GLACIER, STORAGE_CLASS_GLACIER}
+    });
+  }
+
+  private final int maxRetries = 100;
+  private final int retryDelayMs = 5000;
+
+  private final Type type;
+  private final String glacierClass;
+
+  public ITestS3AReadRestoredGlacierObjects(Type type, String glacierClass) {
+    this.type = type;
+    this.glacierClass = glacierClass;
+  }
+
+  private FileSystem createFiles(String s3ObjectStorageClassFilter) throws Throwable {
+    Configuration conf = this.createConfiguration();
+    conf.set(READ_RESTORED_GLACIER_OBJECTS, s3ObjectStorageClassFilter);
+    // Create Glacier objects:Storage Class:DEEP_ARCHIVE/GLACIER
+    conf.set(STORAGE_CLASS, glacierClass);
+    S3AContract contract = (S3AContract) createContract(conf);
+    contract.init();
+
+    FileSystem fs = contract.getTestFileSystem();
+    Path path = new Path(methodPath(), "glaciated");
+    ContractTestUtils.touch(fs, path);
+    return fs;
+  }
+
+  @Override
+  protected Configuration createConfiguration() {
+    Configuration newConf = super.createConfiguration();
+    skipIfStorageClassTestsDisabled(newConf);
+    disableFilesystemCaching(newConf);
+    removeBaseAndBucketOverrides(newConf, STORAGE_CLASS, READ_RESTORED_GLACIER_OBJECTS);
+    return newConf;
+  }
+
+  @Test
+  public void testConfigWithInvalidValue() throws Throwable {
+    Assume.assumeTrue(type == Type.GLACIER_AND_DEEP_ARCHIVE);
+    String invalidValue = "ABCDE";
+    try (FileSystem fs = createFiles(invalidValue)) {
+      Assertions.assertThat(
+              fs.listStatus(methodPath()))
+          .describedAs("FileStatus List of %s", methodPath()).isNotEmpty();
+    }
+  }
+
+  @Test
+  public void testIgnoreGlacierObject() throws Throwable {
+    Assume.assumeTrue(type == Type.GLACIER_AND_DEEP_ARCHIVE);
+    try (FileSystem fs = createFiles(S3ObjectStorageClassFilter.SKIP_ALL_GLACIER.name())) {
+      Assertions.assertThat(
+          fs.listStatus(methodPath()))
+        .describedAs("FileStatus List of %s", methodPath()).isEmpty();
+    }
+  }
+
+  @Test
+  public void testIgnoreRestoringGlacierObject() throws Throwable {
+    Assume.assumeTrue(type == Type.GLACIER_AND_DEEP_ARCHIVE);
+    try (FileSystem fs = createFiles(S3ObjectStorageClassFilter.READ_RESTORED_GLACIER_OBJECTS.name())) {
+      Assertions.assertThat(
+              fs.listStatus(
+                  methodPath()))
+          .describedAs("FileStatus List of %s", methodPath()).isEmpty();
+    }
+  }
+
+  @Test
+  public void testRestoredGlacierObject() throws Throwable {
+    // Skipping this test for Deep Archive as expedited retrieval is not supported
+    Assume.assumeTrue(type == Type.GLACIER);
+    try (FileSystem fs = createFiles(S3ObjectStorageClassFilter.READ_RESTORED_GLACIER_OBJECTS.name())) {
+      restoreGlacierObject(getFilePrefixForListObjects(), 2);
+      Assertions.assertThat(
+              fs.listStatus(
+                  methodPath()))
+          .describedAs("FileStatus List of %s", methodPath()).isNotEmpty();
+    }
+  }
+
+  @Test
+  public void testReadAllObjects() throws Throwable {
+    Assume.assumeTrue(type == Type.GLACIER_AND_DEEP_ARCHIVE);
+    try (FileSystem fs = createFiles(S3ObjectStorageClassFilter.READ_ALL.name())) {
+      Assertions.assertThat(
+              fs.listStatus(methodPath()))
+          .describedAs("FileStatus List of %s", methodPath()).isNotEmpty();
+    }
+  }
+
+
+  private void restoreGlacierObject(String glacierObjectKey, int expirationDays) throws Exception {
+    try (AuditSpan auditSpan = getSpanSource().createSpan(OBJECT_LIST_REQUEST, "", "").activate()) {

Review Comment:
   and use a describe() call to print out what is about to happen; useful for IDE runs.
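   A sketch, assuming the describe(String, Object...) helper of the contract test
   base class:
   ```java
   describe("restoring object under %s with expiration of %d days",
       glacierObjectKey, expirationDays);
   ```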



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

