Repository: hadoop
Updated Branches:
  refs/heads/HADOOP-13345 8e257a406 -> 2d0684292


HADOOP-13760: S3Guard: add new classes


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/2d068429
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/2d068429
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/2d068429

Branch: refs/heads/HADOOP-13345
Commit: 2d0684292bb8cd77509e71830ee047e788057a05
Parents: 8e257a4
Author: Sean Mackrory <mackror...@apache.org>
Authored: Thu May 25 07:11:20 2017 -0600
Committer: Sean Mackrory <mackror...@apache.org>
Committed: Thu May 25 07:11:20 2017 -0600

----------------------------------------------------------------------
 .../s3guard/MetadataStoreListFilesIterator.java | 168 +++++++++++++++++++
 .../org/apache/hadoop/fs/s3a/TestListing.java   |  94 +++++++++++
 2 files changed, 262 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/2d068429/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreListFilesIterator.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreListFilesIterator.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreListFilesIterator.java
new file mode 100644
index 0000000..272b1f4
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreListFilesIterator.java
@@ -0,0 +1,168 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.s3guard;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.Queue;
+import java.util.Set;
+
+import com.google.common.base.Preconditions;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * {@code MetadataStoreListFilesIterator} is a {@link RemoteIterator} that
+ * is similar to {@code DescendantsIterator} but does not return directories
+ * that have (or may have) children, and will also provide access to the set of
+ * tombstones to allow recently deleted S3 objects to be filtered out from a
+ * corresponding request.  In other words, it returns tombstones plus the set
+ * of objects that should exist in S3: files and empty directories, but not
+ * the other directories whose existence is merely inferred from them.
+ *
+ * For example, assume the consistent store contains metadata representing this
+ * file system structure:
+ *
+ * <pre>
+ * /dir1
+ * |-- dir2
+ * |   |-- file1
+ * |   `-- file2
+ * `-- dir3
+ *     |-- dir4
+ *     |   `-- file3
+ *     |-- dir5
+ *     |   `-- file4
+ *     `-- dir6
+ * </pre>
+ *
+ * Consider this code sample:
+ * <pre>
+ * final PathMetadata dir1 = ms.get(new Path("/dir1"));
+ * for (MetadataStoreListFilesIterator files =
+ *     new MetadataStoreListFilesIterator(ms, dir1, true);
+ *     files.hasNext(); ) {
+ *   final FileStatus status = files.next().getFileStatus();
+ *   System.out.printf("%s %s%n", status.isDirectory() ? 'D' : 'F',
+ *       status.getPath());
+ * }
+ * </pre>
+ *
+ * The output is:
+ * <pre>
+ * F /dir1/dir2/file1
+ * F /dir1/dir2/file2
+ * F /dir1/dir3/dir4/file3
+ * F /dir1/dir3/dir5/file4
+ * D /dir1/dir3/dir6
+ * </pre>
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Evolving
+public class MetadataStoreListFilesIterator implements
+    RemoteIterator<FileStatus> {
+  public static final Logger LOG = LoggerFactory.getLogger(
+      MetadataStoreListFilesIterator.class);
+
+  private final boolean allowAuthoritative;
+  private final MetadataStore metadataStore;
+  private final Set<Path> tombstones = new HashSet<>();
+  private Iterator<FileStatus> leafNodesIterator = null;
+
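+  /**
+   * Create the iterator and eagerly walk the tree under {@code meta}.
+   * @param ms the MetadataStore to query for directory listings
+   * @param meta the root of the subtree to list; may be null
+   * @param allowAuthoritative whether authoritative, empty directory
+   *     listings may be treated as leaf entries
+   * @throws IOException if the MetadataStore lookups fail
+   */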
+  public MetadataStoreListFilesIterator(MetadataStore ms, PathMetadata meta,
+      boolean allowAuthoritative) throws IOException {
+    Preconditions.checkNotNull(ms);
+    this.metadataStore = ms;
+    this.allowAuthoritative = allowAuthoritative;
+    prefetch(meta);
+  }
+
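+  /**
+   * Breadth-first walk of the metadata tree rooted at {@code meta},
+   * collecting leaf nodes (files, plus directories known to be empty when
+   * authoritative listings are permitted) and recording tombstones found
+   * along the way.
+   */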
+  private void prefetch(PathMetadata meta) throws IOException {
+    final Queue<PathMetadata> queue = new LinkedList<>();
+    final Collection<FileStatus> leafNodes = new ArrayList<>();
+
+    if (meta != null) {
+      final Path path = meta.getFileStatus().getPath();
+      if (path.isRoot()) {
+        DirListingMetadata rootListing = metadataStore.listChildren(path);
+        if (rootListing != null) {
+          tombstones.addAll(rootListing.listTombstones());
+          queue.addAll(rootListing.withoutTombstones().getListing());
+        }
+      } else {
+        queue.add(meta);
+      }
+    }
+
+    while (!queue.isEmpty()) {
+      PathMetadata nextMetadata = queue.poll();
+      FileStatus nextStatus = nextMetadata.getFileStatus();
+      if (nextStatus.isFile()) {
+        // All files are leaf nodes by definition
+        leafNodes.add(nextStatus);
+        continue;
+      }
+      if (nextStatus.isDirectory()) {
+        final Path path = nextStatus.getPath();
+        DirListingMetadata children = metadataStore.listChildren(path);
+        if (children != null) {
+          tombstones.addAll(children.listTombstones());
+          Collection<PathMetadata> liveChildren =
+              children.withoutTombstones().getListing();
+          if (!liveChildren.isEmpty()) {
+            // If it's a directory, has children, not all deleted, then we
+            // add the children to the queue and move on to the next node
+            queue.addAll(liveChildren);
+            continue;
+          } else if (allowAuthoritative && children.isAuthoritative()) {
+            leafNodes.add(nextStatus);
+          }
+        }
+      }
+      // Directories that *might* be empty are ignored for now, since we
+      // cannot confirm that they are empty without incurring other costs.
+      // Users of this class can still discover empty directories via S3's
+      // fake directories, subject to the same consistency semantics as before.
+      // The only other possibility is a symlink, which is unsupported on S3A.
+    }
+    leafNodesIterator = leafNodes.iterator();
+  }
+
+  @Override
+  public boolean hasNext() {
+    return leafNodesIterator.hasNext();
+  }
+
+  @Override
+  public FileStatus next() {
+    return leafNodesIterator.next();
+  }
+
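+  /** @return the set of tombstoned paths seen during the traversal. */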
+  public Set<Path> listTombstones() {
+    return tombstones;
+  }
+}
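
A usage sketch (not part of this commit), assuming an initialized
MetadataStore "ms" whose get() returns the PathMetadata for a path, as in
the class javadoc above:

    PathMetadata dir1 = ms.get(new Path("/dir1"));
    MetadataStoreListFilesIterator files =
        new MetadataStoreListFilesIterator(ms, dir1, true);
    while (files.hasNext()) {
      FileStatus status = files.next();
      System.out.printf("%s %s%n", status.isDirectory() ? 'D' : 'F',
          status.getPath());
    }
    // Paths deleted in the MetadataStore but possibly still listed by an
    // eventually-consistent S3 request can be filtered out with this set:
    Set<Path> tombstones = files.listTombstones();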

http://git-wip-us.apache.org/repos/asf/hadoop/blob/2d068429/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestListing.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestListing.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestListing.java
new file mode 100644
index 0000000..43eb2c0
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestListing.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Set;
+
+/**
+ * Unit tests for the S3A listing helper classes in {@link Listing}.
+ */
+public class TestListing extends AbstractS3AMockTest {
+
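+  /** A RemoteIterator that serves FileStatus entries from a collection. */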
+  private static class MockRemoteIterator implements
+      RemoteIterator<FileStatus> {
+    private final Iterator<FileStatus> iterator;
+
+    MockRemoteIterator(Collection<FileStatus> source) {
+      iterator = source.iterator();
+    }
+
+    @Override
+    public boolean hasNext() {
+      return iterator.hasNext();
+    }
+
+    @Override
+    public FileStatus next() {
+      return iterator.next();
+    }
+  }
+
+  private FileStatus blankFileStatus(Path path) {
+    return new FileStatus(0, true, 0, 0, 0, path);
+  }
+
+  @Test
+  public void testTombstoneReconcilingIterator() throws Exception {
+    Path parent = new Path("/parent");
+    Path liveChild = new Path(parent, "liveChild");
+    Path deletedChild = new Path(parent, "deletedChild");
+
+    Listing listing = new Listing(fs);
+    Collection<FileStatus> statuses = new ArrayList<>();
+    statuses.add(blankFileStatus(parent));
+    statuses.add(blankFileStatus(liveChild));
+    statuses.add(blankFileStatus(deletedChild));
+
+    Set<Path> tombstones = new HashSet<>();
+    tombstones.add(deletedChild);
+
+    RemoteIterator<FileStatus> sourceIterator = new MockRemoteIterator(
+        statuses);
+    RemoteIterator<LocatedFileStatus> locatedIterator =
+        listing.createLocatedFileStatusIterator(sourceIterator);
+    RemoteIterator<LocatedFileStatus> reconcilingIterator =
+        listing.createTombstoneReconcilingIterator(locatedIterator,
+            tombstones);
+
+    Set<Path> expectedPaths = new HashSet<>();
+    expectedPaths.add(parent);
+    expectedPaths.add(liveChild);
+
+    Set<Path> actualPaths = new HashSet<>();
+    while (reconcilingIterator.hasNext()) {
+      actualPaths.add(reconcilingIterator.next().getPath());
+    }
+    Assert.assertEquals(expectedPaths, actualPaths);
+  }
+}
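
For reference, a minimal sketch of the filtering this test exercises (not
the actual Listing implementation): wrap a RemoteIterator and skip any
entry whose path appears in the tombstone set. Assumes java.io.IOException
is imported, since RemoteIterator methods may throw it.

    static RemoteIterator<LocatedFileStatus> reconcile(
        final RemoteIterator<LocatedFileStatus> source,
        final Set<Path> tombstones) {
      return new RemoteIterator<LocatedFileStatus>() {
        private LocatedFileStatus lookahead; // next live entry, if buffered

        @Override
        public boolean hasNext() throws IOException {
          // Advance past tombstoned entries until a live one is buffered.
          while (lookahead == null && source.hasNext()) {
            LocatedFileStatus candidate = source.next();
            if (!tombstones.contains(candidate.getPath())) {
              lookahead = candidate;
            }
          }
          return lookahead != null;
        }

        @Override
        public LocatedFileStatus next() throws IOException {
          if (!hasNext()) {
            throw new java.util.NoSuchElementException();
          }
          LocatedFileStatus result = lookahead;
          lookahead = null;
          return result;
        }
      };
    }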

