This is an automated email from the ASF dual-hosted git repository.

sodonnell pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/branch-3.3 by this push:
     new ef95f7a  HDFS-15937. Reduce memory used during datanode layout upgrade. Contributed by Stephen O'Donnell (#2838)
ef95f7a is described below

commit ef95f7a96355e93d69eb9a7c9d9dd672974ff22e
Author: Stephen O'Donnell <stephen.odonn...@gmail.com>
AuthorDate: Thu Apr 8 11:59:02 2021 +0100

    HDFS-15937. Reduce memory used during datanode layout upgrade. Contributed by Stephen O'Donnell (#2838)
    
    (cherry picked from commit 4c567fcff7af45c75117ee4a75c087aa454a89e5)
---
 .../hadoop/hdfs/server/datanode/DataStorage.java   | 98 ++++++++++++++--------
 1 file changed, 62 insertions(+), 36 deletions(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataStorage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataStorage.java
index e917b77..1d2f10f 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataStorage.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataStorage.java
@@ -1064,12 +1064,26 @@ public class DataStorage extends Storage {
   }
 
   private static class LinkArgs {
-    File src;
-    File dst;
+    private File srcDir;
+    private File dstDir;
+    private String blockFile;
+
+    LinkArgs(File srcDir, File dstDir, String blockFile) {
+      this.srcDir = srcDir;
+      this.dstDir = dstDir;
+      this.blockFile = blockFile;
+    }
+
+    public File src() {
+      return new File(srcDir, blockFile);
+    }
 
-    LinkArgs(File src, File dst) {
-      this.src = src;
-      this.dst = dst;
+    public File dst() {
+      return new File(dstDir, blockFile);
+    }
+
+    public String blockFile() {
+      return blockFile;
     }
   }
 
@@ -1095,8 +1109,9 @@ public class DataStorage extends Storage {
     }
 
     final ArrayList<LinkArgs> idBasedLayoutSingleLinks = Lists.newArrayList();
-    linkBlocksHelper(from, to, oldLV, hl, upgradeToIdBasedLayout, to,
-        idBasedLayoutSingleLinks);
+    final Map<File, File> pathCache = new HashMap<>();
+    linkBlocksHelper(from, to, hl, upgradeToIdBasedLayout, to,
+        idBasedLayoutSingleLinks, pathCache);
 
     // Detect and remove duplicate entries.
     final ArrayList<LinkArgs> duplicates =
@@ -1122,7 +1137,7 @@ public class DataStorage extends Storage {
               idBasedLayoutSingleLinks.size());
           for (int j = iCopy; j < upperBound; j++) {
             LinkArgs cur = idBasedLayoutSingleLinks.get(j);
-            HardLink.createHardLink(cur.src, cur.dst);
+            HardLink.createHardLink(cur.src(), cur.dst());
           }
           return null;
         }
@@ -1155,9 +1170,9 @@ public class DataStorage extends Storage {
       @Override
       public int compare(LinkArgs a, LinkArgs b) {
         return ComparisonChain.start().
-            compare(a.src.getName(), b.src.getName()).
-            compare(a.src, b.src).
-            compare(a.dst, b.dst).
+            compare(a.blockFile(), b.blockFile()).
+            compare(a.src(), b.src()).
+            compare(a.dst(), b.dst()).
             result();
       }
     });
@@ -1167,8 +1182,8 @@ public class DataStorage extends Storage {
     boolean addedPrev = false;
     for (int i = 0; i < all.size(); i++) {
       LinkArgs args = all.get(i);
-      long blockId = Block.getBlockId(args.src.getName());
-      boolean isMeta = Block.isMetaFilename(args.src.getName());
+      long blockId = Block.getBlockId(args.blockFile());
+      boolean isMeta = Block.isMetaFilename(args.blockFile());
       if ((prevBlockId == null) ||
           (prevBlockId.longValue() != blockId)) {
         prevBlockId = blockId;
@@ -1207,10 +1222,10 @@ public class DataStorage extends Storage {
     TreeMap<Long, List<LinkArgs>> highestGenstamps =
         new TreeMap<Long, List<LinkArgs>>();
     for (LinkArgs duplicate : duplicates) {
-      if (!Block.isMetaFilename(duplicate.src.getName())) {
+      if (!Block.isMetaFilename(duplicate.blockFile())) {
         continue;
       }
-      long blockId = Block.getBlockId(duplicate.src.getName());
+      long blockId = Block.getBlockId(duplicate.blockFile());
       List<LinkArgs> prevHighest = highestGenstamps.get(blockId);
       if (prevHighest == null) {
         List<LinkArgs> highest = new LinkedList<LinkArgs>();
@@ -1219,8 +1234,8 @@ public class DataStorage extends Storage {
         continue;
       }
       long prevGenstamp =
-          Block.getGenerationStamp(prevHighest.get(0).src.getName());
-      long genstamp = Block.getGenerationStamp(duplicate.src.getName());
+          Block.getGenerationStamp(prevHighest.get(0).blockFile());
+      long genstamp = Block.getGenerationStamp(duplicate.blockFile());
       if (genstamp < prevGenstamp) {
         continue;
       }
@@ -1234,19 +1249,19 @@ public class DataStorage extends Storage {
     // from the duplicates list.
     for (Iterator<LinkArgs> iter = duplicates.iterator(); iter.hasNext(); ) {
       LinkArgs duplicate = iter.next();
-      long blockId = Block.getBlockId(duplicate.src.getName());
+      long blockId = Block.getBlockId(duplicate.blockFile());
       List<LinkArgs> highest = highestGenstamps.get(blockId);
       if (highest != null) {
         boolean found = false;
         for (LinkArgs high : highest) {
-          if (high.src.getParent().equals(duplicate.src.getParent())) {
+          if (high.src().getParent().equals(duplicate.src().getParent())) {
             found = true;
             break;
           }
         }
         if (!found) {
           LOG.warn("Unexpectedly low genstamp on {}.",
-              duplicate.src.getAbsolutePath());
+              duplicate.src().getAbsolutePath());
           iter.remove();
         }
       }
@@ -1257,25 +1272,25 @@ public class DataStorage extends Storage {
     // preserving one block file / metadata file pair.
     TreeMap<Long, LinkArgs> longestBlockFiles = new TreeMap<Long, LinkArgs>();
     for (LinkArgs duplicate : duplicates) {
-      if (Block.isMetaFilename(duplicate.src.getName())) {
+      if (Block.isMetaFilename(duplicate.blockFile())) {
         continue;
       }
-      long blockId = Block.getBlockId(duplicate.src.getName());
+      long blockId = Block.getBlockId(duplicate.blockFile());
       LinkArgs prevLongest = longestBlockFiles.get(blockId);
       if (prevLongest == null) {
         longestBlockFiles.put(blockId, duplicate);
         continue;
       }
-      long blockLength = duplicate.src.length();
-      long prevBlockLength = prevLongest.src.length();
+      long blockLength = duplicate.src().length();
+      long prevBlockLength = prevLongest.src().length();
       if (blockLength < prevBlockLength) {
         LOG.warn("Unexpectedly short length on {}.",
-            duplicate.src.getAbsolutePath());
+            duplicate.src().getAbsolutePath());
         continue;
       }
       if (blockLength > prevBlockLength) {
         LOG.warn("Unexpectedly short length on {}.",
-            prevLongest.src.getAbsolutePath());
+            prevLongest.src().getAbsolutePath());
       }
       longestBlockFiles.put(blockId, duplicate);
     }
@@ -1284,21 +1299,22 @@ public class DataStorage extends Storage {
     // arbitrarily selected by us.
     for (Iterator<LinkArgs> iter = all.iterator(); iter.hasNext(); ) {
       LinkArgs args = iter.next();
-      long blockId = Block.getBlockId(args.src.getName());
+      long blockId = Block.getBlockId(args.blockFile());
       LinkArgs bestDuplicate = longestBlockFiles.get(blockId);
       if (bestDuplicate == null) {
         continue; // file has no duplicates
       }
-      if (!bestDuplicate.src.getParent().equals(args.src.getParent())) {
-        LOG.warn("Discarding {}.", args.src.getAbsolutePath());
+      if (!bestDuplicate.src().getParent().equals(args.src().getParent())) {
+        LOG.warn("Discarding {}.", args.src().getAbsolutePath());
         iter.remove();
       }
     }
   }
 
-  static void linkBlocksHelper(File from, File to, int oldLV, HardLink hl,
-  boolean upgradeToIdBasedLayout, File blockRoot,
-      List<LinkArgs> idBasedLayoutSingleLinks) throws IOException {
+  static void linkBlocksHelper(File from, File to, HardLink hl,
+      boolean upgradeToIdBasedLayout, File blockRoot,
+      List<LinkArgs> idBasedLayoutSingleLinks, Map<File, File> pathCache)
+      throws IOException {
     if (!from.exists()) {
       return;
     }
@@ -1338,8 +1354,18 @@ public class DataStorage extends Storage {
               throw new IOException("Failed to mkdirs " + blockLocation);
             }
           }
-          idBasedLayoutSingleLinks.add(new LinkArgs(new File(from, blockName),
-              new File(blockLocation, blockName)));
+          /**
+           * The destination path is 32x32, so 1024 distinct paths. Therefore
+           * we cache the destination path and reuse the same File object on
+           * potentially thousands of blocks located on this volume.
+           * This method is called recursively so the cache is passed through
+           * each recursive call. There is one cache per volume, and it is only
+           * accessed by a single thread so no locking is needed.
+           */
+          File cachedDest = pathCache
+              .computeIfAbsent(blockLocation, k -> blockLocation);
+          idBasedLayoutSingleLinks.add(new LinkArgs(from,
+              cachedDest, blockName));
           hl.linkStats.countSingleLinks++;
         }
       } else {
@@ -1362,8 +1388,8 @@ public class DataStorage extends Storage {
     if (otherNames != null) {
       for (int i = 0; i < otherNames.length; i++) {
         linkBlocksHelper(new File(from, otherNames[i]),
-            new File(to, otherNames[i]), oldLV, hl, upgradeToIdBasedLayout,
-            blockRoot, idBasedLayoutSingleLinks);
+            new File(to, otherNames[i]), hl, upgradeToIdBasedLayout,
+            blockRoot, idBasedLayoutSingleLinks, pathCache);
       }
     }
   }
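
For reference, the memory saving comes from two combined changes visible in the
patch above: LinkArgs no longer stores two fully-resolved File objects per block
(which duplicated the parent-directory path strings in every entry), and the
destination-directory File objects are deduplicated through the new pathCache.
Below is a minimal, standalone Java sketch of that idea. The LinkArgs shape
mirrors the patch, but the PathCacheSketch class, the directory-layout
arithmetic in main(), and the block counts are illustrative assumptions, not
Hadoop code.

import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class PathCacheSketch {
  // New-style link args: share the parent-dir File objects and keep only
  // the short block file name per entry. Full paths are built on demand.
  static class LinkArgs {
    private final File srcDir;
    private final File dstDir;
    private final String blockFile;

    LinkArgs(File srcDir, File dstDir, String blockFile) {
      this.srcDir = srcDir;
      this.dstDir = dstDir;
      this.blockFile = blockFile;
    }

    File src() { return new File(srcDir, blockFile); }
    File dst() { return new File(dstDir, blockFile); }
  }

  public static void main(String[] args) {
    // One cache per volume, accessed by a single thread, so a plain
    // HashMap suffices -- mirroring the pathCache added in the patch.
    Map<File, File> pathCache = new HashMap<>();
    List<LinkArgs> links = new ArrayList<>();

    File from = new File("/data/dn/current/finalized");
    // Thousands of blocks map onto at most 32x32 = 1024 destination dirs.
    // computeIfAbsent returns the same canonical File object for each
    // repeated directory instead of allocating a new one per block.
    for (long blockId = 0; blockId < 10_000; blockId++) {
      File blockLocation = new File(from,
          "subdir" + (blockId % 32) + "/subdir" + (blockId / 32 % 32));
      File cachedDest = pathCache.computeIfAbsent(blockLocation, k -> k);
      links.add(new LinkArgs(from, cachedDest, "blk_" + blockId));
    }
    System.out.println("entries: " + links.size()
        + ", distinct dest dirs: " + pathCache.size());
  }
}

Because File.equals() compares paths, computeIfAbsent(k -> k) acts as a cheap
interning step: every block destined for the same directory shares one File
object, so a volume with millions of blocks holds at most 1024 directory
objects plus one short name string per block, rather than two full paths each.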
