[ 
https://issues.apache.org/jira/browse/HDFS-16091?focusedWorklogId=658775&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-658775
 ]

ASF GitHub Bot logged work on HDFS-16091:
-----------------------------------------

                Author: ASF GitHub Bot
            Created on: 01/Oct/21 07:16
            Start Date: 01/Oct/21 07:16
    Worklog Time Spent: 10m 
      Work Description: jojochuang commented on a change in pull request #3374:
URL: https://github.com/apache/hadoop/pull/3374#discussion_r719923541



##########
File path: 
hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSUtilClient.java
##########
@@ -357,6 +357,13 @@ public static int compareBytes(byte[] left, byte[] right) {
     return path;
   }
 
+  /**
+   * Given a list of path components returns a string.
+   */
+  public static String byteArray2String(byte[][] pathComponents) {
+    return bytes2String(byteArray2bytes(pathComponents));

Review comment:
       can we add a test for this method?

##########
File path: 
hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java
##########
@@ -1460,6 +1472,64 @@ SnapshotDiffReport decodeResponse(Map<?, ?> json) {
     }.run();
   }
 
+  public SnapshotDiffReport getSnapshotDiffReport(final Path snapshotDir,
+      final String fromSnapshot, final String toSnapshot) throws IOException {
+    statistics.incrementReadOps(1);
+    storageStatistics.incrementOpCounter(OpType.GET_SNAPSHOT_DIFF);
+
+    if (!isValidSnapshotName(fromSnapshot) || 
!isValidSnapshotName(toSnapshot)) {
+      // In case the diff needs to be computed between a snapshot and the 
current
+      // tree, we should not do iterative diffReport computation as the 
iterative
+      // approach might fail if in between the rpc calls the current tree
+      // changes in absence of the global fsn lock.
+      return getSnapshotDiffReportWithoutListing(snapshotDir, fromSnapshot, 
toSnapshot);
+    } else {
+      byte[] startPath = DFSUtilClient.EMPTY_BYTES;
+      int index = -1;
+      SnapshotDiffReportGenerator snapshotDiffReport;
+      List<DiffReportListingEntry> modifiedList = new TreeList();
+      List<DiffReportListingEntry> createdList = new ChunkedArrayList<>();
+      List<DiffReportListingEntry> deletedList = new ChunkedArrayList<>();
+      SnapshotDiffReportListing report;
+
+      do {
+        try {
+          report = new FsPathResponseRunner<SnapshotDiffReportListing>(
+              GetOpParam.Op.GETSNAPSHOTDIFFLISTING,
+              snapshotDir,
+              new OldSnapshotNameParam(fromSnapshot),
+              new SnapshotNameParam(toSnapshot),
+              new 
SnapshotDiffStartPathParam(DFSUtilClient.bytes2String(startPath)),
+              new SnapshotDiffIndexParam(index)) {
+            @Override
+            SnapshotDiffReportListing decodeResponse(Map<?, ?> json) {
+              return JsonUtilClient.toSnapshotDiffReportListing(json);
+            }
+          }.run();
+        } catch (UnsupportedOperationException e) {
+          // In case the server doesn't support getSnapshotDiffReportListing,

Review comment:
       it would be nice to cache and remember this server does not support 
getSnapshotDiffReportListing.
   Maybe as a follow-up enhancement.

##########
File path: 
hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java
##########
@@ -1460,6 +1472,64 @@ SnapshotDiffReport decodeResponse(Map<?, ?> json) {
     }.run();
   }
 
+  public SnapshotDiffReport getSnapshotDiffReport(final Path snapshotDir,

Review comment:
       this is by and large copied from 
DistributedFileSystem.getSnapshotDiffReportInternal()

##########
File path: 
hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/JsonUtilClient.java
##########
@@ -834,6 +844,53 @@ public static SnapshotDiffReport 
toSnapshotDiffReport(final Map<?, ?> json) {
     return new SnapshotDiffReport.DiffReportEntry(type, sourcePath, 
targetPath);
   }
 
+  public static SnapshotDiffReportListing toSnapshotDiffReportListing(
+      final Map<?, ?> json) {
+    if (json == null) {
+      return null;
+    }
+
+    Map<?, ?> m =
+        (Map<?, ?>) json.get(SnapshotDiffReportListing.class.getSimpleName());
+    byte[] lastPath = DFSUtilClient.string2Bytes(getString(m, "lastPath", ""));
+    int lastIndex = getInt(m, "lastIndex", -1);
+    boolean isFromEarlier = getBoolean(m, "isFromEarlier", false);
+    List<DiffReportListingEntry> modifyList =
+        toDiffListingList(getList(m, "modifyList"));
+    List<DiffReportListingEntry> createList =
+        toDiffListingList(getList(m, "createList"));
+    List<DiffReportListingEntry> deleteList =
+        toDiffListingList(getList(m, "deleteList"));
+
+    return new SnapshotDiffReportListing(

Review comment:
       maybe we can add a builder for this class in the future.

##########
File path: 
hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java
##########
@@ -1445,12 +1451,18 @@ public void renameSnapshot(final Path path, final 
String snapshotOldName,
         new SnapshotNameParam(snapshotNewName)).run();
   }
 
-  public SnapshotDiffReport getSnapshotDiffReport(final Path snapshotDir,
-      final String fromSnapshot, final String toSnapshot) throws IOException {
-    statistics.incrementReadOps(1);
-    storageStatistics.incrementOpCounter(OpType.GET_SNAPSHOT_DIFF);
-    final HttpOpParam.Op op = GetOpParam.Op.GETSNAPSHOTDIFF;
-    return new FsPathResponseRunner<SnapshotDiffReport>(op, snapshotDir,
+  private boolean isValidSnapshotName(String snapshotName) {

Review comment:
       this is borrowed from DistributedFileSystem.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
-------------------

    Worklog Id:     (was: 658775)
    Time Spent: 1h 40m  (was: 1.5h)

> WebHDFS should support getSnapshotDiffReportListing
> ---------------------------------------------------
>
>                 Key: HDFS-16091
>                 URL: https://issues.apache.org/jira/browse/HDFS-16091
>             Project: Hadoop HDFS
>          Issue Type: Improvement
>            Reporter: Masatake Iwasaki
>            Assignee: Masatake Iwasaki
>            Priority: Major
>              Labels: pull-request-available
>          Time Spent: 1h 40m
>  Remaining Estimate: 0h
>
> When there are millions of diffs between two snapshots, the old 
> getSnapshotDiffReport() isn't scalable.



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

---------------------------------------------------------------------
To unsubscribe, e-mail: hdfs-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: hdfs-issues-h...@hadoop.apache.org

Reply via email to