[ 
https://issues.apache.org/jira/browse/HDFS-16039?focusedWorklogId=614063&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-614063
 ]

ASF GitHub Bot logged work on HDFS-16039:
-----------------------------------------

                Author: ASF GitHub Bot
            Created on: 23/Jun/21 15:13
            Start Date: 23/Jun/21 15:13
    Worklog Time Spent: 10m 
      Work Description: goiri commented on a change in pull request #3086:
URL: https://github.com/apache/hadoop/pull/3086#discussion_r657203156



##########
File path: 
hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/metrics/TestRBFMetrics.java
##########
@@ -382,4 +366,56 @@ private void testCapacity(FederationMBean bean) throws 
IOException {
     assertNotEquals(availableCapacity,
         BigInteger.valueOf(bean.getRemainingCapacity()));
   }
+
+  @Test
+  public void testDatanodeNumMetrics()
+      throws Exception {
+    Configuration routerConf = new RouterConfigBuilder()
+        .metrics()
+        .http()
+        .stateStore()
+        .rpc()
+        .build();
+    MiniRouterDFSCluster cluster = new MiniRouterDFSCluster(false, 1);
+    cluster.setNumDatanodesPerNameservice(0);
+    cluster.addNamenodeOverrides(routerConf);
+    cluster.startCluster();
+    routerConf.setTimeDuration(
+        RBFConfigKeys.DN_REPORT_CACHE_EXPIRE, 10000, TimeUnit.SECONDS);
+    cluster.addRouterOverrides(routerConf);
+    cluster.startRouters();
+    Router router = cluster.getRandomRouter().getRouter();
+    // Register and verify all NNs with all routers
+    cluster.registerNamenodes();
+    cluster.waitNamenodeRegistration();
+    RouterRpcServer rpcServer = router.getRpcServer();
+    RBFMetrics rbfMetrics = router.getMetrics();
+    // Create mock dn
+    DatanodeInfo[] dNInfo = new DatanodeInfo[4];
+    DatanodeInfo datanodeInfo = new DatanodeInfo.DatanodeInfoBuilder().build();
+    datanodeInfo.setDecommissioned();
+    dNInfo[0] = datanodeInfo;
+    datanodeInfo = new DatanodeInfo.DatanodeInfoBuilder().build();
+    datanodeInfo.setInMaintenance();
+    dNInfo[1] = datanodeInfo;
+    datanodeInfo = new DatanodeInfo.DatanodeInfoBuilder().build();
+    datanodeInfo.startMaintenance();
+    dNInfo[2] = datanodeInfo;
+    datanodeInfo = new DatanodeInfo.DatanodeInfoBuilder().build();
+    datanodeInfo.startDecommission();
+    dNInfo[3] = datanodeInfo;
+
+    rpcServer.getDnCache().put(HdfsConstants.DatanodeReportType.LIVE, dNInfo);

Review comment:
       This is a little unconventional.
   You should mark the getter as VisibleForTesting.

##########
File path: 
hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/RBFMetrics.java
##########
@@ -164,13 +163,13 @@ public RBFMetrics(Router router) throws IOException {
           RouterStore.class);
     }
 
-    // Initialize the cache for the DN reports
     Configuration conf = router.getConfig();
-    this.timeOut = conf.getTimeDuration(RBFConfigKeys.DN_REPORT_TIME_OUT,
-        RBFConfigKeys.DN_REPORT_TIME_OUT_MS_DEFAULT, TimeUnit.MILLISECONDS);
     this.topTokenRealOwners = conf.getInt(
         RBFConfigKeys.DFS_ROUTER_METRICS_TOP_NUM_TOKEN_OWNERS_KEY,
         RBFConfigKeys.DFS_ROUTER_METRICS_TOP_NUM_TOKEN_OWNERS_KEY_DEFAULT);
+
+    // Use RpcServer dnCache
+    this.dnCache = this.router.getRpcServer().getDnCache();

Review comment:
       There is not much benefit in getting this and storing it in an attribute.
   We can just call the getter each time we need to access it.

##########
File path: 
hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterRpc.java
##########
@@ -1757,7 +1757,7 @@ public void testRBFMetricsMethodsRelayOnStateStore() {
     // These methods relays on
     // {@link RBFMetrics#getActiveNamenodeRegistration()}
     assertEquals("{}", metrics.getNameservices());
-    assertEquals(0, metrics.getNumLiveNodes());
+    assertEquals(NUM_DNS * 2, metrics.getNumLiveNodes());

Review comment:
       Why is it like this now?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
-------------------

    Worklog Id:     (was: 614063)
    Time Spent: 2h 10m  (was: 2h)

> RBF:  Some indicators of RBFMetrics count inaccurately
> ------------------------------------------------------
>
>                 Key: HDFS-16039
>                 URL: https://issues.apache.org/jira/browse/HDFS-16039
>             Project: Hadoop HDFS
>          Issue Type: Bug
>          Components: rbf
>    Affects Versions: 3.4.0
>            Reporter: Xiangyi Zhu
>            Assignee: Xiangyi Zhu
>            Priority: Major
>              Labels: pull-request-available
>          Time Spent: 2h 10m
>  Remaining Estimate: 0h
>
> RBFMetrics#getNumLiveNodes, getNumNamenodes, getTotalCapacity
> The current algorithm sums the indicators reported by all NNs, which 
> leads to inaccurate counts. I think that for each ClusterID we only need 
> to take the maximum value, and then accumulate across ClusterIDs.



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

---------------------------------------------------------------------
To unsubscribe, e-mail: hdfs-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: hdfs-issues-h...@hadoop.apache.org

Reply via email to