dlogothetis commented on a change in pull request #110: Counter Mechanism
URL: https://github.com/apache/giraph/pull/110#discussion_r343971453
 
 

 ##########
 File path: giraph-core/src/main/java/org/apache/giraph/master/BspServiceMaster.java
 ##########
 @@ -1794,6 +1804,154 @@ private void doMasterCompute() {
     timerContext.stop();
   }
 
+  /**
+   * Use the counterGroupAndNames and context, to get the counter values,
+   * create a custom counter out of each, and add to the set of counters.
+   * Each custom counter is created with the SUM aggregation type, and its
+   * value is copied from the context counter that has the same group name
+   * and counter name.
+   *
+   * @param context Job context from which the counter values are read
+   * @param counterGroupAndNames Map from a counter group name to the set of
+   *                             counter names in that group
+   * @param counters Set of CustomCounter which will be populated
+   */
+  private void populateCountersFromContext(Mapper.Context context,
+           Map<String, Set<String>> counterGroupAndNames,
+           Set<CustomCounter> counters) {
+    Counter counter;
+    for (Map.Entry<String, Set<String>> entry :
+            counterGroupAndNames.entrySet()) {
+      String groupName = entry.getKey();
+      for (String counterName: entry.getValue()) {
+        CustomCounter customCounter = new CustomCounter(groupName, counterName,
+                CustomCounter.AGGREGATION.SUM);
+        counter = context.getCounter(groupName, counterName);
+        customCounter.setValue(counter.getValue());
+        counters.add(customCounter);
+      }
+    }
+  }
+
+  /**
+   * Receive the counters from the workers, and aggregate them with the
+   * master counters.
+   * The aggregated counters are stored in a thrift struct
+   */
+  private void aggregateCountersFromWorkersAndMaster() {
+    CustomCounters customCounters = new CustomCounters();
+    // Get the stats from the all the worker selected nodes
+    String workerFinishedPath = getWorkerFinishedPath(getApplicationAttempt(),
+            getSuperstep(), true);
+    List<String> workerFinishedPathList = null;
+    // Subtract 1 for the master
+    // TODO - what happens when there is only 1 worker?
+    int numWorkers = BspInputFormat.getMaxTasks(getConfiguration()) - 1;
+    if (numWorkers == 0) {
+      numWorkers += 1;
+    }
+    // Get the counter values from the zookeeper, written by the workers
+    // We keep retrying until all the workers have written
+    // TODO- wait for definite time only
+    try {
+      while (true) {
+        try {
+          workerFinishedPathList = getZkExt().getChildrenExt(
+                  workerFinishedPath, true,
+                  false, true);
+          LOG.info(String.format("Fetching counter values from " +
+                          "workers. Got %d out of %d",
+                  workerFinishedPathList.size(), numWorkers));
+          if (workerFinishedPathList.size() == numWorkers) {
+            break;
+          }
+        } catch (KeeperException e) {
+          LOG.info("Got Keeper exception, but will retry: ", e);
+        } catch (InterruptedException e) {
+          throw new IllegalStateException(
+                  "aggregateWorkerStats: InterruptedException", e);
+        }
+        Thread.sleep(1000);
+      }
+    } catch (InterruptedException ie) {
+      LOG.info("Interrupted exception");
+    }
+    for (String finishedPath : workerFinishedPathList) {
+      JSONArray jsonCounters = null;
+      try {
+        byte [] zkData =
+                getZkExt().getData(finishedPath, false, null);
+        jsonCounters = new JSONArray(new String(zkData,
+                Charset.defaultCharset()));
+        Set<CustomCounter> workerCounters = new HashSet<>();
+        for (int i = 0; i < jsonCounters.length(); i++) {
+          CustomCounter customCounter = new CustomCounter();
+          WritableUtils.readFieldsFromByteArray(Base64.decode(
+                  jsonCounters.getString(i)), customCounter);
+          workerCounters.add(customCounter);
+        }
+        customCounters.mergeCounters(workerCounters);
+      } catch (JSONException e) {
+        throw new IllegalStateException(
+                "aggregateWorkerStats: JSONException", e);
+      } catch (KeeperException e) {
+        throw new IllegalStateException(
+                "aggregateWorkerStats: KeeperException", e);
+      } catch (InterruptedException e) {
+        throw new IllegalStateException(
+                "aggregateWorkerStats: InterruptedException", e);
+      } catch (IOException e) {
+        throw new IllegalStateException("aggregateWorkerStats: IOException", e);
+      }
+    }
+    // Add master counters too
+    // TODO: ensure counters related to master only also appear with w=1
+    if (numWorkers != 1) {
+      // If numWorkers=1, then the master and worker share the counters
+      // Since we have already added the counters from the worker,
+      // we should not add them again here.
+      Mapper.Context context = getContext();
+      Counter counter;
+      Set<CustomCounter> masterCounterNames =
+              CustomCounters.getCustomCounters();
+      Set<CustomCounter> masterCounters = new HashSet<>();
+      for (CustomCounter customCounter : masterCounterNames) {
+        String groupName = customCounter.getGroupName();
+        String counterName = customCounter.getCounterName();
+        counter = context.getCounter(groupName, counterName);
+        customCounter.setValue(counter.getValue());
+        masterCounters.add(customCounter);
+      }
+      // Adding Netty related counters
+      Map<String, Set<String>> nettyCounters =
+              NettyClient.getCounterGroupsAndNames();
+      populateCountersFromContext(context, nettyCounters, masterCounters);
+      // Adding counters from MasterInputSplitsHandler
+      Map<String, Set<String>> inputSplitCounter =
+              MasterInputSplitsHandler.getCounterGroupAndNames();
+      populateCountersFromContext(context, inputSplitCounter, masterCounters);
+      customCounters.mergeCounters(masterCounters);
+    }
+    // Add GiraphStats
+    List<CustomCounter> allCounters = new ArrayList<>();
+    allCounters.addAll(GiraphStats.getInstance().getCounterList());
+    // Custom counters
+    allCounters.addAll(customCounters.getCounterList());
+    // Store in Thrift Struct
+    GiraphCountersThriftStruct.get().setCounters(allCounters);
 
 Review comment:
   Still not sure why we need a static instance here.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

Reply via email to