This is an automated email from the ASF dual-hosted git repository.

wuzhiguo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/bigtop-manager.git


The following commit(s) were added to refs/heads/main by this push:
     new 743859b7 BIGTOP-4489: Collect ZooKeeper metrics to Prometheus  (#259)
743859b7 is described below

commit 743859b7aa1d7b31b3a10c4d1fde5945dc2e5c48
Author: Zhiguo Wu <[email protected]>
AuthorDate: Wed Aug 27 00:15:39 2025 +0800

    BIGTOP-4489: Collect ZooKeeper metrics to Prometheus  (#259)
---
 .../grpc/service/JobCacheServiceGrpcImpl.java      |  10 +-
 .../bigtop/manager/common/utils/FileUtils.java     |   5 +
 .../grpc/payload/ComponentCommandPayload.java      |   7 --
 .../manager/grpc/payload/JobCachePayload.java      |   4 +
 .../server/command/helper/JobCacheHelper.java      |  62 ++++++++----
 .../command/stage/AbstractComponentStage.java      |  23 -----
 .../server/command/task/AbstractComponentTask.java |   7 --
 .../bigtop/manager/server/utils/StackUtils.java    |  20 ++--
 .../infra/1.0.0/services/grafana/metainfo.xml      |   4 +-
 .../prometheus/configuration/prometheus-rule.xml   |  65 ------------
 .../prometheus/configuration/prometheus.xml        |   4 +-
 .../infra/1.0.0/services/prometheus/metainfo.xml   |   9 +-
 .../prometheus/template/rules/zookeeper.yml        | 111 +++++++++++++++++++++
 .../stack/bigtop/v3_3_0/hadoop/HadoopParams.java   |   8 +-
 .../stack/bigtop/v3_3_0/hbase/HBaseParams.java     |   4 +-
 .../stack/bigtop/v3_3_0/hive/HiveParams.java       |   4 +-
 .../stack/bigtop/v3_3_0/kafka/KafkaSetup.java      |   2 +-
 .../stack/bigtop/v3_3_0/solr/SolrParams.java       |   2 +-
 .../stack/bigtop/v3_3_0/solr/SolrSetup.java        |   2 +-
 .../bigtop/v3_3_0/zookeeper/ZookeeperSetup.java    |   2 +-
 .../manager/stack/core/executor/StackExecutor.java |   2 +-
 .../stack/core/spi/script/AbstractScript.java      |   4 +-
 .../manager/stack/core/utils/LocalSettings.java    |  35 +++++--
 .../stack/core/utils/LocalSettingsTest.java        |  10 +-
 .../stack/extra/v1_0_0/doris/DorisParams.java      |   2 +-
 .../extra/v1_0_0/seatunnel/SeaTunnelSetup.java     |   2 +-
 .../manager/stack/infra/param/InfraParams.java     |  99 +++++++++++++++++-
 .../stack/infra/v1_0_0/grafana/GrafanaParams.java  |   2 +-
 .../infra/v1_0_0/prometheus/PrometheusParams.java  |  46 ++++++---
 .../v1_0_0/prometheus/PrometheusServerScript.java  |   2 +-
 .../infra/v1_0_0/prometheus/PrometheusSetup.java   |  11 +-
 31 files changed, 376 insertions(+), 194 deletions(-)

diff --git a/bigtop-manager-agent/src/main/java/org/apache/bigtop/manager/agent/grpc/service/JobCacheServiceGrpcImpl.java b/bigtop-manager-agent/src/main/java/org/apache/bigtop/manager/agent/grpc/service/JobCacheServiceGrpcImpl.java
index 02864811..6ba57b0a 100644
--- a/bigtop-manager-agent/src/main/java/org/apache/bigtop/manager/agent/grpc/service/JobCacheServiceGrpcImpl.java
+++ b/bigtop-manager-agent/src/main/java/org/apache/bigtop/manager/agent/grpc/service/JobCacheServiceGrpcImpl.java
@@ -30,11 +30,13 @@ import io.grpc.stub.StreamObserver;
 import lombok.extern.slf4j.Slf4j;
 import net.devh.boot.grpc.server.service.GrpcService;
 
+import java.io.File;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
 
 import static 
org.apache.bigtop.manager.common.constants.CacheFiles.CLUSTER_INFO;
+import static 
org.apache.bigtop.manager.common.constants.CacheFiles.COMPONENTS_INFO;
 import static 
org.apache.bigtop.manager.common.constants.CacheFiles.CONFIGURATIONS_INFO;
 import static org.apache.bigtop.manager.common.constants.CacheFiles.HOSTS_INFO;
 import static org.apache.bigtop.manager.common.constants.CacheFiles.REPOS_INFO;
@@ -48,17 +50,21 @@ public class JobCacheServiceGrpcImpl extends 
JobCacheServiceGrpc.JobCacheService
     public void save(JobCacheRequest request, StreamObserver<JobCacheReply> 
responseObserver) {
         try {
             JobCachePayload payload = 
JsonUtils.readFromString(request.getPayload(), JobCachePayload.class);
-            String cacheDir = ProjectPathUtils.getAgentCachePath();
+            String cacheDir = ProjectPathUtils.getAgentCachePath() + 
File.separator + payload.getClusterId();
             Path p = Paths.get(cacheDir);
             if (!Files.exists(p)) {
                 Files.createDirectories(p);
             }
 
+            String dir = p.getParent().toFile().getAbsolutePath();
+            JsonUtils.writeToFile(dir + "/current", 
payload.getCurrentClusterId());
+
             JsonUtils.writeToFile(cacheDir + CONFIGURATIONS_INFO, 
payload.getConfigurations());
-            JsonUtils.writeToFile(cacheDir + HOSTS_INFO, 
payload.getComponentHosts());
+            JsonUtils.writeToFile(cacheDir + COMPONENTS_INFO, 
payload.getComponentHosts());
             JsonUtils.writeToFile(cacheDir + USERS_INFO, 
payload.getUserInfo());
             JsonUtils.writeToFile(cacheDir + REPOS_INFO, 
payload.getRepoInfo());
             JsonUtils.writeToFile(cacheDir + CLUSTER_INFO, 
payload.getClusterInfo());
+            JsonUtils.writeToFile(cacheDir + HOSTS_INFO, payload.getHosts());
 
             JobCacheReply reply = JobCacheReply.newBuilder()
                     .setCode(MessageConstants.SUCCESS_CODE)
diff --git a/bigtop-manager-common/src/main/java/org/apache/bigtop/manager/common/utils/FileUtils.java b/bigtop-manager-common/src/main/java/org/apache/bigtop/manager/common/utils/FileUtils.java
index 11fb315b..717b1701 100644
--- a/bigtop-manager-common/src/main/java/org/apache/bigtop/manager/common/utils/FileUtils.java
+++ b/bigtop-manager-common/src/main/java/org/apache/bigtop/manager/common/utils/FileUtils.java
@@ -33,6 +33,11 @@ import java.nio.charset.StandardCharsets;
 @NoArgsConstructor(access = AccessLevel.PRIVATE)
 public class FileUtils {
 
+    public static String readFile2Str(String filename) {
+        File file = new File(filename);
+        return readFile2Str(file);
+    }
+
     /**
      * Get Content
      *
diff --git a/bigtop-manager-grpc/src/main/java/org/apache/bigtop/manager/grpc/payload/ComponentCommandPayload.java b/bigtop-manager-grpc/src/main/java/org/apache/bigtop/manager/grpc/payload/ComponentCommandPayload.java
index f40e0a70..80457c4f 100644
--- a/bigtop-manager-grpc/src/main/java/org/apache/bigtop/manager/grpc/payload/ComponentCommandPayload.java
+++ b/bigtop-manager-grpc/src/main/java/org/apache/bigtop/manager/grpc/payload/ComponentCommandPayload.java
@@ -24,7 +24,6 @@ import org.apache.bigtop.manager.grpc.pojo.TemplateInfo;
 import lombok.Data;
 
 import java.util.List;
-import java.util.Map;
 
 @Data
 public class ComponentCommandPayload {
@@ -46,10 +45,4 @@ public class ComponentCommandPayload {
     private List<PackageSpecificInfo> packageSpecifics;
 
     private List<TemplateInfo> templates;
-
-    /**
-     * This field is exclusively used for Prometheus and Grafana within the 
infra services.
-     * Includes cluster and corresponding hostname.
-     */
-    private Map<String, List<String>> clusterHosts;
 }
diff --git a/bigtop-manager-grpc/src/main/java/org/apache/bigtop/manager/grpc/payload/JobCachePayload.java b/bigtop-manager-grpc/src/main/java/org/apache/bigtop/manager/grpc/payload/JobCachePayload.java
index 39fef48f..6bc152ae 100644
--- a/bigtop-manager-grpc/src/main/java/org/apache/bigtop/manager/grpc/payload/JobCachePayload.java
+++ b/bigtop-manager-grpc/src/main/java/org/apache/bigtop/manager/grpc/payload/JobCachePayload.java
@@ -29,6 +29,8 @@ import java.util.Map;
 @Data
 public class JobCachePayload {
 
+    private Long currentClusterId;
+
     private Long clusterId;
 
     private ClusterInfo clusterInfo;
@@ -40,4 +42,6 @@ public class JobCachePayload {
     private Map<String, Map<String, String>> configurations;
 
     private Map<String, List<String>> componentHosts;
+
+    private List<String> hosts;
 }
diff --git a/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/command/helper/JobCacheHelper.java b/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/command/helper/JobCacheHelper.java
index 5ee1368e..d1c547da 100644
--- a/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/command/helper/JobCacheHelper.java
+++ b/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/command/helper/JobCacheHelper.java
@@ -54,8 +54,6 @@ import java.util.concurrent.CompletableFuture;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.stream.Collectors;
 
-import static 
org.apache.bigtop.manager.common.constants.Constants.ALL_HOST_KEY;
-
 public class JobCacheHelper {
 
     private static ClusterDao clusterDao;
@@ -90,19 +88,31 @@ public class JobCacheHelper {
 
         List<CompletableFuture<Boolean>> futures = new ArrayList<>();
         for (HostPO hostPO : hostPOList) {
-            genClusterPayload(payload, hostPO.getClusterId());
-            JobCacheRequest request = JobCacheRequest.newBuilder()
-                    .setJobId(jobId)
-                    .setPayload(JsonUtils.writeAsString(payload))
-                    .build();
-            futures.add(CompletableFuture.supplyAsync(() -> {
-                JobCacheServiceGrpc.JobCacheServiceBlockingStub stub = 
GrpcClient.getBlockingStub(
-                        hostPO.getHostname(),
-                        hostPO.getGrpcPort(),
-                        JobCacheServiceGrpc.JobCacheServiceBlockingStub.class);
-                JobCacheReply reply = stub.save(request);
-                return reply != null && reply.getCode() == 
MessageConstants.SUCCESS_CODE;
-            }));
+            payload.setCurrentClusterId(hostPO.getClusterId());
+
+            List<Long> clusterIds = new ArrayList<>();
+            if (hostRequiresAllData(hostPO.getHostname())) {
+                clusterIds.addAll(
+                        
clusterDao.findAll().stream().map(ClusterPO::getId).toList());
+            } else {
+                clusterIds.add(hostPO.getClusterId());
+            }
+
+            for (Long clusterId : clusterIds) {
+                genClusterPayload(payload, clusterId);
+                JobCacheRequest request = JobCacheRequest.newBuilder()
+                        .setJobId(jobId)
+                        .setPayload(JsonUtils.writeAsString(payload))
+                        .build();
+                futures.add(CompletableFuture.supplyAsync(() -> {
+                    JobCacheServiceGrpc.JobCacheServiceBlockingStub stub = 
GrpcClient.getBlockingStub(
+                            hostPO.getHostname(),
+                            hostPO.getGrpcPort(),
+                            
JobCacheServiceGrpc.JobCacheServiceBlockingStub.class);
+                    JobCacheReply reply = stub.save(request);
+                    return reply != null && reply.getCode() == 
MessageConstants.SUCCESS_CODE;
+                }));
+            }
         }
 
         List<Boolean> results = futures.stream()
@@ -139,22 +149,22 @@ public class JobCacheHelper {
         Map<String, List<String>> componentHostMap = 
payload.getComponentHosts();
         componentHostMap.putAll(getComponentHostMap(clusterId));
 
+        List<String> hosts = hostDao.findAllByClusterId(clusterId).stream()
+                .map(HostPO::getHostname)
+                .toList();
+
         payload.setClusterId(clusterId);
         payload.setClusterInfo(clusterInfo);
         payload.setConfigurations(serviceConfigMap);
         payload.setComponentHosts(componentHostMap);
+        payload.setHosts(hosts);
     }
 
     private static void genGlobalPayload(JobCachePayload payload) {
         List<RepoPO> repoPOList = repoDao.findAll();
-        List<HostPO> hostPOList = hostDao.findAll();
-
         Map<String, Map<String, String>> serviceConfigMap = 
getServiceConfigMap(0L);
 
-        Map<String, List<String>> componentHostMap = new HashMap<>();
-        List<String> allHostnames = 
hostPOList.stream().map(HostPO::getHostname).toList();
-        componentHostMap.put(ALL_HOST_KEY, allHostnames);
-        componentHostMap.putAll(getComponentHostMap(0L));
+        Map<String, List<String>> componentHostMap = new 
HashMap<>(getComponentHostMap(0L));
 
         List<RepoInfo> repoList = new ArrayList<>();
         repoPOList.forEach(repoPO -> {
@@ -222,6 +232,16 @@ public class JobCacheHelper {
 
     private static Boolean hostRequiresAllData(String hostname) {
         // Some services like prometheus requires all clusters info to collect 
metrics.
+        List<ComponentPO> components = componentDao.findByQuery(
+                ComponentQuery.builder().hostname(hostname).build());
+        for (ComponentPO component : components) {
+            ServiceDTO serviceDTO = 
StackUtils.getServiceDTOByComponentName(component.getName());
+            StackDTO stack = StackUtils.getServiceStack(serviceDTO.getName());
+            if (stack.getStackName().equals("infra")) {
+                return true;
+            }
+        }
+
         return false;
     }
 }
diff --git a/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/command/stage/AbstractComponentStage.java b/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/command/stage/AbstractComponentStage.java
index ce610b8b..5908cbad 100644
--- a/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/command/stage/AbstractComponentStage.java
+++ b/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/command/stage/AbstractComponentStage.java
@@ -19,7 +19,6 @@
 package org.apache.bigtop.manager.server.command.stage;
 
 import org.apache.bigtop.manager.dao.po.ClusterPO;
-import org.apache.bigtop.manager.dao.po.HostPO;
 import org.apache.bigtop.manager.dao.repository.ClusterDao;
 import org.apache.bigtop.manager.server.command.task.TaskContext;
 import org.apache.bigtop.manager.server.holder.SpringContextHolder;
@@ -27,11 +26,6 @@ import 
org.apache.bigtop.manager.server.model.dto.ComponentDTO;
 import org.apache.bigtop.manager.server.model.dto.ServiceDTO;
 import org.apache.bigtop.manager.server.utils.StackUtils;
 
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
 public abstract class AbstractComponentStage extends AbstractStage {
 
     private ClusterDao clusterDao;
@@ -78,23 +72,6 @@ public abstract class AbstractComponentStage extends 
AbstractStage {
         taskContext.setServiceUser(serviceDTO.getUser());
         taskContext.setUserGroup(clusterPO == null ? null : 
clusterPO.getUserGroup());
         taskContext.setRootDir(clusterPO == null ? null : 
clusterPO.getRootDir());
-
-        Map<String, Object> properties = new HashMap<>();
-        properties.put("clusterHosts", getClusterHosts());
-        taskContext.setProperties(properties);
         return taskContext;
     }
-
-    protected Map<String, List<String>> getClusterHosts() {
-        Map<String, List<String>> clusterHosts = new HashMap<>();
-        for (ClusterPO clusterPO : clusterDao.findAll()) {
-            List<String> hosts = new ArrayList<>();
-            for (HostPO hostPO : 
hostDao.findAllByClusterId(clusterPO.getId())) {
-                String host = hostPO.getHostname();
-                hosts.add(host);
-            }
-            clusterHosts.put(clusterPO.getName(), hosts);
-        }
-        return clusterHosts;
-    }
 }
diff --git a/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/command/task/AbstractComponentTask.java b/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/command/task/AbstractComponentTask.java
index 9d141fc2..d3e621ef 100644
--- a/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/command/task/AbstractComponentTask.java
+++ b/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/command/task/AbstractComponentTask.java
@@ -39,7 +39,6 @@ import org.apache.bigtop.manager.server.utils.StackUtils;
 
 import java.util.ArrayList;
 import java.util.List;
-import java.util.Map;
 
 public abstract class AbstractComponentTask extends AbstractTask {
 
@@ -83,12 +82,6 @@ public abstract class AbstractComponentTask extends 
AbstractTask {
         payload.setTemplates(convertTemplateInfo(serviceDTO.getName(), 
serviceDTO.getTemplates()));
         
payload.setPackageSpecifics(convertPackageSpecificInfo(serviceDTO.getPackageSpecifics()));
 
-        Map<String, Object> properties = taskContext.getProperties();
-        if (stackDTO.getStackName().equals("infra")) {
-            Map<String, List<String>> clusterHosts = (Map<String, 
List<String>>) properties.get("clusterHosts");
-            payload.setClusterHosts(clusterHosts);
-        }
-
         ComponentCommandRequest.Builder requestBuilder = 
ComponentCommandRequest.newBuilder();
         requestBuilder.setPayload(JsonUtils.writeAsString(payload));
         requestBuilder.setTaskId(getTaskPO().getId());
diff --git a/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/utils/StackUtils.java b/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/utils/StackUtils.java
index 42002e08..c00f57ee 100644
--- a/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/utils/StackUtils.java
+++ b/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/utils/StackUtils.java
@@ -164,12 +164,20 @@ public class StackUtils {
     private static void parseServiceTemplates(File file, String serviceName) {
         File templateFolder = new File(file.getAbsolutePath(), 
TEMPLATE_FOLDER);
         if (templateFolder.exists()) {
-            for (File templateFile :
-                    Optional.ofNullable(templateFolder.listFiles()).orElse(new 
File[0])) {
-                String filename = templateFile.getName();
-                String content = FileUtils.readFile2Str(templateFile);
-                Map<String, String> map = 
SERVICE_TEMPLATE_MAP.computeIfAbsent(serviceName, k -> new HashMap<>());
-                map.put(filename, content);
+            Map<String, String> map = 
SERVICE_TEMPLATE_MAP.computeIfAbsent(serviceName, k -> new HashMap<>());
+            parseTemplateFiles(templateFolder, templateFolder, map);
+        }
+    }
+
+    private static void parseTemplateFiles(File templateRoot, File 
currentFolder, Map<String, String> templateMap) {
+        for (File file : 
Optional.ofNullable(currentFolder.listFiles()).orElse(new File[0])) {
+            if (file.isDirectory()) {
+                parseTemplateFiles(templateRoot, file, templateMap);
+            } else {
+                String relativePath =
+                        
templateRoot.toURI().relativize(file.toURI()).getPath();
+                String content = FileUtils.readFile2Str(file);
+                templateMap.put(relativePath, content);
             }
         }
     }
diff --git a/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/grafana/metainfo.xml b/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/grafana/metainfo.xml
index 45465e02..c7f7e25e 100644
--- a/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/grafana/metainfo.xml
+++ b/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/grafana/metainfo.xml
@@ -75,11 +75,11 @@
         <templates>
             <template>
                 <src>cluster-dashboard.json</src>
-                <dest>conf/provisioning/dashboards/cluster</dest>
+                
<dest>conf/provisioning/dashboards/cluster/cluster-dashboard.json</dest>
             </template>
             <template>
                 <src>host-dashboard.json</src>
-                <dest>conf/provisioning/dashboards/host</dest>
+                
<dest>conf/provisioning/dashboards/host/host-dashboard.json</dest>
             </template>
         </templates>
 
diff --git a/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/configuration/prometheus-rule.xml b/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/configuration/prometheus-rule.xml
deleted file mode 100644
index e7ee51e1..00000000
--- a/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/configuration/prometheus-rule.xml
+++ /dev/null
@@ -1,65 +0,0 @@
-<?xml version="1.0"?>
-<!--
-  ~ Licensed to the Apache Software Foundation (ASF) under one
-  ~ or more contributor license agreements.  See the NOTICE file
-  ~ distributed with this work for additional information
-  ~ regarding copyright ownership.  The ASF licenses this file
-  ~ to you under the Apache License, Version 2.0 (the
-  ~ "License"); you may not use this file except in compliance
-  ~ with the License.  You may obtain a copy of the License at
-  ~
-  ~    https://www.apache.org/licenses/LICENSE-2.0
-  ~
-  ~ Unless required by applicable law or agreed to in writing,
-  ~ software distributed under the License is distributed on an
-  ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  ~ KIND, either express or implied.  See the License for the
-  ~ specific language governing permissions and limitations
-  ~ under the License.
--->
-
-<configuration>
-    <property>
-        <name>rules_file_name</name>
-        <value>prometheus_rules.yml</value>
-        <description>Rules file name</description>
-    </property>
-    <property>
-        <name>content</name>
-        <description>This is the freemarker template for rules 
file</description>
-        <value><![CDATA[
-groups:
-  # Recording rules group: Used to calculate and save new aggregated metrics
-  - name: example_recording_rules
-    interval: 1m  # The frequency at which the rules are evaluated
-
-    rules:
-      # Recording rule: Calculate the average CPU usage over the last 5 
minutes for each job
-      - record: job:cpu_usage:avg
-        expr: avg(rate(node_cpu_seconds_total{mode="user"}[5m])) by (job)
-        # This creates a new metric `job:cpu_usage:avg` representing the 
average CPU usage per job
-
-  # Alerting rules group: Used to trigger alerts based on conditions
-  - name: example_alerting_rules
-    interval: 1m  # The frequency at which the alerting rules are evaluated
-
-    rules:
-      # Alerting rule: Trigger an alert if the average CPU usage is over 90% 
for the last 5 minutes
-      - alert: HighCpuUsage
-        expr: avg(rate(node_cpu_seconds_total{mode="user"}[5m])) by (instance) 
> 0.9
-        # This expression checks if the average CPU usage over the last 5 
minutes for each instance is greater than 90%
-        for: 5m  # The condition must hold true for 5 minutes before the alert 
is triggered
-        labels:
-          severity: critical  # Set the severity of the alert as 'critical'
-        annotations:
-          summary: "CPU usage on instance {{ $labels.instance }} is over 90% 
for the last 5 minutes"
-          # Summary of the alert that will appear when it triggers
-          description: "The CPU usage on instance {{ $labels.instance }} has 
been over 90% for the past 5 minutes."
-          # Detailed description of the alert that will provide more context
-]]>
-        </value>
-        <attrs>
-            <type>longtext</type>
-        </attrs>
-    </property>
-</configuration>
\ No newline at end of file
diff --git a/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/configuration/prometheus.xml b/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/configuration/prometheus.xml
index 215bec8b..201982bf 100644
--- a/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/configuration/prometheus.xml
+++ b/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/configuration/prometheus.xml
@@ -39,9 +39,7 @@ global:
 # Rule files specifies a list of globs. Rules and alerts are read from
 # all matching files.
 rule_files:
-<#if rules_file_name??>
-  - ${rules_file_name}
-</#if>
+  - rules/zookeeper.yml
 
 # A scrape configuration containing exactly one endpoint to scrape:
 # Here it's Prometheus itself.
diff --git a/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/metainfo.xml b/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/metainfo.xml
index dfc1850b..77d3297f 100644
--- a/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/metainfo.xml
+++ b/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/metainfo.xml
@@ -71,5 +71,12 @@
                 </packages>
             </package-specific>
         </package-specifics>
+
+        <templates>
+            <template>
+                <src>rules/zookeeper.yml</src>
+                <dest>rules/zookeeper.yml</dest>
+            </template>
+        </templates>
     </service>
-</metainfo>
\ No newline at end of file
+</metainfo>
diff --git a/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/template/rules/zookeeper.yml b/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/template/rules/zookeeper.yml
new file mode 100644
index 00000000..2cba399d
--- /dev/null
+++ b/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/template/rules/zookeeper.yml
@@ -0,0 +1,111 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#    https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+groups:
+  - name: zookeeper-alerts
+    rules:
+      - alert: ZooKeeper server is down
+        expr: up{job=~".*-zookeeper"} == 0
+        for: 1m
+        labels:
+          severity: critical
+        annotations:
+          summary: "Instance {{ $labels.instance }} ZooKeeper server is down"
+          description: "{{ $labels.instance }} of job {{$labels.job}} 
ZooKeeper server is down: [{{ $value }}]."
+
+      - alert: create too many znodes
+        expr: znode_count{job=~".*-zookeeper"} > 1000000
+        for: 1m
+        labels:
+          severity: warning
+        annotations:
+          summary: "Instance {{ $labels.instance }} create too many znodes"
+          description: "{{ $labels.instance }} of job {{$labels.job}} create 
too many znodes: [{{ $value }}]."
+
+      - alert: create too many connections
+        expr: num_alive_connections{job=~".*-zookeeper"} > 50 # suppose we use 
the default maxClientCnxns: 60
+        for: 1m
+        labels:
+          severity: warning
+        annotations:
+          summary: "Instance {{ $labels.instance }} create too many 
connections"
+          description: "{{ $labels.instance }} of job {{$labels.job}} create 
too many connections: [{{ $value }}]."
+
+      - alert: znode total occupied memory is too big
+        expr: approximate_data_size{job=~".*-zookeeper"} /1024 /1024 > 1 * 
1024 # more than 1024 MB(1 GB)
+        for: 1m
+        labels:
+          severity: warning
+        annotations:
+          summary: "Instance {{ $labels.instance }} znode total occupied 
memory is too big"
+          description: "{{ $labels.instance }} of job {{$labels.job}} znode 
total occupied memory is too big: [{{ $value }}] MB."
+
+      - alert: set too many watch
+        expr: watch_count{job=~".*-zookeeper"} > 10000
+        for: 1m
+        labels:
+          severity: warning
+        annotations:
+          summary: "Instance {{ $labels.instance }} set too many watch"
+          description: "{{ $labels.instance }} of job {{$labels.job}} set too 
many watch: [{{ $value }}]."
+
+      - alert: a leader election happens
+        expr: increase(election_time_count{job=~".*-zookeeper"}[5m]) > 0
+        for: 1m
+        labels:
+          severity: warning
+        annotations:
+          summary: "Instance {{ $labels.instance }} a leader election happens"
+          description: "{{ $labels.instance }} of job {{$labels.job}} a leader 
election happens: [{{ $value }}]."
+
+      - alert: open too many files
+        expr: open_file_descriptor_count{job=~".*-zookeeper"} > 300
+        for: 1m
+        labels:
+          severity: warning
+        annotations:
+          summary: "Instance {{ $labels.instance }} open too many files"
+          description: "{{ $labels.instance }} of job {{$labels.job}} open too 
many files: [{{ $value }}]."
+
+      - alert: fsync time is too long
+        expr: rate(fsynctime_sum{job=~".*-zookeeper"}[1m]) > 100
+        for: 1m
+        labels:
+          severity: warning
+        annotations:
+          summary: "Instance {{ $labels.instance }} fsync time is too long"
+          description: "{{ $labels.instance }} of job {{$labels.job}} fsync 
time is too long: [{{ $value }}]."
+
+      - alert: take snapshot time is too long
+        expr: rate(snapshottime_sum{job=~".*-zookeeper"}[5m]) > 100
+        for: 1m
+        labels:
+          severity: warning
+        annotations:
+          summary: "Instance {{ $labels.instance }} take snapshot time is too 
long"
+          description: "{{ $labels.instance }} of job {{$labels.job}} take 
snapshot time is too long: [{{ $value }}]."
+
+      - alert: avg latency is too high
+        expr: avg_latency{job=~".*-zookeeper"} > 100
+        for: 1m
+        labels:
+          severity: warning
+        annotations:
+          summary: "Instance {{ $labels.instance }} avg latency is too high"
+          description: "{{ $labels.instance }} of job {{$labels.job}} avg 
latency is too high: [{{ $value }}]."
diff --git a/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/hadoop/HadoopParams.java b/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/hadoop/HadoopParams.java
index b3110a7a..356c6e03 100644
--- a/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/hadoop/HadoopParams.java
+++ b/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/hadoop/HadoopParams.java
@@ -70,7 +70,7 @@ public class HadoopParams extends BigtopParams {
         super(componentCommandPayload);
         globalParamsMap.put("hdfs_user", user());
         globalParamsMap.put("hdfs_group", group());
-        globalParamsMap.put("datanode_hosts", LocalSettings.hosts("datanode"));
+        globalParamsMap.put("datanode_hosts", 
LocalSettings.componentHosts("datanode"));
         globalParamsMap.put("java_home", javaHome());
         globalParamsMap.put("hadoop_home", serviceHome());
         globalParamsMap.put("hadoop_conf_dir", confDir());
@@ -96,7 +96,7 @@ public class HadoopParams extends BigtopParams {
     @GlobalParams
     public Map<String, Object> coreSite() {
         Map<String, Object> coreSite = 
LocalSettings.configurations(getServiceName(), "core-site");
-        List<String> namenodeList = LocalSettings.hosts("namenode");
+        List<String> namenodeList = LocalSettings.componentHosts("namenode");
         if (!namenodeList.isEmpty()) {
             coreSite.put(
                     "fs.defaultFS", ((String) 
coreSite.get("fs.defaultFS")).replace("localhost", namenodeList.get(0)));
@@ -112,7 +112,7 @@ public class HadoopParams extends BigtopParams {
     @GlobalParams
     public Map<String, Object> hdfsSite() {
         Map<String, Object> hdfsSite = 
LocalSettings.configurations(getServiceName(), "hdfs-site");
-        List<String> namenodeList = LocalSettings.hosts("namenode");
+        List<String> namenodeList = LocalSettings.componentHosts("namenode");
         if (!namenodeList.isEmpty()) {
             hdfsSite.put(
                     "dfs.namenode.rpc-address",
@@ -147,7 +147,7 @@ public class HadoopParams extends BigtopParams {
     @GlobalParams
     public Map<String, Object> yarnSite() {
         Map<String, Object> yarnSite = 
LocalSettings.configurations(getServiceName(), "yarn-site");
-        List<String> resourcemanagerList = 
LocalSettings.hosts("resourcemanager");
+        List<String> resourcemanagerList = 
LocalSettings.componentHosts("resourcemanager");
         if (!resourcemanagerList.isEmpty()) {
             yarnSite.put("yarn.resourcemanager.hostname", 
MessageFormat.format("{0}", resourcemanagerList.get(0)));
         }
diff --git 
a/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/hbase/HBaseParams.java
 
b/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/hbase/HBaseParams.java
index ab8da72f..cfb29f32 100644
--- 
a/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/hbase/HBaseParams.java
+++ 
b/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/hbase/HBaseParams.java
@@ -56,7 +56,7 @@ public class HBaseParams extends BigtopParams {
         globalParamsMap.put("security_enabled", false);
         globalParamsMap.put("hbase_user", user());
         globalParamsMap.put("hbase_group", group());
-        globalParamsMap.put("regionserver_hosts", 
LocalSettings.hosts("hbase_regionserver"));
+        globalParamsMap.put("regionserver_hosts", 
LocalSettings.componentHosts("hbase_regionserver"));
 
         hbaseMasterPidFile = hbasePidDir + "/hbase-" + user() + "-master.pid";
         hbaseRegionServerPidFile = hbasePidDir + "/hbase-" + user() + 
"-regionserver.pid";
@@ -75,7 +75,7 @@ public class HBaseParams extends BigtopParams {
     @GlobalParams
     public Map<String, Object> hbaseSite() {
         Map<String, Object> configurations = 
LocalSettings.configurations(getServiceName(), "hbase-site");
-        List<String> zookeeperQuorum = LocalSettings.hosts("zookeeper_server");
+        List<String> zookeeperQuorum = 
LocalSettings.componentHosts("zookeeper_server");
         Map<String, Object> zooCfg = LocalSettings.configurations("zookeeper", 
"zoo.cfg");
 
         // Auto generate properties for hbase-site.xml
diff --git 
a/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/hive/HiveParams.java
 
b/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/hive/HiveParams.java
index 176f9b4f..c20cbd25 100644
--- 
a/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/hive/HiveParams.java
+++ 
b/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/hive/HiveParams.java
@@ -82,13 +82,13 @@ public class HiveParams extends BigtopParams {
 
         // Auto generate zookeeper properties for hive-site.xml
         Map<String, Object> zooCfg = LocalSettings.configurations("zookeeper", 
"zoo.cfg");
-        List<String> zookeeperQuorum = LocalSettings.hosts("zookeeper_server");
+        List<String> zookeeperQuorum = 
LocalSettings.componentHosts("zookeeper_server");
 
         configurations.put("hive.zookeeper.client.port", 
zooCfg.get("clientPort"));
         configurations.put("hive.zookeeper.quorum", String.join(",", 
zookeeperQuorum));
 
         // Auto generate database properties for hive-site.xml
-        String mysqlHost = LocalSettings.hosts("mysql_server").get(0);
+        String mysqlHost = LocalSettings.componentHosts("mysql_server").get(0);
         String mysqlPassword = LocalSettings.configurations("mysql", "common")
                 .get("root_password")
                 .toString();
diff --git 
a/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/kafka/KafkaSetup.java
 
b/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/kafka/KafkaSetup.java
index f94049b1..e7e13b39 100644
--- 
a/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/kafka/KafkaSetup.java
+++ 
b/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/kafka/KafkaSetup.java
@@ -53,7 +53,7 @@ public class KafkaSetup {
         LinuxFileUtils.createDirectories(kafkaParams.getKafkaLogDir(), 
kafkaUser, kafkaGroup, PERMISSION_755, true);
         LinuxFileUtils.createDirectories(kafkaParams.getKafkaPidDir(), 
kafkaUser, kafkaGroup, PERMISSION_755, true);
 
-        List<String> zookeeperServerHosts = 
LocalSettings.hosts("zookeeper_server");
+        List<String> zookeeperServerHosts = 
LocalSettings.componentHosts("zookeeper_server");
         Map<String, Object> paramMap = new HashMap<>();
         paramMap.put("zk_server_list", zookeeperServerHosts);
         paramMap.put("host", kafkaParams.hostname());
diff --git 
a/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/solr/SolrParams.java
 
b/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/solr/SolrParams.java
index f08d6896..e4408d24 100644
--- 
a/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/solr/SolrParams.java
+++ 
b/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/solr/SolrParams.java
@@ -71,7 +71,7 @@ public class SolrParams extends BigtopParams {
         solrPort = (String) solrEnv.get("solr_port");
         solrPidFile = solrPidDir + "/solr-" + solrPort + ".pid";
 
-        List<String> ZookeeperServerHosts = 
LocalSettings.hosts("zookeeper_server");
+        List<String> ZookeeperServerHosts = 
LocalSettings.componentHosts("zookeeper_server");
         Map<String, Object> ZKPort = LocalSettings.configurations("zookeeper", 
"zoo.cfg");
         String clientPort = (String) ZKPort.get("clientPort");
         zNode = (String) solrEnv.get("solr_znode");
diff --git 
a/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/solr/SolrSetup.java
 
b/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/solr/SolrSetup.java
index e4543a25..f38c1892 100644
--- 
a/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/solr/SolrSetup.java
+++ 
b/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/solr/SolrSetup.java
@@ -51,7 +51,7 @@ public class SolrSetup {
         LinuxFileUtils.createDirectories(solrParams.getSolrLogDir(), solrUser, 
solrGroup, PERMISSION_755, true);
         LinuxFileUtils.createDirectories(solrParams.getSolrPidDir(), solrUser, 
solrGroup, PERMISSION_755, true);
 
-        List<String> zookeeperServerHosts = 
LocalSettings.hosts("zookeeper_server");
+        List<String> zookeeperServerHosts = 
LocalSettings.componentHosts("zookeeper_server");
         Map<String, Object> paramMap = new HashMap<>();
         paramMap.put("zookeeper_quorum", zookeeperServerHosts);
         paramMap.put("host", solrParams.hostname());
diff --git 
a/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/zookeeper/ZookeeperSetup.java
 
b/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/zookeeper/ZookeeperSetup.java
index 1721b0f1..2717ea10 100644
--- 
a/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/zookeeper/ZookeeperSetup.java
+++ 
b/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/zookeeper/ZookeeperSetup.java
@@ -48,7 +48,7 @@ public class ZookeeperSetup {
         String zookeeperGroup = zookeeperParams.group();
         Map<String, Object> zookeeperEnv = zookeeperParams.zookeeperEnv();
         Map<String, Object> zooCfg = zookeeperParams.zooCfg();
-        List<String> zkHostList = LocalSettings.hosts("zookeeper_server");
+        List<String> zkHostList = 
LocalSettings.componentHosts("zookeeper_server");
 
         LinuxFileUtils.createDirectories(
                 zookeeperParams.getZookeeperDataDir(), zookeeperUser, 
zookeeperGroup, Constants.PERMISSION_755, true);
diff --git 
a/bigtop-manager-stack/bigtop-manager-stack-core/src/main/java/org/apache/bigtop/manager/stack/core/executor/StackExecutor.java
 
b/bigtop-manager-stack/bigtop-manager-stack-core/src/main/java/org/apache/bigtop/manager/stack/core/executor/StackExecutor.java
index 16067ffd..dd0278bc 100644
--- 
a/bigtop-manager-stack/bigtop-manager-stack-core/src/main/java/org/apache/bigtop/manager/stack/core/executor/StackExecutor.java
+++ 
b/bigtop-manager-stack/bigtop-manager-stack-core/src/main/java/org/apache/bigtop/manager/stack/core/executor/StackExecutor.java
@@ -44,7 +44,7 @@ public class StackExecutor {
 
     private static final Map<String, Hook> HOOK_MAP = new 
PrioritySPIFactory<>(Hook.class).getSPIMap();
 
-    private static Script 
getCommandScript(org.apache.bigtop.manager.grpc.payload.ComponentCommandPayload 
payload) {
+    private static Script getCommandScript(ComponentCommandPayload payload) {
         String componentName = payload.getComponentName();
         Script script = SCRIPT_MAP.get(componentName);
         if (script == null) {
diff --git 
a/bigtop-manager-stack/bigtop-manager-stack-core/src/main/java/org/apache/bigtop/manager/stack/core/spi/script/AbstractScript.java
 
b/bigtop-manager-stack/bigtop-manager-stack-core/src/main/java/org/apache/bigtop/manager/stack/core/spi/script/AbstractScript.java
index 913d66cd..cc53a12c 100644
--- 
a/bigtop-manager-stack/bigtop-manager-stack-core/src/main/java/org/apache/bigtop/manager/stack/core/spi/script/AbstractScript.java
+++ 
b/bigtop-manager-stack/bigtop-manager-stack-core/src/main/java/org/apache/bigtop/manager/stack/core/spi/script/AbstractScript.java
@@ -77,8 +77,8 @@ public abstract class AbstractScript implements Script {
     public ShellResult configure(Params params) {
         List<TemplateInfo> templates = params.templates();
         for (TemplateInfo template : templates) {
-            String dir = params.serviceHome() + "/" + template.getDest();
-            String filename = dir + "/" + template.getSrc();
+            String filename = params.serviceHome() + "/" + template.getDest();
+            String dir = Path.of(filename).getParent().toString();
             LinuxFileUtils.createDirectories(dir, params.user(), 
params.group(), PERMISSION_755, true);
             LinuxFileUtils.toFile(
                     ConfigType.CONTENT,
diff --git 
a/bigtop-manager-stack/bigtop-manager-stack-core/src/main/java/org/apache/bigtop/manager/stack/core/utils/LocalSettings.java
 
b/bigtop-manager-stack/bigtop-manager-stack-core/src/main/java/org/apache/bigtop/manager/stack/core/utils/LocalSettings.java
index f8690599..4cfcb877 100644
--- 
a/bigtop-manager-stack/bigtop-manager-stack-core/src/main/java/org/apache/bigtop/manager/stack/core/utils/LocalSettings.java
+++ 
b/bigtop-manager-stack/bigtop-manager-stack-core/src/main/java/org/apache/bigtop/manager/stack/core/utils/LocalSettings.java
@@ -19,6 +19,7 @@
 package org.apache.bigtop.manager.stack.core.utils;
 
 import org.apache.bigtop.manager.common.constants.CacheFiles;
+import org.apache.bigtop.manager.common.utils.FileUtils;
 import org.apache.bigtop.manager.common.utils.JsonUtils;
 import org.apache.bigtop.manager.common.utils.ProjectPathUtils;
 import org.apache.bigtop.manager.common.utils.os.OSDetection;
@@ -30,6 +31,7 @@ import com.fasterxml.jackson.core.type.TypeReference;
 import lombok.extern.slf4j.Slf4j;
 
 import java.io.File;
+import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -44,7 +46,7 @@ public class LocalSettings {
 
     public static Map<String, Object> configurations(String service, String 
type) {
         Map<String, Object> configDataMap = new HashMap<>();
-        File file = createFile(ProjectPathUtils.getAgentCachePath() + 
CacheFiles.CONFIGURATIONS_INFO);
+        File file = createFile(clusterCacheDir() + 
CacheFiles.CONFIGURATIONS_INFO);
         try {
             if (file.exists()) {
                 Map<String, Map<String, Object>> configJson = 
JsonUtils.readFromFile(file, new TypeReference<>() {});
@@ -61,22 +63,31 @@ public class LocalSettings {
         return configDataMap;
     }
 
-    public static List<String> hosts(String componentName) {
-        return hosts().getOrDefault(componentName, List.of());
+    public static List<String> componentHosts(String componentName) {
+        return componentHosts().getOrDefault(componentName, List.of());
     }
 
-    public static Map<String, List<String>> hosts() {
+    public static Map<String, List<String>> componentHosts() {
         Map<String, List<String>> hostJson = new HashMap<>();
-        File file = createFile(ProjectPathUtils.getAgentCachePath() + 
CacheFiles.HOSTS_INFO);
+        File file = createFile(clusterCacheDir() + CacheFiles.COMPONENTS_INFO);
         if (file.exists()) {
             hostJson = JsonUtils.readFromFile(file, new TypeReference<>() {});
         }
         return hostJson;
     }
 
+    /**
+     * Reads the host list of the current cluster from the agent cache.
+     *
+     * <p>The list is stored in {@link CacheFiles#HOSTS_INFO}. {@link CacheFiles#COMPONENTS_INFO} holds the
+     * component-to-hosts map consumed by {@link #componentHosts()} and cannot be parsed as a list.
+     *
+     * @return the hosts of the current cluster, or an empty mutable list when the cache file does not exist
+     */
+    public static List<String> clusterHosts() {
+        List<String> hosts = new ArrayList<>();
+        File file = createFile(clusterCacheDir() + CacheFiles.HOSTS_INFO);
+        if (file.exists()) {
+            hosts = JsonUtils.readFromFile(file, new TypeReference<>() {});
+        }
+        return hosts;
+    }
+
     public static Map<String, Object> basicInfo() {
         Map<String, Object> settings = new HashMap<>();
-        File file = createFile(ProjectPathUtils.getAgentCachePath() + 
CacheFiles.SETTINGS_INFO);
+        File file = createFile(clusterCacheDir() + CacheFiles.SETTINGS_INFO);
         if (file.exists()) {
             settings = JsonUtils.readFromFile(file, new TypeReference<>() {});
         }
@@ -85,7 +96,7 @@ public class LocalSettings {
 
     public static Map<String, String> users() {
         Map<String, String> userMap = new HashMap<>();
-        File file = createFile(ProjectPathUtils.getAgentCachePath() + 
CacheFiles.USERS_INFO);
+        File file = createFile(clusterCacheDir() + CacheFiles.USERS_INFO);
         if (file.exists()) {
             userMap = JsonUtils.readFromFile(file, new TypeReference<>() {});
         }
@@ -110,7 +121,7 @@ public class LocalSettings {
 
     public static List<RepoInfo> repos() {
         List<RepoInfo> repoInfoList = List.of();
-        File file = createFile(ProjectPathUtils.getAgentCachePath() + 
CacheFiles.REPOS_INFO);
+        File file = createFile(clusterCacheDir() + CacheFiles.REPOS_INFO);
         if (file.exists()) {
             repoInfoList = JsonUtils.readFromFile(file, new TypeReference<>() 
{});
         }
@@ -119,13 +130,19 @@ public class LocalSettings {
 
     public static ClusterInfo cluster() {
         ClusterInfo clusterInfo = new ClusterInfo();
-        File file = createFile(ProjectPathUtils.getAgentCachePath() + 
CacheFiles.CLUSTER_INFO);
+        File file = createFile(clusterCacheDir() + CacheFiles.CLUSTER_INFO);
         if (file.exists()) {
             clusterInfo = JsonUtils.readFromFile(file, new TypeReference<>() 
{});
         }
         return clusterInfo;
     }
 
+    /**
+     * Resolves the cache directory of the cluster named by the {@code current} marker file.
+     *
+     * <p>The content of the file {@code current} under the agent cache path is used verbatim as the
+     * name of the sub directory.
+     *
+     * @return agent cache path, a file separator, and the content of the {@code current} file
+     */
+    protected static String clusterCacheDir() {
+        String cacheRoot = ProjectPathUtils.getAgentCachePath();
+        String currentMarker = cacheRoot + File.separator + "current";
+        return cacheRoot + File.separator + FileUtils.readFile2Str(currentMarker);
+    }
+
     protected static File createFile(String fileName) {
         return new File(fileName);
     }
diff --git 
a/bigtop-manager-stack/bigtop-manager-stack-core/src/test/java/org/apache/bigtop/manager/stack/core/utils/LocalSettingsTest.java
 
b/bigtop-manager-stack/bigtop-manager-stack-core/src/test/java/org/apache/bigtop/manager/stack/core/utils/LocalSettingsTest.java
index 73d0ca87..92e4d8dc 100644
--- 
a/bigtop-manager-stack/bigtop-manager-stack-core/src/test/java/org/apache/bigtop/manager/stack/core/utils/LocalSettingsTest.java
+++ 
b/bigtop-manager-stack/bigtop-manager-stack-core/src/test/java/org/apache/bigtop/manager/stack/core/utils/LocalSettingsTest.java
@@ -108,7 +108,7 @@ public class LocalSettingsTest {
     }
 
     @Test
-    public void testHosts() {
+    public void testComponentHosts() {
         String componentName = "componentA";
 
         Map<String, List<String>> hostJson = new HashMap<>();
@@ -123,11 +123,13 @@ public class LocalSettingsTest {
         localSettingsMockedStatic
                 .when(() -> JsonUtils.readFromFile(any(File.class), 
any(TypeReference.class)))
                 .thenReturn(hostJson);
-        localSettingsMockedStatic.when(() -> 
LocalSettings.hosts(anyString())).thenCallRealMethod();
-        
localSettingsMockedStatic.when(LocalSettings::hosts).thenCallRealMethod();
+        localSettingsMockedStatic
+                .when(() -> LocalSettings.componentHosts(anyString()))
+                .thenCallRealMethod();
+        
localSettingsMockedStatic.when(LocalSettings::componentHosts).thenCallRealMethod();
 
         List<String> expectedHosts = List.of("host1", "host2");
-        assertEquals(expectedHosts, LocalSettings.hosts(componentName));
+        assertEquals(expectedHosts, 
LocalSettings.componentHosts(componentName));
     }
 
     @Test
diff --git 
a/bigtop-manager-stack/bigtop-manager-stack-extra/src/main/java/org/apache/bigtop/manager/stack/extra/v1_0_0/doris/DorisParams.java
 
b/bigtop-manager-stack/bigtop-manager-stack-extra/src/main/java/org/apache/bigtop/manager/stack/extra/v1_0_0/doris/DorisParams.java
index 8f1f200f..05794f31 100644
--- 
a/bigtop-manager-stack/bigtop-manager-stack-extra/src/main/java/org/apache/bigtop/manager/stack/extra/v1_0_0/doris/DorisParams.java
+++ 
b/bigtop-manager-stack/bigtop-manager-stack-extra/src/main/java/org/apache/bigtop/manager/stack/extra/v1_0_0/doris/DorisParams.java
@@ -90,7 +90,7 @@ public class DorisParams extends ExtraParams {
     }
 
     public List<String> dorisFeHosts() {
-        return LocalSettings.hosts("doris_fe");
+        return LocalSettings.componentHosts("doris_fe");
     }
 
     public int dorisFeHttpPort() {
diff --git 
a/bigtop-manager-stack/bigtop-manager-stack-extra/src/main/java/org/apache/bigtop/manager/stack/extra/v1_0_0/seatunnel/SeaTunnelSetup.java
 
b/bigtop-manager-stack/bigtop-manager-stack-extra/src/main/java/org/apache/bigtop/manager/stack/extra/v1_0_0/seatunnel/SeaTunnelSetup.java
index dfa70540..b8f4881b 100644
--- 
a/bigtop-manager-stack/bigtop-manager-stack-extra/src/main/java/org/apache/bigtop/manager/stack/extra/v1_0_0/seatunnel/SeaTunnelSetup.java
+++ 
b/bigtop-manager-stack/bigtop-manager-stack-extra/src/main/java/org/apache/bigtop/manager/stack/extra/v1_0_0/seatunnel/SeaTunnelSetup.java
@@ -141,7 +141,7 @@ public class SeaTunnelSetup {
 
     private static List<String> hostPort(String componentName, String port, 
int spacesNum) {
         String spaces = " ".repeat(spacesNum);
-        List<String> hostList = LocalSettings.hosts(componentName);
+        List<String> hostList = LocalSettings.componentHosts(componentName);
         hostList.sort(String::compareToIgnoreCase);
         List<String> hostPortList = new ArrayList<>();
         for (String host : hostList) {
diff --git 
a/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/param/InfraParams.java
 
b/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/param/InfraParams.java
index 33e3ce2d..dc22be51 100644
--- 
a/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/param/InfraParams.java
+++ 
b/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/param/InfraParams.java
@@ -18,14 +18,24 @@
  */
 package org.apache.bigtop.manager.stack.infra.param;
 
+import org.apache.bigtop.manager.common.constants.CacheFiles;
+import org.apache.bigtop.manager.common.utils.JsonUtils;
+import org.apache.bigtop.manager.common.utils.ProjectPathUtils;
 import org.apache.bigtop.manager.grpc.payload.ComponentCommandPayload;
+import org.apache.bigtop.manager.grpc.pojo.ClusterInfo;
 import org.apache.bigtop.manager.stack.core.spi.param.BaseParams;
+import org.apache.bigtop.manager.stack.core.utils.LocalSettings;
 
+import com.fasterxml.jackson.core.type.TypeReference;
 import lombok.NoArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
 
+import java.io.File;
+import java.util.Arrays;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Objects;
 
 @Slf4j
 @NoArgsConstructor
@@ -45,8 +55,91 @@ public abstract class InfraParams extends BaseParams {
         return "infra";
     }
 
-    public Map<String, List<String>> getClusterHosts() {
-        // In Component Status stage, clusterHosts is null
-        return payload.getClusterHosts();
+    /**
+     * Collects the host list of every cluster found in the agent cache.
+     *
+     * @return map from cluster name (the {@code name} entry of the cluster info file) to the content
+     *     of that cluster's hosts file
+     */
+    protected Map<String, List<String>> getClusterHosts() {
+        Map<String, List<String>> hostsByCluster = new HashMap<>();
+
+        for (String clusterDir : getClusterDirs()) {
+            List<String> hosts = JsonUtils.readFromFile(clusterDir + CacheFiles.HOSTS_INFO);
+            Map<String, String> clusterInfo = JsonUtils.readFromFile(clusterDir + CacheFiles.CLUSTER_INFO);
+            hostsByCluster.put(clusterInfo.get("name"), hosts);
+        }
+
+        return hostsByCluster;
+    }
+
+    /**
+     * Collects, for every cluster found in the agent cache, the hosts of the given component.
+     *
+     * @param componentName key to look up in each cluster's components file
+     * @return map from cluster name to the component's hosts; a cluster without the component maps
+     *     to an empty list
+     */
+    protected Map<String, List<String>> getComponentHosts(String componentName) {
+        Map<String, List<String>> hostsByCluster = new HashMap<>();
+
+        for (String clusterDir : getClusterDirs()) {
+            Map<String, List<String>> componentMap = JsonUtils.readFromFile(clusterDir + CacheFiles.COMPONENTS_INFO);
+            Map<String, String> clusterInfo = JsonUtils.readFromFile(clusterDir + CacheFiles.CLUSTER_INFO);
+
+            hostsByCluster.put(clusterInfo.get("name"), componentMap.getOrDefault(componentName, List.of()));
+        }
+
+        return hostsByCluster;
+    }
+
+    /**
+     * Collects one configuration type of one service for every cluster found in the agent cache.
+     *
+     * @param service service name used as the first-level key of each cluster's configurations file
+     * @param type configuration type used as the second-level key
+     * @return map from cluster name to the parsed configuration; a cluster that has no entry for the
+     *     given service/type maps to an empty map
+     */
+    protected Map<String, Map<String, Object>> configurations(String service, String type) {
+        Map<String, Map<String, Object>> configurations = new HashMap<>();
+        List<String> subDirs = getClusterDirs();
+
+        for (String subDir : subDirs) {
+            Map<String, String> clusterInfo = JsonUtils.readFromFile(subDir + CacheFiles.CLUSTER_INFO);
+            Map<String, Map<String, Object>> configJson =
+                    JsonUtils.readFromFile(subDir + CacheFiles.CONFIGURATIONS_INFO);
+
+            Object configData = configJson.getOrDefault(service, new HashMap<>()).get(type);
+
+            // Not every cluster has every service installed: without this guard the
+            // toString() call below throws a NullPointerException for such a cluster.
+            Map<String, Object> map = new HashMap<>();
+            if (configData != null) {
+                map = JsonUtils.readFromString(configData.toString());
+            }
+            configurations.put(clusterInfo.get("name"), map);
+        }
+
+        return configurations;
+    }
+
+    /**
+     * Collects a single configuration value for every cluster found in the agent cache.
+     *
+     * @param service service name used as the first-level key of each cluster's configurations file
+     * @param type configuration type used as the second-level key
+     * @param key property to read from the parsed configuration
+     * @return map from cluster name to the property value; the value is {@code null} when the
+     *     cluster has no entry for the service/type or the configuration does not contain the key
+     */
+    protected Map<String, Object> configurations(String service, String type, String key) {
+        Map<String, Object> configurations = new HashMap<>();
+        List<String> subDirs = getClusterDirs();
+
+        for (String subDir : subDirs) {
+            Map<String, String> clusterInfo = JsonUtils.readFromFile(subDir + CacheFiles.CLUSTER_INFO);
+            Map<String, Map<String, Object>> configJson =
+                    JsonUtils.readFromFile(subDir + CacheFiles.CONFIGURATIONS_INFO);
+
+            Object configData = configJson.getOrDefault(service, new HashMap<>()).get(type);
+
+            // Not every cluster has every service installed: without this guard the
+            // toString() call below throws a NullPointerException for such a cluster.
+            Object value = null;
+            if (configData != null) {
+                Map<String, Object> map = JsonUtils.readFromString(configData.toString());
+                value = map.get(key);
+            }
+            configurations.put(clusterInfo.get("name"), value);
+        }
+
+        return configurations;
+    }
+
+    /**
+     * Loads the cluster info file of every cluster directory found in the agent cache.
+     *
+     * @return one {@link ClusterInfo} per cluster directory, as an unmodifiable list
+     */
+    protected List<ClusterInfo> clusters() {
+        TypeReference<ClusterInfo> clusterType = new TypeReference<>() {};
+        return getClusterDirs().stream()
+                .map(clusterDir -> JsonUtils.readFromFile(clusterDir + CacheFiles.CLUSTER_INFO, clusterType))
+                .toList();
+    }
+
+    /**
+     * Get the cluster info of the host where the component is running on.
+     * Since infra service can be installed across clusters, this will get
+     * different cluster info based on the host.
+     * Delegates to {@link LocalSettings#cluster()}.
+     *
+     * @return ClusterInfo
+     */
+    protected ClusterInfo hostCluster() {
+        return LocalSettings.cluster();
+    }
+
+    /**
+     * Lists the absolute paths of all sub directories of the agent cache path.
+     *
+     * @return absolute paths of the directories directly under the agent cache path, unmodifiable
+     * @throws NullPointerException if the agent cache path cannot be listed
+     */
+    private List<String> getClusterDirs() {
+        File cacheRoot = new File(ProjectPathUtils.getAgentCachePath());
+        File[] entries = Objects.requireNonNull(cacheRoot.listFiles());
+        return Arrays.stream(entries)
+                .filter(File::isDirectory)
+                .map(File::getAbsolutePath)
+                .toList();
     }
 }
diff --git 
a/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/grafana/GrafanaParams.java
 
b/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/grafana/GrafanaParams.java
index 22a215ef..f5a92140 100644
--- 
a/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/grafana/GrafanaParams.java
+++ 
b/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/grafana/GrafanaParams.java
@@ -115,7 +115,7 @@ public class GrafanaParams extends InfraParams {
     @GlobalParams
     public Map<String, Object> prometheus() {
         Map<String, Object> configuration = 
LocalSettings.configurations(getServiceName(), "grafana-datasources");
-        List<String> prometheusServers = 
LocalSettings.hosts().get("prometheus_server");
+        List<String> prometheusServers = 
LocalSettings.componentHosts().get("prometheus_server");
         if (prometheusServers == null || prometheusServers.isEmpty()) {
             return configuration;
         }
diff --git 
a/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusParams.java
 
b/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusParams.java
index 63bbd94f..063cc4df 100644
--- 
a/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusParams.java
+++ 
b/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusParams.java
@@ -19,6 +19,7 @@
 package org.apache.bigtop.manager.stack.infra.v1_0_0.prometheus;
 
 import org.apache.bigtop.manager.grpc.payload.ComponentCommandPayload;
+import org.apache.bigtop.manager.grpc.pojo.ClusterInfo;
 import org.apache.bigtop.manager.stack.core.annotations.GlobalParams;
 import org.apache.bigtop.manager.stack.core.spi.param.Params;
 import org.apache.bigtop.manager.stack.core.utils.LocalSettings;
@@ -48,11 +49,10 @@ public class PrometheusParams extends InfraParams {
 
     private Map<String, Object> prometheusScrapeJob;
     private Map<String, Object> agentScrapeJob;
+    private List<Map<String, Object>> zookeeperScrapeJobs;
     private List<Map<String, Object>> scrapeJobs;
     private String prometheusPort;
     private String prometheusContent;
-    private String prometheusRulesFilename;
-    private String prometheusRulesFileContent;
 
     public PrometheusParams(ComponentCommandPayload componentCommandPayload) {
         super(componentCommandPayload);
@@ -67,11 +67,13 @@ public class PrometheusParams extends InfraParams {
         super.initGlobalParams();
 
         setAgentScrapeJob();
+        setZookeeperScrapeJob();
+
         scrapeJobs = new ArrayList<>();
         scrapeJobs.add(prometheusScrapeJob);
         scrapeJobs.add(agentScrapeJob);
+        scrapeJobs.addAll(zookeeperScrapeJobs);
         globalParamsMap.put("scrape_jobs", scrapeJobs);
-        globalParamsMap.put("rules_file_name", prometheusRulesFilename);
     }
 
     public String dataDir() {
@@ -111,15 +113,6 @@ public class PrometheusParams extends InfraParams {
         return configuration;
     }
 
-    @GlobalParams
-    public Map<String, Object> rules() {
-        Map<String, Object> configuration = 
LocalSettings.configurations(getServiceName(), "prometheus-rule");
-
-        prometheusRulesFilename = (String) 
configuration.get("rules_file_name");
-        prometheusRulesFileContent = (String) configuration.get("content");
-        return configuration;
-    }
-
     public String listenAddress() {
         return MessageFormat.format("0.0.0.0:{0}", prometheusPort);
     }
@@ -147,4 +140,33 @@ public class PrometheusParams extends InfraParams {
 
         agentScrapeJob.put("targets_list", agentTargets);
     }
+
+    /**
+     * Builds the ZooKeeper scrape jobs and stores them in {@code zookeeperScrapeJobs}.
+     *
+     * <p>One job named {@code <cluster>-zookeeper} is created for every cluster whose {@code zoo.cfg}
+     * sets {@code metricsProvider.className} to the Prometheus metrics provider. Its targets are the
+     * cluster's {@code zookeeper_server} hosts combined with {@code metricsProvider.httpPort}
+     * (7000 when not configured). Clusters without such a configuration are skipped.
+     */
+    public void setZookeeperScrapeJob() {
+        zookeeperScrapeJobs = new ArrayList<>();
+        String defaultProvider = "org.apache.zookeeper.metrics.prometheus.PrometheusMetricsProvider";
+        Map<String, Map<String, Object>> configurations = configurations("zookeeper", "zoo.cfg");
+
+        // Loaded on first use only: the lookup re-reads the cache files of every cluster,
+        // so it must not be repeated for each loop iteration.
+        Map<String, List<String>> zkHostsByCluster = null;
+
+        for (ClusterInfo clusterInfo : clusters()) {
+            String clusterName = clusterInfo.getName();
+            Map<String, Object> zooCfg = configurations.get(clusterName);
+            // Null-safe on both sides: the cluster may have no zoo.cfg or no provider configured.
+            if (zooCfg == null || !defaultProvider.equals(zooCfg.get("metricsProvider.className"))) {
+                continue;
+            }
+
+            String jobName = MessageFormat.format("{0}-zookeeper", clusterName);
+            Map<String, Object> job = new HashMap<>();
+            job.put("name", jobName);
+            job.put("targets_file", targetsConfigFile(jobName));
+
+            if (zkHostsByCluster == null) {
+                zkHostsByCluster = getComponentHosts("zookeeper_server");
+            }
+            List<String> zkServers = zkHostsByCluster.getOrDefault(clusterName, List.of());
+            Object port = zooCfg.getOrDefault("metricsProvider.httpPort", 7000L);
+
+            Map<String, Object> target = new HashMap<>();
+            List<String> targets = zkServers.stream().map(s -> s + ":" + port).toList();
+            target.put("targets", targets);
+            job.put("targets_list", List.of(target));
+
+            zookeeperScrapeJobs.add(job);
+        }
+    }
 }
diff --git 
a/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusServerScript.java
 
b/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusServerScript.java
index e2bf18e2..c3c3b931 100644
--- 
a/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusServerScript.java
+++ 
b/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusServerScript.java
@@ -54,7 +54,7 @@ public class PrometheusServerScript extends 
AbstractServerScript {
         PrometheusParams prometheusParams = (PrometheusParams) params;
         String cmd = MessageFormat.format(
                 "nohup {0}/prometheus --config.file={1}/prometheus.yml 
--web.listen-address={2} --storage.tsdb.path={0}/data > {0}/nohup.out 2>&1 &",
-                prometheusParams.serviceHome(), prometheusParams.confDir(), 
prometheusParams.listenAddress());
+                prometheusParams.serviceHome(), 
prometheusParams.serviceHome(), prometheusParams.listenAddress());
         try {
             ShellResult shellResult = LinuxOSUtils.sudoExecCmd(cmd, 
prometheusParams.user());
             if (shellResult.getExitCode() != 0) {
diff --git 
a/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusSetup.java
 
b/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusSetup.java
index a67e7f49..a9e0979e 100644
--- 
a/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusSetup.java
+++ 
b/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusSetup.java
@@ -45,16 +45,7 @@ public class PrometheusSetup {
 
         LinuxFileUtils.toFileByTemplate(
                 prometheusParams.getPrometheusContent(),
-                MessageFormat.format("{0}/prometheus.yml", 
prometheusParams.confDir()),
-                user,
-                group,
-                Constants.PERMISSION_644,
-                prometheusParams.getGlobalParamsMap());
-
-        LinuxFileUtils.toFileByTemplate(
-                prometheusParams.getPrometheusRulesFileContent(),
-                MessageFormat.format(
-                        "{0}/{1}", prometheusParams.confDir(), 
prometheusParams.getPrometheusRulesFilename()),
+                MessageFormat.format("{0}/prometheus.yml", 
prometheusParams.serviceHome()),
                 user,
                 group,
                 Constants.PERMISSION_644,

Reply via email to