This is an automated email from the ASF dual-hosted git repository.
wuzhiguo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/bigtop-manager.git
The following commit(s) were added to refs/heads/main by this push:
new 743859b7 BIGTOP-4489: Collect ZooKeeper metrics to Prometheus (#259)
743859b7 is described below
commit 743859b7aa1d7b31b3a10c4d1fde5945dc2e5c48
Author: Zhiguo Wu <[email protected]>
AuthorDate: Wed Aug 27 00:15:39 2025 +0800
BIGTOP-4489: Collect ZooKeeper metrics to Prometheus (#259)
---
.../grpc/service/JobCacheServiceGrpcImpl.java | 10 +-
.../bigtop/manager/common/utils/FileUtils.java | 5 +
.../grpc/payload/ComponentCommandPayload.java | 7 --
.../manager/grpc/payload/JobCachePayload.java | 4 +
.../server/command/helper/JobCacheHelper.java | 62 ++++++++----
.../command/stage/AbstractComponentStage.java | 23 -----
.../server/command/task/AbstractComponentTask.java | 7 --
.../bigtop/manager/server/utils/StackUtils.java | 20 ++--
.../infra/1.0.0/services/grafana/metainfo.xml | 4 +-
.../prometheus/configuration/prometheus-rule.xml | 65 ------------
.../prometheus/configuration/prometheus.xml | 4 +-
.../infra/1.0.0/services/prometheus/metainfo.xml | 9 +-
.../prometheus/template/rules/zookeeper.yml | 111 +++++++++++++++++++++
.../stack/bigtop/v3_3_0/hadoop/HadoopParams.java | 8 +-
.../stack/bigtop/v3_3_0/hbase/HBaseParams.java | 4 +-
.../stack/bigtop/v3_3_0/hive/HiveParams.java | 4 +-
.../stack/bigtop/v3_3_0/kafka/KafkaSetup.java | 2 +-
.../stack/bigtop/v3_3_0/solr/SolrParams.java | 2 +-
.../stack/bigtop/v3_3_0/solr/SolrSetup.java | 2 +-
.../bigtop/v3_3_0/zookeeper/ZookeeperSetup.java | 2 +-
.../manager/stack/core/executor/StackExecutor.java | 2 +-
.../stack/core/spi/script/AbstractScript.java | 4 +-
.../manager/stack/core/utils/LocalSettings.java | 35 +++++--
.../stack/core/utils/LocalSettingsTest.java | 10 +-
.../stack/extra/v1_0_0/doris/DorisParams.java | 2 +-
.../extra/v1_0_0/seatunnel/SeaTunnelSetup.java | 2 +-
.../manager/stack/infra/param/InfraParams.java | 99 +++++++++++++++++-
.../stack/infra/v1_0_0/grafana/GrafanaParams.java | 2 +-
.../infra/v1_0_0/prometheus/PrometheusParams.java | 46 ++++++---
.../v1_0_0/prometheus/PrometheusServerScript.java | 2 +-
.../infra/v1_0_0/prometheus/PrometheusSetup.java | 11 +-
31 files changed, 376 insertions(+), 194 deletions(-)
diff --git
a/bigtop-manager-agent/src/main/java/org/apache/bigtop/manager/agent/grpc/service/JobCacheServiceGrpcImpl.java
b/bigtop-manager-agent/src/main/java/org/apache/bigtop/manager/agent/grpc/service/JobCacheServiceGrpcImpl.java
index 02864811..6ba57b0a 100644
---
a/bigtop-manager-agent/src/main/java/org/apache/bigtop/manager/agent/grpc/service/JobCacheServiceGrpcImpl.java
+++
b/bigtop-manager-agent/src/main/java/org/apache/bigtop/manager/agent/grpc/service/JobCacheServiceGrpcImpl.java
@@ -30,11 +30,13 @@ import io.grpc.stub.StreamObserver;
import lombok.extern.slf4j.Slf4j;
import net.devh.boot.grpc.server.service.GrpcService;
+import java.io.File;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import static
org.apache.bigtop.manager.common.constants.CacheFiles.CLUSTER_INFO;
+import static
org.apache.bigtop.manager.common.constants.CacheFiles.COMPONENTS_INFO;
import static
org.apache.bigtop.manager.common.constants.CacheFiles.CONFIGURATIONS_INFO;
import static org.apache.bigtop.manager.common.constants.CacheFiles.HOSTS_INFO;
import static org.apache.bigtop.manager.common.constants.CacheFiles.REPOS_INFO;
@@ -48,17 +50,21 @@ public class JobCacheServiceGrpcImpl extends
JobCacheServiceGrpc.JobCacheService
public void save(JobCacheRequest request, StreamObserver<JobCacheReply>
responseObserver) {
try {
JobCachePayload payload =
JsonUtils.readFromString(request.getPayload(), JobCachePayload.class);
- String cacheDir = ProjectPathUtils.getAgentCachePath();
+ String cacheDir = ProjectPathUtils.getAgentCachePath() +
File.separator + payload.getClusterId();
Path p = Paths.get(cacheDir);
if (!Files.exists(p)) {
Files.createDirectories(p);
}
+ String dir = p.getParent().toFile().getAbsolutePath();
+ JsonUtils.writeToFile(dir + "/current",
payload.getCurrentClusterId());
+
JsonUtils.writeToFile(cacheDir + CONFIGURATIONS_INFO,
payload.getConfigurations());
- JsonUtils.writeToFile(cacheDir + HOSTS_INFO,
payload.getComponentHosts());
+ JsonUtils.writeToFile(cacheDir + COMPONENTS_INFO,
payload.getComponentHosts());
JsonUtils.writeToFile(cacheDir + USERS_INFO,
payload.getUserInfo());
JsonUtils.writeToFile(cacheDir + REPOS_INFO,
payload.getRepoInfo());
JsonUtils.writeToFile(cacheDir + CLUSTER_INFO,
payload.getClusterInfo());
+ JsonUtils.writeToFile(cacheDir + HOSTS_INFO, payload.getHosts());
JobCacheReply reply = JobCacheReply.newBuilder()
.setCode(MessageConstants.SUCCESS_CODE)
diff --git
a/bigtop-manager-common/src/main/java/org/apache/bigtop/manager/common/utils/FileUtils.java
b/bigtop-manager-common/src/main/java/org/apache/bigtop/manager/common/utils/FileUtils.java
index 11fb315b..717b1701 100644
---
a/bigtop-manager-common/src/main/java/org/apache/bigtop/manager/common/utils/FileUtils.java
+++
b/bigtop-manager-common/src/main/java/org/apache/bigtop/manager/common/utils/FileUtils.java
@@ -33,6 +33,11 @@ import java.nio.charset.StandardCharsets;
@NoArgsConstructor(access = AccessLevel.PRIVATE)
public class FileUtils {
+ public static String readFile2Str(String filename) {
+ File file = new File(filename);
+ return readFile2Str(file);
+ }
+
/**
* Get Content
*
diff --git
a/bigtop-manager-grpc/src/main/java/org/apache/bigtop/manager/grpc/payload/ComponentCommandPayload.java
b/bigtop-manager-grpc/src/main/java/org/apache/bigtop/manager/grpc/payload/ComponentCommandPayload.java
index f40e0a70..80457c4f 100644
---
a/bigtop-manager-grpc/src/main/java/org/apache/bigtop/manager/grpc/payload/ComponentCommandPayload.java
+++
b/bigtop-manager-grpc/src/main/java/org/apache/bigtop/manager/grpc/payload/ComponentCommandPayload.java
@@ -24,7 +24,6 @@ import org.apache.bigtop.manager.grpc.pojo.TemplateInfo;
import lombok.Data;
import java.util.List;
-import java.util.Map;
@Data
public class ComponentCommandPayload {
@@ -46,10 +45,4 @@ public class ComponentCommandPayload {
private List<PackageSpecificInfo> packageSpecifics;
private List<TemplateInfo> templates;
-
- /**
- * This field is exclusively used for Prometheus and Grafana within the
infra services.
- * Includes cluster and corresponding hostname.
- */
- private Map<String, List<String>> clusterHosts;
}
diff --git
a/bigtop-manager-grpc/src/main/java/org/apache/bigtop/manager/grpc/payload/JobCachePayload.java
b/bigtop-manager-grpc/src/main/java/org/apache/bigtop/manager/grpc/payload/JobCachePayload.java
index 39fef48f..6bc152ae 100644
---
a/bigtop-manager-grpc/src/main/java/org/apache/bigtop/manager/grpc/payload/JobCachePayload.java
+++
b/bigtop-manager-grpc/src/main/java/org/apache/bigtop/manager/grpc/payload/JobCachePayload.java
@@ -29,6 +29,8 @@ import java.util.Map;
@Data
public class JobCachePayload {
+ private Long currentClusterId;
+
private Long clusterId;
private ClusterInfo clusterInfo;
@@ -40,4 +42,6 @@ public class JobCachePayload {
private Map<String, Map<String, String>> configurations;
private Map<String, List<String>> componentHosts;
+
+ private List<String> hosts;
}
diff --git
a/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/command/helper/JobCacheHelper.java
b/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/command/helper/JobCacheHelper.java
index 5ee1368e..d1c547da 100644
---
a/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/command/helper/JobCacheHelper.java
+++
b/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/command/helper/JobCacheHelper.java
@@ -54,8 +54,6 @@ import java.util.concurrent.CompletableFuture;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.stream.Collectors;
-import static
org.apache.bigtop.manager.common.constants.Constants.ALL_HOST_KEY;
-
public class JobCacheHelper {
private static ClusterDao clusterDao;
@@ -90,19 +88,31 @@ public class JobCacheHelper {
List<CompletableFuture<Boolean>> futures = new ArrayList<>();
for (HostPO hostPO : hostPOList) {
- genClusterPayload(payload, hostPO.getClusterId());
- JobCacheRequest request = JobCacheRequest.newBuilder()
- .setJobId(jobId)
- .setPayload(JsonUtils.writeAsString(payload))
- .build();
- futures.add(CompletableFuture.supplyAsync(() -> {
- JobCacheServiceGrpc.JobCacheServiceBlockingStub stub =
GrpcClient.getBlockingStub(
- hostPO.getHostname(),
- hostPO.getGrpcPort(),
- JobCacheServiceGrpc.JobCacheServiceBlockingStub.class);
- JobCacheReply reply = stub.save(request);
- return reply != null && reply.getCode() ==
MessageConstants.SUCCESS_CODE;
- }));
+ payload.setCurrentClusterId(hostPO.getClusterId());
+
+ List<Long> clusterIds = new ArrayList<>();
+ if (hostRequiresAllData(hostPO.getHostname())) {
+ clusterIds.addAll(
+
clusterDao.findAll().stream().map(ClusterPO::getId).toList());
+ } else {
+ clusterIds.add(hostPO.getClusterId());
+ }
+
+ for (Long clusterId : clusterIds) {
+ genClusterPayload(payload, clusterId);
+ JobCacheRequest request = JobCacheRequest.newBuilder()
+ .setJobId(jobId)
+ .setPayload(JsonUtils.writeAsString(payload))
+ .build();
+ futures.add(CompletableFuture.supplyAsync(() -> {
+ JobCacheServiceGrpc.JobCacheServiceBlockingStub stub =
GrpcClient.getBlockingStub(
+ hostPO.getHostname(),
+ hostPO.getGrpcPort(),
+
JobCacheServiceGrpc.JobCacheServiceBlockingStub.class);
+ JobCacheReply reply = stub.save(request);
+ return reply != null && reply.getCode() ==
MessageConstants.SUCCESS_CODE;
+ }));
+ }
}
List<Boolean> results = futures.stream()
@@ -139,22 +149,22 @@ public class JobCacheHelper {
Map<String, List<String>> componentHostMap =
payload.getComponentHosts();
componentHostMap.putAll(getComponentHostMap(clusterId));
+ List<String> hosts = hostDao.findAllByClusterId(clusterId).stream()
+ .map(HostPO::getHostname)
+ .toList();
+
payload.setClusterId(clusterId);
payload.setClusterInfo(clusterInfo);
payload.setConfigurations(serviceConfigMap);
payload.setComponentHosts(componentHostMap);
+ payload.setHosts(hosts);
}
private static void genGlobalPayload(JobCachePayload payload) {
List<RepoPO> repoPOList = repoDao.findAll();
- List<HostPO> hostPOList = hostDao.findAll();
-
Map<String, Map<String, String>> serviceConfigMap =
getServiceConfigMap(0L);
- Map<String, List<String>> componentHostMap = new HashMap<>();
- List<String> allHostnames =
hostPOList.stream().map(HostPO::getHostname).toList();
- componentHostMap.put(ALL_HOST_KEY, allHostnames);
- componentHostMap.putAll(getComponentHostMap(0L));
+ Map<String, List<String>> componentHostMap = new
HashMap<>(getComponentHostMap(0L));
List<RepoInfo> repoList = new ArrayList<>();
repoPOList.forEach(repoPO -> {
@@ -222,6 +232,16 @@ public class JobCacheHelper {
private static Boolean hostRequiresAllData(String hostname) {
// Some services like prometheus requires all clusters info to collect
metrics.
+ List<ComponentPO> components = componentDao.findByQuery(
+ ComponentQuery.builder().hostname(hostname).build());
+ for (ComponentPO component : components) {
+ ServiceDTO serviceDTO =
StackUtils.getServiceDTOByComponentName(component.getName());
+ StackDTO stack = StackUtils.getServiceStack(serviceDTO.getName());
+ if (stack.getStackName().equals("infra")) {
+ return true;
+ }
+ }
+
return false;
}
}
diff --git
a/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/command/stage/AbstractComponentStage.java
b/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/command/stage/AbstractComponentStage.java
index ce610b8b..5908cbad 100644
---
a/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/command/stage/AbstractComponentStage.java
+++
b/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/command/stage/AbstractComponentStage.java
@@ -19,7 +19,6 @@
package org.apache.bigtop.manager.server.command.stage;
import org.apache.bigtop.manager.dao.po.ClusterPO;
-import org.apache.bigtop.manager.dao.po.HostPO;
import org.apache.bigtop.manager.dao.repository.ClusterDao;
import org.apache.bigtop.manager.server.command.task.TaskContext;
import org.apache.bigtop.manager.server.holder.SpringContextHolder;
@@ -27,11 +26,6 @@ import
org.apache.bigtop.manager.server.model.dto.ComponentDTO;
import org.apache.bigtop.manager.server.model.dto.ServiceDTO;
import org.apache.bigtop.manager.server.utils.StackUtils;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
public abstract class AbstractComponentStage extends AbstractStage {
private ClusterDao clusterDao;
@@ -78,23 +72,6 @@ public abstract class AbstractComponentStage extends
AbstractStage {
taskContext.setServiceUser(serviceDTO.getUser());
taskContext.setUserGroup(clusterPO == null ? null :
clusterPO.getUserGroup());
taskContext.setRootDir(clusterPO == null ? null :
clusterPO.getRootDir());
-
- Map<String, Object> properties = new HashMap<>();
- properties.put("clusterHosts", getClusterHosts());
- taskContext.setProperties(properties);
return taskContext;
}
-
- protected Map<String, List<String>> getClusterHosts() {
- Map<String, List<String>> clusterHosts = new HashMap<>();
- for (ClusterPO clusterPO : clusterDao.findAll()) {
- List<String> hosts = new ArrayList<>();
- for (HostPO hostPO :
hostDao.findAllByClusterId(clusterPO.getId())) {
- String host = hostPO.getHostname();
- hosts.add(host);
- }
- clusterHosts.put(clusterPO.getName(), hosts);
- }
- return clusterHosts;
- }
}
diff --git
a/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/command/task/AbstractComponentTask.java
b/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/command/task/AbstractComponentTask.java
index 9d141fc2..d3e621ef 100644
---
a/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/command/task/AbstractComponentTask.java
+++
b/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/command/task/AbstractComponentTask.java
@@ -39,7 +39,6 @@ import org.apache.bigtop.manager.server.utils.StackUtils;
import java.util.ArrayList;
import java.util.List;
-import java.util.Map;
public abstract class AbstractComponentTask extends AbstractTask {
@@ -83,12 +82,6 @@ public abstract class AbstractComponentTask extends
AbstractTask {
payload.setTemplates(convertTemplateInfo(serviceDTO.getName(),
serviceDTO.getTemplates()));
payload.setPackageSpecifics(convertPackageSpecificInfo(serviceDTO.getPackageSpecifics()));
- Map<String, Object> properties = taskContext.getProperties();
- if (stackDTO.getStackName().equals("infra")) {
- Map<String, List<String>> clusterHosts = (Map<String,
List<String>>) properties.get("clusterHosts");
- payload.setClusterHosts(clusterHosts);
- }
-
ComponentCommandRequest.Builder requestBuilder =
ComponentCommandRequest.newBuilder();
requestBuilder.setPayload(JsonUtils.writeAsString(payload));
requestBuilder.setTaskId(getTaskPO().getId());
diff --git
a/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/utils/StackUtils.java
b/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/utils/StackUtils.java
index 42002e08..c00f57ee 100644
---
a/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/utils/StackUtils.java
+++
b/bigtop-manager-server/src/main/java/org/apache/bigtop/manager/server/utils/StackUtils.java
@@ -164,12 +164,20 @@ public class StackUtils {
private static void parseServiceTemplates(File file, String serviceName) {
File templateFolder = new File(file.getAbsolutePath(),
TEMPLATE_FOLDER);
if (templateFolder.exists()) {
- for (File templateFile :
- Optional.ofNullable(templateFolder.listFiles()).orElse(new
File[0])) {
- String filename = templateFile.getName();
- String content = FileUtils.readFile2Str(templateFile);
- Map<String, String> map =
SERVICE_TEMPLATE_MAP.computeIfAbsent(serviceName, k -> new HashMap<>());
- map.put(filename, content);
+ Map<String, String> map =
SERVICE_TEMPLATE_MAP.computeIfAbsent(serviceName, k -> new HashMap<>());
+ parseTemplateFiles(templateFolder, templateFolder, map);
+ }
+ }
+
+ private static void parseTemplateFiles(File templateRoot, File
currentFolder, Map<String, String> templateMap) {
+ for (File file :
Optional.ofNullable(currentFolder.listFiles()).orElse(new File[0])) {
+ if (file.isDirectory()) {
+ parseTemplateFiles(templateRoot, file, templateMap);
+ } else {
+ String relativePath =
+
templateRoot.toURI().relativize(file.toURI()).getPath();
+ String content = FileUtils.readFile2Str(file);
+ templateMap.put(relativePath, content);
}
}
}
diff --git
a/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/grafana/metainfo.xml
b/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/grafana/metainfo.xml
index 45465e02..c7f7e25e 100644
---
a/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/grafana/metainfo.xml
+++
b/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/grafana/metainfo.xml
@@ -75,11 +75,11 @@
<templates>
<template>
<src>cluster-dashboard.json</src>
- <dest>conf/provisioning/dashboards/cluster</dest>
+
<dest>conf/provisioning/dashboards/cluster/cluster-dashboard.json</dest>
</template>
<template>
<src>host-dashboard.json</src>
- <dest>conf/provisioning/dashboards/host</dest>
+
<dest>conf/provisioning/dashboards/host/host-dashboard.json</dest>
</template>
</templates>
diff --git
a/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/configuration/prometheus-rule.xml
b/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/configuration/prometheus-rule.xml
deleted file mode 100644
index e7ee51e1..00000000
---
a/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/configuration/prometheus-rule.xml
+++ /dev/null
@@ -1,65 +0,0 @@
-<?xml version="1.0"?>
-<!--
- ~ Licensed to the Apache Software Foundation (ASF) under one
- ~ or more contributor license agreements. See the NOTICE file
- ~ distributed with this work for additional information
- ~ regarding copyright ownership. The ASF licenses this file
- ~ to you under the Apache License, Version 2.0 (the
- ~ "License"); you may not use this file except in compliance
- ~ with the License. You may obtain a copy of the License at
- ~
- ~ https://www.apache.org/licenses/LICENSE-2.0
- ~
- ~ Unless required by applicable law or agreed to in writing,
- ~ software distributed under the License is distributed on an
- ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- ~ KIND, either express or implied. See the License for the
- ~ specific language governing permissions and limitations
- ~ under the License.
--->
-
-<configuration>
- <property>
- <name>rules_file_name</name>
- <value>prometheus_rules.yml</value>
- <description>Rules file name</description>
- </property>
- <property>
- <name>content</name>
- <description>This is the freemarker template for rules
file</description>
- <value><![CDATA[
-groups:
- # Recording rules group: Used to calculate and save new aggregated metrics
- - name: example_recording_rules
- interval: 1m # The frequency at which the rules are evaluated
-
- rules:
- # Recording rule: Calculate the average CPU usage over the last 5
minutes for each job
- - record: job:cpu_usage:avg
- expr: avg(rate(node_cpu_seconds_total{mode="user"}[5m])) by (job)
- # This creates a new metric `job:cpu_usage:avg` representing the
average CPU usage per job
-
- # Alerting rules group: Used to trigger alerts based on conditions
- - name: example_alerting_rules
- interval: 1m # The frequency at which the alerting rules are evaluated
-
- rules:
- # Alerting rule: Trigger an alert if the average CPU usage is over 90%
for the last 5 minutes
- - alert: HighCpuUsage
- expr: avg(rate(node_cpu_seconds_total{mode="user"}[5m])) by (instance)
> 0.9
- # This expression checks if the average CPU usage over the last 5
minutes for each instance is greater than 90%
- for: 5m # The condition must hold true for 5 minutes before the alert
is triggered
- labels:
- severity: critical # Set the severity of the alert as 'critical'
- annotations:
- summary: "CPU usage on instance {{ $labels.instance }} is over 90%
for the last 5 minutes"
- # Summary of the alert that will appear when it triggers
- description: "The CPU usage on instance {{ $labels.instance }} has
been over 90% for the past 5 minutes."
- # Detailed description of the alert that will provide more context
-]]>
- </value>
- <attrs>
- <type>longtext</type>
- </attrs>
- </property>
-</configuration>
\ No newline at end of file
diff --git
a/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/configuration/prometheus.xml
b/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/configuration/prometheus.xml
index 215bec8b..201982bf 100644
---
a/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/configuration/prometheus.xml
+++
b/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/configuration/prometheus.xml
@@ -39,9 +39,7 @@ global:
# Rule files specifies a list of globs. Rules and alerts are read from
# all matching files.
rule_files:
-<#if rules_file_name??>
- - ${rules_file_name}
-</#if>
+ - rules/zookeeper.yml
# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
diff --git
a/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/metainfo.xml
b/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/metainfo.xml
index dfc1850b..77d3297f 100644
---
a/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/metainfo.xml
+++
b/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/metainfo.xml
@@ -71,5 +71,12 @@
</packages>
</package-specific>
</package-specifics>
+
+ <templates>
+ <template>
+ <src>rules/zookeeper.yml</src>
+ <dest>rules/zookeeper.yml</dest>
+ </template>
+ </templates>
</service>
-</metainfo>
\ No newline at end of file
+</metainfo>
diff --git
a/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/template/rules/zookeeper.yml
b/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/template/rules/zookeeper.yml
new file mode 100644
index 00000000..2cba399d
--- /dev/null
+++
b/bigtop-manager-server/src/main/resources/stacks/infra/1.0.0/services/prometheus/template/rules/zookeeper.yml
@@ -0,0 +1,111 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+groups:
+ - name: zookeeper-alerts
+ rules:
+ - alert: ZooKeeper server is down
+ expr: up{job=~".*-zookeeper"} == 0
+ for: 1m
+ labels:
+ severity: critical
+ annotations:
+ summary: "Instance {{ $labels.instance }} ZooKeeper server is down"
+ description: "{{ $labels.instance }} of job {{$labels.job}}
ZooKeeper server is down: [{{ $value }}]."
+
+ - alert: create too many znodes
+ expr: znode_count{job=~".*-zookeeper"} > 1000000
+ for: 1m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Instance {{ $labels.instance }} create too many znodes"
+ description: "{{ $labels.instance }} of job {{$labels.job}} create
too many znodes: [{{ $value }}]."
+
+ - alert: create too many connections
+ expr: num_alive_connections{job=~".*-zookeeper"} > 50 # suppose we use
the default maxClientCnxns: 60
+ for: 1m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Instance {{ $labels.instance }} create too many
connections"
+ description: "{{ $labels.instance }} of job {{$labels.job}} create
too many connections: [{{ $value }}]."
+
+ - alert: znode total occupied memory is too big
+ expr: approximate_data_size{job=~".*-zookeeper"} /1024 /1024 > 1 *
1024 # more than 1024 MB(1 GB)
+ for: 1m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Instance {{ $labels.instance }} znode total occupied
memory is too big"
+ description: "{{ $labels.instance }} of job {{$labels.job}} znode
total occupied memory is too big: [{{ $value }}] MB."
+
+ - alert: set too many watch
+ expr: watch_count{job=~".*-zookeeper"} > 10000
+ for: 1m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Instance {{ $labels.instance }} set too many watch"
+ description: "{{ $labels.instance }} of job {{$labels.job}} set too
many watch: [{{ $value }}]."
+
+ - alert: a leader election happens
+ expr: increase(election_time_count{job=~".*-zookeeper"}[5m]) > 0
+ for: 1m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Instance {{ $labels.instance }} a leader election happens"
+ description: "{{ $labels.instance }} of job {{$labels.job}} a leader
election happens: [{{ $value }}]."
+
+ - alert: open too many files
+ expr: open_file_descriptor_count{job=~".*-zookeeper"} > 300
+ for: 1m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Instance {{ $labels.instance }} open too many files"
+ description: "{{ $labels.instance }} of job {{$labels.job}} open too
many files: [{{ $value }}]."
+
+ - alert: fsync time is too long
+ expr: rate(fsynctime_sum{job=~".*-zookeeper"}[1m]) > 100
+ for: 1m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Instance {{ $labels.instance }} fsync time is too long"
+ description: "{{ $labels.instance }} of job {{$labels.job}} fsync
time is too long: [{{ $value }}]."
+
+ - alert: take snapshot time is too long
+ expr: rate(snapshottime_sum{job=~".*-zookeeper"}[5m]) > 100
+ for: 1m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Instance {{ $labels.instance }} take snapshot time is too
long"
+ description: "{{ $labels.instance }} of job {{$labels.job}} take
snapshot time is too long: [{{ $value }}]."
+
+ - alert: avg latency is too high
+ expr: avg_latency{job=~".*-zookeeper"} > 100
+ for: 1m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Instance {{ $labels.instance }} avg latency is too high"
+ description: "{{ $labels.instance }} of job {{$labels.job}} avg
latency is too high: [{{ $value }}]."
diff --git
a/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/hadoop/HadoopParams.java
b/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/hadoop/HadoopParams.java
index b3110a7a..356c6e03 100644
---
a/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/hadoop/HadoopParams.java
+++
b/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/hadoop/HadoopParams.java
@@ -70,7 +70,7 @@ public class HadoopParams extends BigtopParams {
super(componentCommandPayload);
globalParamsMap.put("hdfs_user", user());
globalParamsMap.put("hdfs_group", group());
- globalParamsMap.put("datanode_hosts", LocalSettings.hosts("datanode"));
+ globalParamsMap.put("datanode_hosts",
LocalSettings.componentHosts("datanode"));
globalParamsMap.put("java_home", javaHome());
globalParamsMap.put("hadoop_home", serviceHome());
globalParamsMap.put("hadoop_conf_dir", confDir());
@@ -96,7 +96,7 @@ public class HadoopParams extends BigtopParams {
@GlobalParams
public Map<String, Object> coreSite() {
Map<String, Object> coreSite =
LocalSettings.configurations(getServiceName(), "core-site");
- List<String> namenodeList = LocalSettings.hosts("namenode");
+ List<String> namenodeList = LocalSettings.componentHosts("namenode");
if (!namenodeList.isEmpty()) {
coreSite.put(
"fs.defaultFS", ((String)
coreSite.get("fs.defaultFS")).replace("localhost", namenodeList.get(0)));
@@ -112,7 +112,7 @@ public class HadoopParams extends BigtopParams {
@GlobalParams
public Map<String, Object> hdfsSite() {
Map<String, Object> hdfsSite =
LocalSettings.configurations(getServiceName(), "hdfs-site");
- List<String> namenodeList = LocalSettings.hosts("namenode");
+ List<String> namenodeList = LocalSettings.componentHosts("namenode");
if (!namenodeList.isEmpty()) {
hdfsSite.put(
"dfs.namenode.rpc-address",
@@ -147,7 +147,7 @@ public class HadoopParams extends BigtopParams {
@GlobalParams
public Map<String, Object> yarnSite() {
Map<String, Object> yarnSite =
LocalSettings.configurations(getServiceName(), "yarn-site");
- List<String> resourcemanagerList =
LocalSettings.hosts("resourcemanager");
+ List<String> resourcemanagerList =
LocalSettings.componentHosts("resourcemanager");
if (!resourcemanagerList.isEmpty()) {
yarnSite.put("yarn.resourcemanager.hostname",
MessageFormat.format("{0}", resourcemanagerList.get(0)));
}
diff --git
a/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/hbase/HBaseParams.java
b/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/hbase/HBaseParams.java
index ab8da72f..cfb29f32 100644
---
a/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/hbase/HBaseParams.java
+++
b/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/hbase/HBaseParams.java
@@ -56,7 +56,7 @@ public class HBaseParams extends BigtopParams {
globalParamsMap.put("security_enabled", false);
globalParamsMap.put("hbase_user", user());
globalParamsMap.put("hbase_group", group());
- globalParamsMap.put("regionserver_hosts",
LocalSettings.hosts("hbase_regionserver"));
+ globalParamsMap.put("regionserver_hosts",
LocalSettings.componentHosts("hbase_regionserver"));
hbaseMasterPidFile = hbasePidDir + "/hbase-" + user() + "-master.pid";
hbaseRegionServerPidFile = hbasePidDir + "/hbase-" + user() +
"-regionserver.pid";
@@ -75,7 +75,7 @@ public class HBaseParams extends BigtopParams {
@GlobalParams
public Map<String, Object> hbaseSite() {
Map<String, Object> configurations =
LocalSettings.configurations(getServiceName(), "hbase-site");
- List<String> zookeeperQuorum = LocalSettings.hosts("zookeeper_server");
+ List<String> zookeeperQuorum =
LocalSettings.componentHosts("zookeeper_server");
Map<String, Object> zooCfg = LocalSettings.configurations("zookeeper",
"zoo.cfg");
// Auto generate properties for hbase-site.xml
diff --git
a/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/hive/HiveParams.java
b/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/hive/HiveParams.java
index 176f9b4f..c20cbd25 100644
---
a/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/hive/HiveParams.java
+++
b/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/hive/HiveParams.java
@@ -82,13 +82,13 @@ public class HiveParams extends BigtopParams {
// Auto generate zookeeper properties for hive-site.xml
Map<String, Object> zooCfg = LocalSettings.configurations("zookeeper",
"zoo.cfg");
- List<String> zookeeperQuorum = LocalSettings.hosts("zookeeper_server");
+ List<String> zookeeperQuorum =
LocalSettings.componentHosts("zookeeper_server");
configurations.put("hive.zookeeper.client.port",
zooCfg.get("clientPort"));
configurations.put("hive.zookeeper.quorum", String.join(",",
zookeeperQuorum));
// Auto generate database properties for hive-site.xml
- String mysqlHost = LocalSettings.hosts("mysql_server").get(0);
+ String mysqlHost = LocalSettings.componentHosts("mysql_server").get(0);
String mysqlPassword = LocalSettings.configurations("mysql", "common")
.get("root_password")
.toString();
diff --git
a/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/kafka/KafkaSetup.java
b/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/kafka/KafkaSetup.java
index f94049b1..e7e13b39 100644
---
a/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/kafka/KafkaSetup.java
+++
b/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/kafka/KafkaSetup.java
@@ -53,7 +53,7 @@ public class KafkaSetup {
LinuxFileUtils.createDirectories(kafkaParams.getKafkaLogDir(),
kafkaUser, kafkaGroup, PERMISSION_755, true);
LinuxFileUtils.createDirectories(kafkaParams.getKafkaPidDir(),
kafkaUser, kafkaGroup, PERMISSION_755, true);
- List<String> zookeeperServerHosts =
LocalSettings.hosts("zookeeper_server");
+ List<String> zookeeperServerHosts =
LocalSettings.componentHosts("zookeeper_server");
Map<String, Object> paramMap = new HashMap<>();
paramMap.put("zk_server_list", zookeeperServerHosts);
paramMap.put("host", kafkaParams.hostname());
diff --git
a/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/solr/SolrParams.java
b/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/solr/SolrParams.java
index f08d6896..e4408d24 100644
---
a/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/solr/SolrParams.java
+++
b/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/solr/SolrParams.java
@@ -71,7 +71,7 @@ public class SolrParams extends BigtopParams {
solrPort = (String) solrEnv.get("solr_port");
solrPidFile = solrPidDir + "/solr-" + solrPort + ".pid";
- List<String> ZookeeperServerHosts =
LocalSettings.hosts("zookeeper_server");
+ List<String> ZookeeperServerHosts =
LocalSettings.componentHosts("zookeeper_server");
Map<String, Object> ZKPort = LocalSettings.configurations("zookeeper",
"zoo.cfg");
String clientPort = (String) ZKPort.get("clientPort");
zNode = (String) solrEnv.get("solr_znode");
diff --git
a/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/solr/SolrSetup.java
b/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/solr/SolrSetup.java
index e4543a25..f38c1892 100644
---
a/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/solr/SolrSetup.java
+++
b/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/solr/SolrSetup.java
@@ -51,7 +51,7 @@ public class SolrSetup {
LinuxFileUtils.createDirectories(solrParams.getSolrLogDir(), solrUser,
solrGroup, PERMISSION_755, true);
LinuxFileUtils.createDirectories(solrParams.getSolrPidDir(), solrUser,
solrGroup, PERMISSION_755, true);
- List<String> zookeeperServerHosts =
LocalSettings.hosts("zookeeper_server");
+ List<String> zookeeperServerHosts =
LocalSettings.componentHosts("zookeeper_server");
Map<String, Object> paramMap = new HashMap<>();
paramMap.put("zookeeper_quorum", zookeeperServerHosts);
paramMap.put("host", solrParams.hostname());
diff --git
a/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/zookeeper/ZookeeperSetup.java
b/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/zookeeper/ZookeeperSetup.java
index 1721b0f1..2717ea10 100644
---
a/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/zookeeper/ZookeeperSetup.java
+++
b/bigtop-manager-stack/bigtop-manager-stack-bigtop/src/main/java/org/apache/bigtop/manager/stack/bigtop/v3_3_0/zookeeper/ZookeeperSetup.java
@@ -48,7 +48,7 @@ public class ZookeeperSetup {
String zookeeperGroup = zookeeperParams.group();
Map<String, Object> zookeeperEnv = zookeeperParams.zookeeperEnv();
Map<String, Object> zooCfg = zookeeperParams.zooCfg();
- List<String> zkHostList = LocalSettings.hosts("zookeeper_server");
+ List<String> zkHostList =
LocalSettings.componentHosts("zookeeper_server");
LinuxFileUtils.createDirectories(
zookeeperParams.getZookeeperDataDir(), zookeeperUser,
zookeeperGroup, Constants.PERMISSION_755, true);
diff --git
a/bigtop-manager-stack/bigtop-manager-stack-core/src/main/java/org/apache/bigtop/manager/stack/core/executor/StackExecutor.java
b/bigtop-manager-stack/bigtop-manager-stack-core/src/main/java/org/apache/bigtop/manager/stack/core/executor/StackExecutor.java
index 16067ffd..dd0278bc 100644
---
a/bigtop-manager-stack/bigtop-manager-stack-core/src/main/java/org/apache/bigtop/manager/stack/core/executor/StackExecutor.java
+++
b/bigtop-manager-stack/bigtop-manager-stack-core/src/main/java/org/apache/bigtop/manager/stack/core/executor/StackExecutor.java
@@ -44,7 +44,7 @@ public class StackExecutor {
private static final Map<String, Hook> HOOK_MAP = new
PrioritySPIFactory<>(Hook.class).getSPIMap();
- private static Script
getCommandScript(org.apache.bigtop.manager.grpc.payload.ComponentCommandPayload
payload) {
+ private static Script getCommandScript(ComponentCommandPayload payload) {
String componentName = payload.getComponentName();
Script script = SCRIPT_MAP.get(componentName);
if (script == null) {
diff --git
a/bigtop-manager-stack/bigtop-manager-stack-core/src/main/java/org/apache/bigtop/manager/stack/core/spi/script/AbstractScript.java
b/bigtop-manager-stack/bigtop-manager-stack-core/src/main/java/org/apache/bigtop/manager/stack/core/spi/script/AbstractScript.java
index 913d66cd..cc53a12c 100644
---
a/bigtop-manager-stack/bigtop-manager-stack-core/src/main/java/org/apache/bigtop/manager/stack/core/spi/script/AbstractScript.java
+++
b/bigtop-manager-stack/bigtop-manager-stack-core/src/main/java/org/apache/bigtop/manager/stack/core/spi/script/AbstractScript.java
@@ -77,8 +77,8 @@ public abstract class AbstractScript implements Script {
public ShellResult configure(Params params) {
List<TemplateInfo> templates = params.templates();
for (TemplateInfo template : templates) {
- String dir = params.serviceHome() + "/" + template.getDest();
- String filename = dir + "/" + template.getSrc();
+ String filename = params.serviceHome() + "/" + template.getDest();
+ String dir = Path.of(filename).getParent().toString();
LinuxFileUtils.createDirectories(dir, params.user(),
params.group(), PERMISSION_755, true);
LinuxFileUtils.toFile(
ConfigType.CONTENT,
diff --git
a/bigtop-manager-stack/bigtop-manager-stack-core/src/main/java/org/apache/bigtop/manager/stack/core/utils/LocalSettings.java
b/bigtop-manager-stack/bigtop-manager-stack-core/src/main/java/org/apache/bigtop/manager/stack/core/utils/LocalSettings.java
index f8690599..4cfcb877 100644
---
a/bigtop-manager-stack/bigtop-manager-stack-core/src/main/java/org/apache/bigtop/manager/stack/core/utils/LocalSettings.java
+++
b/bigtop-manager-stack/bigtop-manager-stack-core/src/main/java/org/apache/bigtop/manager/stack/core/utils/LocalSettings.java
@@ -19,6 +19,7 @@
package org.apache.bigtop.manager.stack.core.utils;
import org.apache.bigtop.manager.common.constants.CacheFiles;
+import org.apache.bigtop.manager.common.utils.FileUtils;
import org.apache.bigtop.manager.common.utils.JsonUtils;
import org.apache.bigtop.manager.common.utils.ProjectPathUtils;
import org.apache.bigtop.manager.common.utils.os.OSDetection;
@@ -30,6 +31,7 @@ import com.fasterxml.jackson.core.type.TypeReference;
import lombok.extern.slf4j.Slf4j;
import java.io.File;
+import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -44,7 +46,7 @@ public class LocalSettings {
public static Map<String, Object> configurations(String service, String
type) {
Map<String, Object> configDataMap = new HashMap<>();
- File file = createFile(ProjectPathUtils.getAgentCachePath() +
CacheFiles.CONFIGURATIONS_INFO);
+ File file = createFile(clusterCacheDir() +
CacheFiles.CONFIGURATIONS_INFO);
try {
if (file.exists()) {
Map<String, Map<String, Object>> configJson =
JsonUtils.readFromFile(file, new TypeReference<>() {});
@@ -61,22 +63,31 @@ public class LocalSettings {
return configDataMap;
}
- public static List<String> hosts(String componentName) {
- return hosts().getOrDefault(componentName, List.of());
+ public static List<String> componentHosts(String componentName) {
+ return componentHosts().getOrDefault(componentName, List.of());
}
- public static Map<String, List<String>> hosts() {
+ public static Map<String, List<String>> componentHosts() {
Map<String, List<String>> hostJson = new HashMap<>();
- File file = createFile(ProjectPathUtils.getAgentCachePath() +
CacheFiles.HOSTS_INFO);
+ File file = createFile(clusterCacheDir() + CacheFiles.COMPONENTS_INFO);
if (file.exists()) {
hostJson = JsonUtils.readFromFile(file, new TypeReference<>() {});
}
return hostJson;
}
+    /**
+     * Reads the host list of the current cluster from the agent's local cache.
+     *
+     * @return the hosts parsed from the cluster's hosts cache file, or an empty mutable list when that
+     *         file does not exist
+     */
+    public static List<String> clusterHosts() {
+        List<String> hosts = new ArrayList<>();
+        // Read the hosts file, not the components file: componentHosts() parses COMPONENTS_INFO as a
+        // Map<String, List<String>>, which cannot be bound to a List<String>.
+        File file = createFile(clusterCacheDir() + CacheFiles.HOSTS_INFO);
+        if (file.exists()) {
+            hosts = JsonUtils.readFromFile(file, new TypeReference<>() {});
+        }
+        return hosts;
+    }
+
public static Map<String, Object> basicInfo() {
Map<String, Object> settings = new HashMap<>();
- File file = createFile(ProjectPathUtils.getAgentCachePath() +
CacheFiles.SETTINGS_INFO);
+ File file = createFile(clusterCacheDir() + CacheFiles.SETTINGS_INFO);
if (file.exists()) {
settings = JsonUtils.readFromFile(file, new TypeReference<>() {});
}
@@ -85,7 +96,7 @@ public class LocalSettings {
public static Map<String, String> users() {
Map<String, String> userMap = new HashMap<>();
- File file = createFile(ProjectPathUtils.getAgentCachePath() +
CacheFiles.USERS_INFO);
+ File file = createFile(clusterCacheDir() + CacheFiles.USERS_INFO);
if (file.exists()) {
userMap = JsonUtils.readFromFile(file, new TypeReference<>() {});
}
@@ -110,7 +121,7 @@ public class LocalSettings {
public static List<RepoInfo> repos() {
List<RepoInfo> repoInfoList = List.of();
- File file = createFile(ProjectPathUtils.getAgentCachePath() +
CacheFiles.REPOS_INFO);
+ File file = createFile(clusterCacheDir() + CacheFiles.REPOS_INFO);
if (file.exists()) {
repoInfoList = JsonUtils.readFromFile(file, new TypeReference<>()
{});
}
@@ -119,13 +130,19 @@ public class LocalSettings {
public static ClusterInfo cluster() {
ClusterInfo clusterInfo = new ClusterInfo();
- File file = createFile(ProjectPathUtils.getAgentCachePath() +
CacheFiles.CLUSTER_INFO);
+ File file = createFile(clusterCacheDir() + CacheFiles.CLUSTER_INFO);
if (file.exists()) {
clusterInfo = JsonUtils.readFromFile(file, new TypeReference<>()
{});
}
return clusterInfo;
}
+    /**
+     * Builds the path of the cache directory that belongs to the cluster named in the {@code current} file.
+     *
+     * <p>The cluster id is read from the {@code current} file under the agent cache path and appended to
+     * that path. NOTE(review): the id is used exactly as returned by {@code FileUtils.readFile2Str};
+     * confirm it never carries a trailing newline.
+     *
+     * @return agent cache path, file separator and cluster id concatenated
+     */
+    protected static String clusterCacheDir() {
+        String cacheRoot = ProjectPathUtils.getAgentCachePath();
+        String currentMarker = cacheRoot + File.separator + "current";
+        String currentClusterId = FileUtils.readFile2Str(currentMarker);
+        return cacheRoot + File.separator + currentClusterId;
+    }
+
protected static File createFile(String fileName) {
return new File(fileName);
}
diff --git
a/bigtop-manager-stack/bigtop-manager-stack-core/src/test/java/org/apache/bigtop/manager/stack/core/utils/LocalSettingsTest.java
b/bigtop-manager-stack/bigtop-manager-stack-core/src/test/java/org/apache/bigtop/manager/stack/core/utils/LocalSettingsTest.java
index 73d0ca87..92e4d8dc 100644
---
a/bigtop-manager-stack/bigtop-manager-stack-core/src/test/java/org/apache/bigtop/manager/stack/core/utils/LocalSettingsTest.java
+++
b/bigtop-manager-stack/bigtop-manager-stack-core/src/test/java/org/apache/bigtop/manager/stack/core/utils/LocalSettingsTest.java
@@ -108,7 +108,7 @@ public class LocalSettingsTest {
}
@Test
- public void testHosts() {
+ public void testComponentHosts() {
String componentName = "componentA";
Map<String, List<String>> hostJson = new HashMap<>();
@@ -123,11 +123,13 @@ public class LocalSettingsTest {
localSettingsMockedStatic
.when(() -> JsonUtils.readFromFile(any(File.class),
any(TypeReference.class)))
.thenReturn(hostJson);
- localSettingsMockedStatic.when(() ->
LocalSettings.hosts(anyString())).thenCallRealMethod();
-
localSettingsMockedStatic.when(LocalSettings::hosts).thenCallRealMethod();
+ localSettingsMockedStatic
+ .when(() -> LocalSettings.componentHosts(anyString()))
+ .thenCallRealMethod();
+
localSettingsMockedStatic.when(LocalSettings::componentHosts).thenCallRealMethod();
List<String> expectedHosts = List.of("host1", "host2");
- assertEquals(expectedHosts, LocalSettings.hosts(componentName));
+ assertEquals(expectedHosts,
LocalSettings.componentHosts(componentName));
}
@Test
diff --git
a/bigtop-manager-stack/bigtop-manager-stack-extra/src/main/java/org/apache/bigtop/manager/stack/extra/v1_0_0/doris/DorisParams.java
b/bigtop-manager-stack/bigtop-manager-stack-extra/src/main/java/org/apache/bigtop/manager/stack/extra/v1_0_0/doris/DorisParams.java
index 8f1f200f..05794f31 100644
---
a/bigtop-manager-stack/bigtop-manager-stack-extra/src/main/java/org/apache/bigtop/manager/stack/extra/v1_0_0/doris/DorisParams.java
+++
b/bigtop-manager-stack/bigtop-manager-stack-extra/src/main/java/org/apache/bigtop/manager/stack/extra/v1_0_0/doris/DorisParams.java
@@ -90,7 +90,7 @@ public class DorisParams extends ExtraParams {
}
public List<String> dorisFeHosts() {
- return LocalSettings.hosts("doris_fe");
+ return LocalSettings.componentHosts("doris_fe");
}
public int dorisFeHttpPort() {
diff --git
a/bigtop-manager-stack/bigtop-manager-stack-extra/src/main/java/org/apache/bigtop/manager/stack/extra/v1_0_0/seatunnel/SeaTunnelSetup.java
b/bigtop-manager-stack/bigtop-manager-stack-extra/src/main/java/org/apache/bigtop/manager/stack/extra/v1_0_0/seatunnel/SeaTunnelSetup.java
index dfa70540..b8f4881b 100644
---
a/bigtop-manager-stack/bigtop-manager-stack-extra/src/main/java/org/apache/bigtop/manager/stack/extra/v1_0_0/seatunnel/SeaTunnelSetup.java
+++
b/bigtop-manager-stack/bigtop-manager-stack-extra/src/main/java/org/apache/bigtop/manager/stack/extra/v1_0_0/seatunnel/SeaTunnelSetup.java
@@ -141,7 +141,7 @@ public class SeaTunnelSetup {
private static List<String> hostPort(String componentName, String port,
int spacesNum) {
String spaces = " ".repeat(spacesNum);
- List<String> hostList = LocalSettings.hosts(componentName);
+ List<String> hostList = LocalSettings.componentHosts(componentName);
hostList.sort(String::compareToIgnoreCase);
List<String> hostPortList = new ArrayList<>();
for (String host : hostList) {
diff --git
a/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/param/InfraParams.java
b/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/param/InfraParams.java
index 33e3ce2d..dc22be51 100644
---
a/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/param/InfraParams.java
+++
b/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/param/InfraParams.java
@@ -18,14 +18,24 @@
*/
package org.apache.bigtop.manager.stack.infra.param;
+import org.apache.bigtop.manager.common.constants.CacheFiles;
+import org.apache.bigtop.manager.common.utils.JsonUtils;
+import org.apache.bigtop.manager.common.utils.ProjectPathUtils;
import org.apache.bigtop.manager.grpc.payload.ComponentCommandPayload;
+import org.apache.bigtop.manager.grpc.pojo.ClusterInfo;
import org.apache.bigtop.manager.stack.core.spi.param.BaseParams;
+import org.apache.bigtop.manager.stack.core.utils.LocalSettings;
+import com.fasterxml.jackson.core.type.TypeReference;
import lombok.NoArgsConstructor;
import lombok.extern.slf4j.Slf4j;
+import java.io.File;
+import java.util.Arrays;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.util.Objects;
@Slf4j
@NoArgsConstructor
@@ -45,8 +55,91 @@ public abstract class InfraParams extends BaseParams {
return "infra";
}
- public Map<String, List<String>> getClusterHosts() {
- // In Component Status stage, clusterHosts is null
- return payload.getClusterHosts();
+    /**
+     * Collects the host list of every cluster directory found in the agent cache.
+     *
+     * @return map keyed by the {@code name} entry of each cluster's cluster-info file, valued by the
+     *         content of that cluster's hosts file
+     */
+    protected Map<String, List<String>> getClusterHosts() {
+        Map<String, List<String>> hostsByCluster = new HashMap<>();
+
+        for (String clusterDir : getClusterDirs()) {
+            List<String> cachedHosts = JsonUtils.readFromFile(clusterDir + CacheFiles.HOSTS_INFO);
+            Map<String, String> info = JsonUtils.readFromFile(clusterDir + CacheFiles.CLUSTER_INFO);
+            String clusterName = info.get("name");
+            hostsByCluster.put(clusterName, cachedHosts);
+        }
+
+        return hostsByCluster;
+    }
+
+    /**
+     * Looks up, for every cluster directory in the agent cache, the hosts registered for one component.
+     *
+     * @param componentName key to look up in each cluster's components file
+     * @return map keyed by the {@code name} entry of each cluster's cluster-info file; the value is the
+     *         component's host list, or an empty list when the cluster has no entry for the component
+     */
+    protected Map<String, List<String>> getComponentHosts(String componentName) {
+        Map<String, List<String>> hostsByCluster = new HashMap<>();
+
+        for (String clusterDir : getClusterDirs()) {
+            Map<String, List<String>> hostsByComponent =
+                    JsonUtils.readFromFile(clusterDir + CacheFiles.COMPONENTS_INFO);
+            Map<String, String> info = JsonUtils.readFromFile(clusterDir + CacheFiles.CLUSTER_INFO);
+
+            List<String> matched = hostsByComponent.containsKey(componentName)
+                    ? hostsByComponent.get(componentName)
+                    : List.of();
+            hostsByCluster.put(info.get("name"), matched);
+        }
+
+        return hostsByCluster;
+    }
+
+    /**
+     * Reads one configuration type of one service from every cluster directory in the agent cache.
+     *
+     * @param service service name used as the first-level key of each cluster's configurations file
+     * @param type    configuration type used as the second-level key
+     * @return map keyed by the {@code name} entry of each cluster's cluster-info file; the value is the
+     *         parsed configuration, or an empty map when the cluster has no such service/type
+     */
+    protected Map<String, Map<String, Object>> configurations(String service, String type) {
+        Map<String, Map<String, Object>> configurations = new HashMap<>();
+        List<String> subDirs = getClusterDirs();
+
+        for (String subDir : subDirs) {
+            Map<String, String> clusterInfo = JsonUtils.readFromFile(subDir + CacheFiles.CLUSTER_INFO);
+            Map<String, Map<String, Object>> configJson =
+                    JsonUtils.readFromFile(subDir + CacheFiles.CONFIGURATIONS_INFO);
+
+            Object configData = configJson.getOrDefault(service, new HashMap<>()).get(type);
+            // Not every cluster has the requested service/type; report an empty configuration for it
+            // instead of failing on configData.toString().
+            Map<String, Object> map = new HashMap<>();
+            if (configData != null) {
+                map = JsonUtils.readFromString(configData.toString());
+            }
+            configurations.put(clusterInfo.get("name"), map);
+        }
+
+        return configurations;
+    }
+
+    /**
+     * Reads a single configuration value of one service/type from every cluster directory in the agent cache.
+     *
+     * @param service service name used as the first-level key of each cluster's configurations file
+     * @param type    configuration type used as the second-level key
+     * @param key     property to extract from the parsed configuration
+     * @return map keyed by the {@code name} entry of each cluster's cluster-info file; the value is the
+     *         property value, or {@code null} when the cluster has no such service/type or property
+     */
+    protected Map<String, Object> configurations(String service, String type, String key) {
+        Map<String, Object> configurations = new HashMap<>();
+        List<String> subDirs = getClusterDirs();
+
+        for (String subDir : subDirs) {
+            Map<String, String> clusterInfo = JsonUtils.readFromFile(subDir + CacheFiles.CLUSTER_INFO);
+            Map<String, Map<String, Object>> configJson =
+                    JsonUtils.readFromFile(subDir + CacheFiles.CONFIGURATIONS_INFO);
+
+            Object configData = configJson.getOrDefault(service, new HashMap<>()).get(type);
+            // Not every cluster has the requested service/type; record null for it instead of failing
+            // on configData.toString().
+            Object value = null;
+            if (configData != null) {
+                Map<String, Object> map = JsonUtils.readFromString(configData.toString());
+                value = map.get(key);
+            }
+            configurations.put(clusterInfo.get("name"), value);
+        }
+
+        return configurations;
+    }
+
+    /**
+     * Loads the cluster-info file of every cluster directory in the agent cache.
+     *
+     * @return one {@code ClusterInfo} per cluster directory, as an unmodifiable list
+     */
+    protected List<ClusterInfo> clusters() {
+        List<String> clusterDirs = getClusterDirs();
+        return clusterDirs.stream()
+                .map(clusterDir -> clusterDir + CacheFiles.CLUSTER_INFO)
+                .map(infoFile -> JsonUtils.readFromFile(infoFile, new TypeReference<ClusterInfo>() {}))
+                .toList();
+    }
+
+    /**
+     * Returns the cluster info of the host on which the component is running, as provided by
+     * {@link LocalSettings#cluster()}.
+     *
+     * <p>Since the infra service can be installed across clusters, the result differs from host to host.
+     *
+     * @return ClusterInfo of the local host's cluster
+     */
+    protected ClusterInfo hostCluster() {
+        ClusterInfo localCluster = LocalSettings.cluster();
+        return localCluster;
+    }
+
+    /**
+     * Lists the sub-directories of the agent cache path; each one is treated as a cluster cache directory.
+     *
+     * @return absolute paths of the sub-directories in natural string order, or an empty list when the
+     *         agent cache path cannot be listed
+     */
+    private List<String> getClusterDirs() {
+        File cacheRoot = new File(ProjectPathUtils.getAgentCachePath());
+        // listFiles() returns null when the path is not a directory or an I/O error occurs.
+        File[] entries = cacheRoot.listFiles();
+        if (entries == null) {
+            log.warn("Agent cache path {} cannot be listed, no cluster cache directories found", cacheRoot);
+            return List.of();
+        }
+
+        // listFiles() gives no ordering guarantee; sort so that callers see a stable cluster order.
+        return Arrays.stream(entries)
+                .filter(File::isDirectory)
+                .map(File::getAbsolutePath)
+                .sorted()
+                .toList();
     }
}
diff --git
a/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/grafana/GrafanaParams.java
b/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/grafana/GrafanaParams.java
index 22a215ef..f5a92140 100644
---
a/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/grafana/GrafanaParams.java
+++
b/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/grafana/GrafanaParams.java
@@ -115,7 +115,7 @@ public class GrafanaParams extends InfraParams {
@GlobalParams
public Map<String, Object> prometheus() {
Map<String, Object> configuration =
LocalSettings.configurations(getServiceName(), "grafana-datasources");
- List<String> prometheusServers =
LocalSettings.hosts().get("prometheus_server");
+ List<String> prometheusServers =
LocalSettings.componentHosts().get("prometheus_server");
if (prometheusServers == null || prometheusServers.isEmpty()) {
return configuration;
}
diff --git
a/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusParams.java
b/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusParams.java
index 63bbd94f..063cc4df 100644
---
a/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusParams.java
+++
b/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusParams.java
@@ -19,6 +19,7 @@
package org.apache.bigtop.manager.stack.infra.v1_0_0.prometheus;
import org.apache.bigtop.manager.grpc.payload.ComponentCommandPayload;
+import org.apache.bigtop.manager.grpc.pojo.ClusterInfo;
import org.apache.bigtop.manager.stack.core.annotations.GlobalParams;
import org.apache.bigtop.manager.stack.core.spi.param.Params;
import org.apache.bigtop.manager.stack.core.utils.LocalSettings;
@@ -48,11 +49,10 @@ public class PrometheusParams extends InfraParams {
private Map<String, Object> prometheusScrapeJob;
private Map<String, Object> agentScrapeJob;
+ private List<Map<String, Object>> zookeeperScrapeJobs;
private List<Map<String, Object>> scrapeJobs;
private String prometheusPort;
private String prometheusContent;
- private String prometheusRulesFilename;
- private String prometheusRulesFileContent;
public PrometheusParams(ComponentCommandPayload componentCommandPayload) {
super(componentCommandPayload);
@@ -67,11 +67,13 @@ public class PrometheusParams extends InfraParams {
super.initGlobalParams();
setAgentScrapeJob();
+ setZookeeperScrapeJob();
+
scrapeJobs = new ArrayList<>();
scrapeJobs.add(prometheusScrapeJob);
scrapeJobs.add(agentScrapeJob);
+ scrapeJobs.addAll(zookeeperScrapeJobs);
globalParamsMap.put("scrape_jobs", scrapeJobs);
- globalParamsMap.put("rules_file_name", prometheusRulesFilename);
}
public String dataDir() {
@@ -111,15 +113,6 @@ public class PrometheusParams extends InfraParams {
return configuration;
}
- @GlobalParams
- public Map<String, Object> rules() {
- Map<String, Object> configuration =
LocalSettings.configurations(getServiceName(), "prometheus-rule");
-
- prometheusRulesFilename = (String)
configuration.get("rules_file_name");
- prometheusRulesFileContent = (String) configuration.get("content");
- return configuration;
- }
-
public String listenAddress() {
return MessageFormat.format("0.0.0.0:{0}", prometheusPort);
}
@@ -147,4 +140,33 @@ public class PrometheusParams extends InfraParams {
agentScrapeJob.put("targets_list", agentTargets);
}
+
+    /**
+     * Builds one Prometheus scrape job per cluster whose {@code zoo.cfg} sets
+     * {@code metricsProvider.className} to ZooKeeper's Prometheus metrics provider, and stores the
+     * result in {@code zookeeperScrapeJobs}.
+     *
+     * <p>Each job is named {@code <clusterName>-zookeeper} and targets every {@code zookeeper_server}
+     * host of that cluster on {@code metricsProvider.httpPort} (7000 when the property is absent).
+     * Clusters without a {@code zoo.cfg} entry or with a different metrics provider are skipped.
+     */
+    public void setZookeeperScrapeJob() {
+        zookeeperScrapeJobs = new ArrayList<>();
+        String defaultProvider = "org.apache.zookeeper.metrics.prometheus.PrometheusMetricsProvider";
+        Map<String, Map<String, Object>> configurations = configurations("zookeeper", "zoo.cfg");
+        // Loaded on first use and then reused: the lookup re-reads every cluster's cache files.
+        Map<String, List<String>> zkServersByCluster = null;
+
+        for (ClusterInfo clusterInfo : clusters()) {
+            String clusterName = clusterInfo.getName();
+            Map<String, Object> zooCfg = configurations.get(clusterName);
+            if (zooCfg == null || !defaultProvider.equals(zooCfg.get("metricsProvider.className"))) {
+                continue;
+            }
+
+            String jobName = MessageFormat.format("{0}-zookeeper", clusterName);
+            Map<String, Object> job = new HashMap<>();
+            job.put("name", jobName);
+            job.put("targets_file", targetsConfigFile(jobName));
+
+            if (zkServersByCluster == null) {
+                zkServersByCluster = getComponentHosts("zookeeper_server");
+            }
+            List<String> zkServers = zkServersByCluster.getOrDefault(clusterName, List.of());
+            Object port = zooCfg.getOrDefault("metricsProvider.httpPort", 7000L);
+
+            // Concatenate rather than use MessageFormat.format: the latter would render a numeric
+            // port with grouping separators.
+            List<String> targets = zkServers.stream().map(s -> s + ":" + port).toList();
+            Map<String, Object> target = new HashMap<>();
+            target.put("targets", targets);
+            job.put("targets_list", List.of(target));
+
+            zookeeperScrapeJobs.add(job);
+        }
+    }
}
diff --git
a/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusServerScript.java
b/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusServerScript.java
index e2bf18e2..c3c3b931 100644
---
a/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusServerScript.java
+++
b/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusServerScript.java
@@ -54,7 +54,7 @@ public class PrometheusServerScript extends
AbstractServerScript {
PrometheusParams prometheusParams = (PrometheusParams) params;
String cmd = MessageFormat.format(
"nohup {0}/prometheus --config.file={1}/prometheus.yml
--web.listen-address={2} --storage.tsdb.path={0}/data > {0}/nohup.out 2>&1 &",
- prometheusParams.serviceHome(), prometheusParams.confDir(),
prometheusParams.listenAddress());
+ prometheusParams.serviceHome(),
prometheusParams.serviceHome(), prometheusParams.listenAddress());
try {
ShellResult shellResult = LinuxOSUtils.sudoExecCmd(cmd,
prometheusParams.user());
if (shellResult.getExitCode() != 0) {
diff --git
a/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusSetup.java
b/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusSetup.java
index a67e7f49..a9e0979e 100644
---
a/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusSetup.java
+++
b/bigtop-manager-stack/bigtop-manager-stack-infra/src/main/java/org/apache/bigtop/manager/stack/infra/v1_0_0/prometheus/PrometheusSetup.java
@@ -45,16 +45,7 @@ public class PrometheusSetup {
LinuxFileUtils.toFileByTemplate(
prometheusParams.getPrometheusContent(),
- MessageFormat.format("{0}/prometheus.yml",
prometheusParams.confDir()),
- user,
- group,
- Constants.PERMISSION_644,
- prometheusParams.getGlobalParamsMap());
-
- LinuxFileUtils.toFileByTemplate(
- prometheusParams.getPrometheusRulesFileContent(),
- MessageFormat.format(
- "{0}/{1}", prometheusParams.confDir(),
prometheusParams.getPrometheusRulesFilename()),
+ MessageFormat.format("{0}/prometheus.yml",
prometheusParams.serviceHome()),
user,
group,
Constants.PERMISSION_644,